diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,50433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.14544455532347272, + "eval_steps": 500, + "global_step": 72000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.0200632683815657e-05, + "grad_norm": 156092.828125, + "learning_rate": 2e-09, + "loss": 152241.3375, + "step": 10 + }, + { + "epoch": 4.040126536763131e-05, + "grad_norm": 103432.0859375, + "learning_rate": 4e-09, + "loss": 100357.475, + "step": 20 + }, + { + "epoch": 6.060189805144697e-05, + "grad_norm": 304186.3125, + "learning_rate": 6e-09, + "loss": 173523.325, + "step": 30 + }, + { + "epoch": 8.080253073526263e-05, + "grad_norm": 8605.58203125, + "learning_rate": 8e-09, + "loss": 167691.1125, + "step": 40 + }, + { + "epoch": 0.00010100316341907829, + "grad_norm": 266906.8125, + "learning_rate": 1e-08, + "loss": 224020.85, + "step": 50 + }, + { + "epoch": 0.00012120379610289395, + "grad_norm": 76745.1484375, + "learning_rate": 1.2e-08, + "loss": 71860.2063, + "step": 60 + }, + { + "epoch": 0.0001414044287867096, + "grad_norm": 189126.921875, + "learning_rate": 1.4000000000000001e-08, + "loss": 167667.3875, + "step": 70 + }, + { + "epoch": 0.00016160506147052525, + "grad_norm": 338904.3125, + "learning_rate": 1.6e-08, + "loss": 177360.775, + "step": 80 + }, + { + "epoch": 0.0001818056941543409, + "grad_norm": 1365957.625, + "learning_rate": 1.8000000000000002e-08, + "loss": 123295.875, + "step": 90 + }, + { + "epoch": 0.00020200632683815657, + "grad_norm": 544155.8125, + "learning_rate": 2e-08, + "loss": 175492.7375, + "step": 100 + }, + { + "epoch": 0.00022220695952197223, + "grad_norm": 68274.625, + "learning_rate": 2.2000000000000002e-08, + "loss": 95997.475, + "step": 110 + }, + { + "epoch": 0.0002424075922057879, + "grad_norm": 647676.25, + "learning_rate": 2.4e-08, + "loss": 168045.9875, + "step": 120 + }, + { + "epoch": 0.0002626082248896035, + "grad_norm": 813742.875, + "learning_rate": 2.6e-08, + "loss": 193586.9375, + "step": 130 + }, + { + "epoch": 0.0002828088575734192, + "grad_norm": 39750.39453125, + "learning_rate": 2.8000000000000003e-08, + "loss": 213820.6, + "step": 140 + }, + { + "epoch": 0.00030300949025723485, + "grad_norm": 444638.71875, + "learning_rate": 3.0000000000000004e-08, + "loss": 211585.8, + "step": 150 + }, + { + "epoch": 0.0003232101229410505, + "grad_norm": 266881.875, + "learning_rate": 3.2e-08, + "loss": 248047.925, + "step": 160 + }, + { + "epoch": 0.00034341075562486617, + "grad_norm": 19488.982421875, + "learning_rate": 3.4e-08, + "loss": 197976.1625, + "step": 170 + }, + { + "epoch": 0.0003636113883086818, + "grad_norm": 347306.5625, + "learning_rate": 3.6000000000000005e-08, + "loss": 125993.3, + "step": 180 + }, + { + "epoch": 0.0003838120209924975, + "grad_norm": 2608097.0, + "learning_rate": 3.8e-08, + "loss": 156943.325, + "step": 190 + }, + { + "epoch": 0.00040401265367631315, + "grad_norm": 569704.375, + "learning_rate": 4e-08, + "loss": 166954.825, + "step": 200 + }, + { + "epoch": 0.0004242132863601288, + "grad_norm": 88803.921875, + "learning_rate": 4.2e-08, + "loss": 202437.925, + "step": 210 + }, + { + "epoch": 0.00044441391904394446, + "grad_norm": 14854.41796875, + "learning_rate": 4.4000000000000004e-08, + "loss": 115099.2, + "step": 220 + }, + { + "epoch": 0.0004646145517277601, + "grad_norm": 523277.0, + "learning_rate": 4.6e-08, + "loss": 78583.6687, + "step": 230 + }, + { + "epoch": 0.0004848151844115758, + "grad_norm": 215962.109375, + "learning_rate": 4.8e-08, + "loss": 257730.6, + "step": 240 + }, + { + "epoch": 0.0005050158170953914, + "grad_norm": 689329.0, + "learning_rate": 5.0000000000000004e-08, + "loss": 238587.25, + "step": 250 + }, + { + "epoch": 0.000525216449779207, + "grad_norm": 1294294.125, + "learning_rate": 5.2e-08, + "loss": 178140.6625, + "step": 260 + }, + { + "epoch": 0.0005454170824630227, + "grad_norm": 8251.6240234375, + "learning_rate": 5.400000000000001e-08, + "loss": 49190.0281, + "step": 270 + }, + { + "epoch": 0.0005656177151468384, + "grad_norm": 197624.234375, + "learning_rate": 5.6000000000000005e-08, + "loss": 314487.625, + "step": 280 + }, + { + "epoch": 0.000585818347830654, + "grad_norm": 979953.375, + "learning_rate": 5.8e-08, + "loss": 238169.15, + "step": 290 + }, + { + "epoch": 0.0006060189805144697, + "grad_norm": 1245312.0, + "learning_rate": 6.000000000000001e-08, + "loss": 141529.725, + "step": 300 + }, + { + "epoch": 0.0006262196131982854, + "grad_norm": 623276.75, + "learning_rate": 6.2e-08, + "loss": 245496.375, + "step": 310 + }, + { + "epoch": 0.000646420245882101, + "grad_norm": 23645.142578125, + "learning_rate": 6.4e-08, + "loss": 125938.1875, + "step": 320 + }, + { + "epoch": 0.0006666208785659167, + "grad_norm": 161712.34375, + "learning_rate": 6.600000000000001e-08, + "loss": 138157.5375, + "step": 330 + }, + { + "epoch": 0.0006868215112497323, + "grad_norm": 443164.25, + "learning_rate": 6.8e-08, + "loss": 246258.175, + "step": 340 + }, + { + "epoch": 0.000707022143933548, + "grad_norm": 282411.125, + "learning_rate": 7e-08, + "loss": 126634.1125, + "step": 350 + }, + { + "epoch": 0.0007272227766173637, + "grad_norm": 359038.84375, + "learning_rate": 7.200000000000001e-08, + "loss": 104799.1625, + "step": 360 + }, + { + "epoch": 0.0007474234093011793, + "grad_norm": 111194.015625, + "learning_rate": 7.400000000000001e-08, + "loss": 74978.4312, + "step": 370 + }, + { + "epoch": 0.000767624041984995, + "grad_norm": 177180.65625, + "learning_rate": 7.6e-08, + "loss": 123581.9375, + "step": 380 + }, + { + "epoch": 0.0007878246746688106, + "grad_norm": 354067.5625, + "learning_rate": 7.8e-08, + "loss": 92415.9125, + "step": 390 + }, + { + "epoch": 0.0008080253073526263, + "grad_norm": 3103311.75, + "learning_rate": 8e-08, + "loss": 263590.05, + "step": 400 + }, + { + "epoch": 0.000828225940036442, + "grad_norm": 3171.897705078125, + "learning_rate": 8.200000000000002e-08, + "loss": 53137.6, + "step": 410 + }, + { + "epoch": 0.0008484265727202576, + "grad_norm": 18717.650390625, + "learning_rate": 8.4e-08, + "loss": 162145.7375, + "step": 420 + }, + { + "epoch": 0.0008686272054040733, + "grad_norm": 60268.69140625, + "learning_rate": 8.6e-08, + "loss": 108984.075, + "step": 430 + }, + { + "epoch": 0.0008888278380878889, + "grad_norm": 418568.78125, + "learning_rate": 8.800000000000001e-08, + "loss": 89227.875, + "step": 440 + }, + { + "epoch": 0.0009090284707717046, + "grad_norm": 404469.84375, + "learning_rate": 9e-08, + "loss": 120598.175, + "step": 450 + }, + { + "epoch": 0.0009292291034555202, + "grad_norm": 409640.59375, + "learning_rate": 9.2e-08, + "loss": 127446.0375, + "step": 460 + }, + { + "epoch": 0.0009494297361393359, + "grad_norm": 35962.44140625, + "learning_rate": 9.400000000000001e-08, + "loss": 82232.3813, + "step": 470 + }, + { + "epoch": 0.0009696303688231516, + "grad_norm": 20380.439453125, + "learning_rate": 9.6e-08, + "loss": 190499.8, + "step": 480 + }, + { + "epoch": 0.0009898310015069671, + "grad_norm": 1218201.5, + "learning_rate": 9.8e-08, + "loss": 301381.525, + "step": 490 + }, + { + "epoch": 0.0010100316341907828, + "grad_norm": 192265.25, + "learning_rate": 1.0000000000000001e-07, + "loss": 124580.6, + "step": 500 + }, + { + "epoch": 0.0010302322668745984, + "grad_norm": 129338.109375, + "learning_rate": 1.0200000000000001e-07, + "loss": 140791.375, + "step": 510 + }, + { + "epoch": 0.001050432899558414, + "grad_norm": 152829.375, + "learning_rate": 1.04e-07, + "loss": 98696.425, + "step": 520 + }, + { + "epoch": 0.0010706335322422298, + "grad_norm": 326204.46875, + "learning_rate": 1.0600000000000001e-07, + "loss": 91044.3438, + "step": 530 + }, + { + "epoch": 0.0010908341649260454, + "grad_norm": 43417.01171875, + "learning_rate": 1.0800000000000001e-07, + "loss": 90017.6062, + "step": 540 + }, + { + "epoch": 0.001111034797609861, + "grad_norm": 187389.078125, + "learning_rate": 1.1e-07, + "loss": 247628.55, + "step": 550 + }, + { + "epoch": 0.0011312354302936767, + "grad_norm": 199126.125, + "learning_rate": 1.1200000000000001e-07, + "loss": 285925.875, + "step": 560 + }, + { + "epoch": 0.0011514360629774924, + "grad_norm": 156848.234375, + "learning_rate": 1.1400000000000001e-07, + "loss": 114101.7125, + "step": 570 + }, + { + "epoch": 0.001171636695661308, + "grad_norm": 931041.375, + "learning_rate": 1.16e-07, + "loss": 95914.5875, + "step": 580 + }, + { + "epoch": 0.0011918373283451237, + "grad_norm": 122881.28125, + "learning_rate": 1.1800000000000001e-07, + "loss": 103882.8, + "step": 590 + }, + { + "epoch": 0.0012120379610289394, + "grad_norm": 178571.1875, + "learning_rate": 1.2000000000000002e-07, + "loss": 148990.875, + "step": 600 + }, + { + "epoch": 0.001232238593712755, + "grad_norm": 791168.5, + "learning_rate": 1.22e-07, + "loss": 172585.0125, + "step": 610 + }, + { + "epoch": 0.0012524392263965707, + "grad_norm": 1169028.625, + "learning_rate": 1.24e-07, + "loss": 213547.775, + "step": 620 + }, + { + "epoch": 0.0012726398590803864, + "grad_norm": 747104.0, + "learning_rate": 1.2600000000000002e-07, + "loss": 172679.525, + "step": 630 + }, + { + "epoch": 0.001292840491764202, + "grad_norm": 95442.0, + "learning_rate": 1.28e-07, + "loss": 281140.9, + "step": 640 + }, + { + "epoch": 0.0013130411244480177, + "grad_norm": 181739.25, + "learning_rate": 1.3e-07, + "loss": 156026.6375, + "step": 650 + }, + { + "epoch": 0.0013332417571318333, + "grad_norm": 205422.09375, + "learning_rate": 1.3200000000000002e-07, + "loss": 194705.8125, + "step": 660 + }, + { + "epoch": 0.001353442389815649, + "grad_norm": 767936.5, + "learning_rate": 1.34e-07, + "loss": 285736.675, + "step": 670 + }, + { + "epoch": 0.0013736430224994647, + "grad_norm": 1026317.75, + "learning_rate": 1.36e-07, + "loss": 114232.975, + "step": 680 + }, + { + "epoch": 0.0013938436551832803, + "grad_norm": 59367.375, + "learning_rate": 1.3800000000000002e-07, + "loss": 214846.9, + "step": 690 + }, + { + "epoch": 0.001414044287867096, + "grad_norm": 123827.1015625, + "learning_rate": 1.4e-07, + "loss": 98755.275, + "step": 700 + }, + { + "epoch": 0.0014342449205509116, + "grad_norm": 4726.1748046875, + "learning_rate": 1.4200000000000003e-07, + "loss": 52456.0062, + "step": 710 + }, + { + "epoch": 0.0014544455532347273, + "grad_norm": 63123.06640625, + "learning_rate": 1.4400000000000002e-07, + "loss": 91487.5125, + "step": 720 + }, + { + "epoch": 0.001474646185918543, + "grad_norm": 704056.6875, + "learning_rate": 1.46e-07, + "loss": 276228.8, + "step": 730 + }, + { + "epoch": 0.0014948468186023586, + "grad_norm": 1798.7669677734375, + "learning_rate": 1.4800000000000003e-07, + "loss": 108179.0375, + "step": 740 + }, + { + "epoch": 0.0015150474512861743, + "grad_norm": 170871.71875, + "learning_rate": 1.5000000000000002e-07, + "loss": 89434.8375, + "step": 750 + }, + { + "epoch": 0.00153524808396999, + "grad_norm": 906753.6875, + "learning_rate": 1.52e-07, + "loss": 115043.8875, + "step": 760 + }, + { + "epoch": 0.0015554487166538056, + "grad_norm": 286758.5625, + "learning_rate": 1.5400000000000003e-07, + "loss": 74720.6875, + "step": 770 + }, + { + "epoch": 0.0015756493493376213, + "grad_norm": 1320107.875, + "learning_rate": 1.56e-07, + "loss": 125423.95, + "step": 780 + }, + { + "epoch": 0.001595849982021437, + "grad_norm": 33375.2734375, + "learning_rate": 1.5800000000000004e-07, + "loss": 48237.1719, + "step": 790 + }, + { + "epoch": 0.0016160506147052526, + "grad_norm": 4277714.5, + "learning_rate": 1.6e-07, + "loss": 208520.25, + "step": 800 + }, + { + "epoch": 0.0016362512473890682, + "grad_norm": 1051827.625, + "learning_rate": 1.62e-07, + "loss": 132006.625, + "step": 810 + }, + { + "epoch": 0.001656451880072884, + "grad_norm": 1283.63720703125, + "learning_rate": 1.6400000000000004e-07, + "loss": 247068.45, + "step": 820 + }, + { + "epoch": 0.0016766525127566996, + "grad_norm": 162306.25, + "learning_rate": 1.66e-07, + "loss": 129757.0625, + "step": 830 + }, + { + "epoch": 0.0016968531454405152, + "grad_norm": 103617.4296875, + "learning_rate": 1.68e-07, + "loss": 50390.3094, + "step": 840 + }, + { + "epoch": 0.0017170537781243309, + "grad_norm": 474210.125, + "learning_rate": 1.7000000000000001e-07, + "loss": 277423.875, + "step": 850 + }, + { + "epoch": 0.0017372544108081465, + "grad_norm": 272941.9375, + "learning_rate": 1.72e-07, + "loss": 46322.3313, + "step": 860 + }, + { + "epoch": 0.0017574550434919622, + "grad_norm": 2629991.5, + "learning_rate": 1.74e-07, + "loss": 158070.3125, + "step": 870 + }, + { + "epoch": 0.0017776556761757779, + "grad_norm": 498037.21875, + "learning_rate": 1.7600000000000001e-07, + "loss": 64262.0813, + "step": 880 + }, + { + "epoch": 0.0017978563088595935, + "grad_norm": 367465.46875, + "learning_rate": 1.78e-07, + "loss": 77301.35, + "step": 890 + }, + { + "epoch": 0.0018180569415434092, + "grad_norm": 3238312.0, + "learning_rate": 1.8e-07, + "loss": 179343.75, + "step": 900 + }, + { + "epoch": 0.0018382575742272248, + "grad_norm": 547911.1875, + "learning_rate": 1.8200000000000002e-07, + "loss": 140939.525, + "step": 910 + }, + { + "epoch": 0.0018584582069110405, + "grad_norm": 462299.46875, + "learning_rate": 1.84e-07, + "loss": 59305.8375, + "step": 920 + }, + { + "epoch": 0.0018786588395948562, + "grad_norm": 192738.796875, + "learning_rate": 1.86e-07, + "loss": 87783.6187, + "step": 930 + }, + { + "epoch": 0.0018988594722786718, + "grad_norm": 1019660.25, + "learning_rate": 1.8800000000000002e-07, + "loss": 235454.75, + "step": 940 + }, + { + "epoch": 0.0019190601049624875, + "grad_norm": 48178.4140625, + "learning_rate": 1.9e-07, + "loss": 81396.3438, + "step": 950 + }, + { + "epoch": 0.0019392607376463031, + "grad_norm": 388725.8125, + "learning_rate": 1.92e-07, + "loss": 181923.4875, + "step": 960 + }, + { + "epoch": 0.0019594613703301186, + "grad_norm": 99933.03125, + "learning_rate": 1.9400000000000002e-07, + "loss": 162193.125, + "step": 970 + }, + { + "epoch": 0.0019796620030139342, + "grad_norm": 356923.78125, + "learning_rate": 1.96e-07, + "loss": 122031.55, + "step": 980 + }, + { + "epoch": 0.00199986263569775, + "grad_norm": 1963142.375, + "learning_rate": 1.9800000000000003e-07, + "loss": 144094.15, + "step": 990 + }, + { + "epoch": 0.0020200632683815656, + "grad_norm": 7201.49267578125, + "learning_rate": 2.0000000000000002e-07, + "loss": 67326.3188, + "step": 1000 + }, + { + "epoch": 0.0020402639010653812, + "grad_norm": 266570.21875, + "learning_rate": 2.02e-07, + "loss": 109325.6625, + "step": 1010 + }, + { + "epoch": 0.002060464533749197, + "grad_norm": 15494.3203125, + "learning_rate": 2.0400000000000003e-07, + "loss": 116900.5625, + "step": 1020 + }, + { + "epoch": 0.0020806651664330125, + "grad_norm": 52507.16015625, + "learning_rate": 2.0600000000000002e-07, + "loss": 112544.15, + "step": 1030 + }, + { + "epoch": 0.002100865799116828, + "grad_norm": 320055.40625, + "learning_rate": 2.08e-07, + "loss": 106988.475, + "step": 1040 + }, + { + "epoch": 0.002121066431800644, + "grad_norm": 22087.3828125, + "learning_rate": 2.1000000000000003e-07, + "loss": 163399.6125, + "step": 1050 + }, + { + "epoch": 0.0021412670644844595, + "grad_norm": 5200.46728515625, + "learning_rate": 2.1200000000000002e-07, + "loss": 72100.7563, + "step": 1060 + }, + { + "epoch": 0.002161467697168275, + "grad_norm": 80848.78125, + "learning_rate": 2.14e-07, + "loss": 51863.475, + "step": 1070 + }, + { + "epoch": 0.002181668329852091, + "grad_norm": 17961.1484375, + "learning_rate": 2.1600000000000003e-07, + "loss": 40483.2531, + "step": 1080 + }, + { + "epoch": 0.0022018689625359065, + "grad_norm": 34715.90625, + "learning_rate": 2.1800000000000002e-07, + "loss": 85566.2375, + "step": 1090 + }, + { + "epoch": 0.002222069595219722, + "grad_norm": 2926473.0, + "learning_rate": 2.2e-07, + "loss": 227618.0, + "step": 1100 + }, + { + "epoch": 0.002242270227903538, + "grad_norm": 62188.921875, + "learning_rate": 2.2200000000000003e-07, + "loss": 49394.7625, + "step": 1110 + }, + { + "epoch": 0.0022624708605873535, + "grad_norm": 965026.4375, + "learning_rate": 2.2400000000000002e-07, + "loss": 124061.0125, + "step": 1120 + }, + { + "epoch": 0.002282671493271169, + "grad_norm": 322538.46875, + "learning_rate": 2.26e-07, + "loss": 90843.3313, + "step": 1130 + }, + { + "epoch": 0.002302872125954985, + "grad_norm": 33015.16015625, + "learning_rate": 2.2800000000000003e-07, + "loss": 55881.7812, + "step": 1140 + }, + { + "epoch": 0.0023230727586388005, + "grad_norm": 517547.4375, + "learning_rate": 2.3000000000000002e-07, + "loss": 95449.3875, + "step": 1150 + }, + { + "epoch": 0.002343273391322616, + "grad_norm": 409222.5, + "learning_rate": 2.32e-07, + "loss": 101155.4812, + "step": 1160 + }, + { + "epoch": 0.0023634740240064318, + "grad_norm": 536209.1875, + "learning_rate": 2.3400000000000003e-07, + "loss": 68712.1687, + "step": 1170 + }, + { + "epoch": 0.0023836746566902474, + "grad_norm": 1735272.75, + "learning_rate": 2.3600000000000002e-07, + "loss": 176102.125, + "step": 1180 + }, + { + "epoch": 0.002403875289374063, + "grad_norm": 256777.5625, + "learning_rate": 2.3800000000000004e-07, + "loss": 68827.7563, + "step": 1190 + }, + { + "epoch": 0.0024240759220578788, + "grad_norm": 222519.15625, + "learning_rate": 2.4000000000000003e-07, + "loss": 43853.2219, + "step": 1200 + }, + { + "epoch": 0.0024442765547416944, + "grad_norm": 226318.015625, + "learning_rate": 2.42e-07, + "loss": 129394.0625, + "step": 1210 + }, + { + "epoch": 0.00246447718742551, + "grad_norm": 160471.703125, + "learning_rate": 2.44e-07, + "loss": 129320.5375, + "step": 1220 + }, + { + "epoch": 0.0024846778201093257, + "grad_norm": 811868.0, + "learning_rate": 2.46e-07, + "loss": 200754.925, + "step": 1230 + }, + { + "epoch": 0.0025048784527931414, + "grad_norm": 11242.0322265625, + "learning_rate": 2.48e-07, + "loss": 112946.475, + "step": 1240 + }, + { + "epoch": 0.002525079085476957, + "grad_norm": 270197.15625, + "learning_rate": 2.5000000000000004e-07, + "loss": 94473.5, + "step": 1250 + }, + { + "epoch": 0.0025452797181607727, + "grad_norm": 49049.984375, + "learning_rate": 2.5200000000000003e-07, + "loss": 138678.4125, + "step": 1260 + }, + { + "epoch": 0.0025654803508445884, + "grad_norm": 108038.4765625, + "learning_rate": 2.54e-07, + "loss": 53755.5563, + "step": 1270 + }, + { + "epoch": 0.002585680983528404, + "grad_norm": 1346268.125, + "learning_rate": 2.56e-07, + "loss": 67492.15, + "step": 1280 + }, + { + "epoch": 0.0026058816162122197, + "grad_norm": 3022.045654296875, + "learning_rate": 2.58e-07, + "loss": 83298.6438, + "step": 1290 + }, + { + "epoch": 0.0026260822488960354, + "grad_norm": 223935.6875, + "learning_rate": 2.6e-07, + "loss": 114402.8125, + "step": 1300 + }, + { + "epoch": 0.002646282881579851, + "grad_norm": 131749.484375, + "learning_rate": 2.6200000000000004e-07, + "loss": 88521.4375, + "step": 1310 + }, + { + "epoch": 0.0026664835142636667, + "grad_norm": 40314.84765625, + "learning_rate": 2.6400000000000003e-07, + "loss": 21847.2047, + "step": 1320 + }, + { + "epoch": 0.0026866841469474823, + "grad_norm": 2239.300537109375, + "learning_rate": 2.66e-07, + "loss": 85078.4563, + "step": 1330 + }, + { + "epoch": 0.002706884779631298, + "grad_norm": 440028.375, + "learning_rate": 2.68e-07, + "loss": 55876.5125, + "step": 1340 + }, + { + "epoch": 0.0027270854123151137, + "grad_norm": 62792.2421875, + "learning_rate": 2.7e-07, + "loss": 121310.5125, + "step": 1350 + }, + { + "epoch": 0.0027472860449989293, + "grad_norm": 142368.59375, + "learning_rate": 2.72e-07, + "loss": 111596.775, + "step": 1360 + }, + { + "epoch": 0.002767486677682745, + "grad_norm": 371694.0, + "learning_rate": 2.7400000000000004e-07, + "loss": 70511.775, + "step": 1370 + }, + { + "epoch": 0.0027876873103665606, + "grad_norm": 2239945.75, + "learning_rate": 2.7600000000000004e-07, + "loss": 111037.825, + "step": 1380 + }, + { + "epoch": 0.0028078879430503763, + "grad_norm": 238516.859375, + "learning_rate": 2.7800000000000003e-07, + "loss": 118856.3, + "step": 1390 + }, + { + "epoch": 0.002828088575734192, + "grad_norm": 15366.751953125, + "learning_rate": 2.8e-07, + "loss": 22044.1594, + "step": 1400 + }, + { + "epoch": 0.0028482892084180076, + "grad_norm": 126048.71875, + "learning_rate": 2.82e-07, + "loss": 45847.5281, + "step": 1410 + }, + { + "epoch": 0.0028684898411018233, + "grad_norm": 508619.875, + "learning_rate": 2.8400000000000005e-07, + "loss": 122673.9125, + "step": 1420 + }, + { + "epoch": 0.002888690473785639, + "grad_norm": 293747.375, + "learning_rate": 2.8600000000000005e-07, + "loss": 86230.2625, + "step": 1430 + }, + { + "epoch": 0.0029088911064694546, + "grad_norm": 819382.9375, + "learning_rate": 2.8800000000000004e-07, + "loss": 110040.25, + "step": 1440 + }, + { + "epoch": 0.0029290917391532703, + "grad_norm": 61461.98828125, + "learning_rate": 2.9000000000000003e-07, + "loss": 49610.475, + "step": 1450 + }, + { + "epoch": 0.002949292371837086, + "grad_norm": 50417.6640625, + "learning_rate": 2.92e-07, + "loss": 50417.9969, + "step": 1460 + }, + { + "epoch": 0.0029694930045209016, + "grad_norm": 117609.328125, + "learning_rate": 2.94e-07, + "loss": 48084.5844, + "step": 1470 + }, + { + "epoch": 0.0029896936372047172, + "grad_norm": 3418397.0, + "learning_rate": 2.9600000000000006e-07, + "loss": 119706.0875, + "step": 1480 + }, + { + "epoch": 0.003009894269888533, + "grad_norm": 27037.810546875, + "learning_rate": 2.9800000000000005e-07, + "loss": 35588.0281, + "step": 1490 + }, + { + "epoch": 0.0030300949025723486, + "grad_norm": 282059.6875, + "learning_rate": 3.0000000000000004e-07, + "loss": 23998.2125, + "step": 1500 + }, + { + "epoch": 0.0030502955352561642, + "grad_norm": 166329.296875, + "learning_rate": 3.0200000000000003e-07, + "loss": 21092.5563, + "step": 1510 + }, + { + "epoch": 0.00307049616793998, + "grad_norm": 18536.9375, + "learning_rate": 3.04e-07, + "loss": 68960.375, + "step": 1520 + }, + { + "epoch": 0.0030906968006237955, + "grad_norm": 14396.48828125, + "learning_rate": 3.06e-07, + "loss": 47424.4969, + "step": 1530 + }, + { + "epoch": 0.003110897433307611, + "grad_norm": 317662.46875, + "learning_rate": 3.0800000000000006e-07, + "loss": 195942.675, + "step": 1540 + }, + { + "epoch": 0.003131098065991427, + "grad_norm": 35644.88671875, + "learning_rate": 3.1000000000000005e-07, + "loss": 31488.3688, + "step": 1550 + }, + { + "epoch": 0.0031512986986752425, + "grad_norm": 217201.53125, + "learning_rate": 3.12e-07, + "loss": 30665.9719, + "step": 1560 + }, + { + "epoch": 0.003171499331359058, + "grad_norm": 357419.34375, + "learning_rate": 3.14e-07, + "loss": 65884.625, + "step": 1570 + }, + { + "epoch": 0.003191699964042874, + "grad_norm": 1835324.125, + "learning_rate": 3.160000000000001e-07, + "loss": 84869.9438, + "step": 1580 + }, + { + "epoch": 0.0032119005967266895, + "grad_norm": 125140.5, + "learning_rate": 3.1800000000000007e-07, + "loss": 35667.6406, + "step": 1590 + }, + { + "epoch": 0.003232101229410505, + "grad_norm": 32253.3984375, + "learning_rate": 3.2e-07, + "loss": 41518.9625, + "step": 1600 + }, + { + "epoch": 0.003252301862094321, + "grad_norm": 133711.3125, + "learning_rate": 3.22e-07, + "loss": 49417.3344, + "step": 1610 + }, + { + "epoch": 0.0032725024947781365, + "grad_norm": 588435.5, + "learning_rate": 3.24e-07, + "loss": 43908.525, + "step": 1620 + }, + { + "epoch": 0.003292703127461952, + "grad_norm": 63388.9765625, + "learning_rate": 3.26e-07, + "loss": 40770.4375, + "step": 1630 + }, + { + "epoch": 0.003312903760145768, + "grad_norm": 293617.4375, + "learning_rate": 3.280000000000001e-07, + "loss": 51706.6625, + "step": 1640 + }, + { + "epoch": 0.0033331043928295835, + "grad_norm": 16973.404296875, + "learning_rate": 3.3e-07, + "loss": 34153.6469, + "step": 1650 + }, + { + "epoch": 0.003353305025513399, + "grad_norm": 5845.9091796875, + "learning_rate": 3.32e-07, + "loss": 71441.5625, + "step": 1660 + }, + { + "epoch": 0.0033735056581972148, + "grad_norm": 53163.1328125, + "learning_rate": 3.34e-07, + "loss": 33939.8031, + "step": 1670 + }, + { + "epoch": 0.0033937062908810304, + "grad_norm": 801085.75, + "learning_rate": 3.36e-07, + "loss": 42557.8625, + "step": 1680 + }, + { + "epoch": 0.003413906923564846, + "grad_norm": 259300.4375, + "learning_rate": 3.38e-07, + "loss": 71365.2312, + "step": 1690 + }, + { + "epoch": 0.0034341075562486618, + "grad_norm": 5228.58740234375, + "learning_rate": 3.4000000000000003e-07, + "loss": 32500.6125, + "step": 1700 + }, + { + "epoch": 0.0034543081889324774, + "grad_norm": 39621.62109375, + "learning_rate": 3.42e-07, + "loss": 27413.7062, + "step": 1710 + }, + { + "epoch": 0.003474508821616293, + "grad_norm": 79977.8515625, + "learning_rate": 3.44e-07, + "loss": 79423.3062, + "step": 1720 + }, + { + "epoch": 0.0034947094543001087, + "grad_norm": 272876.84375, + "learning_rate": 3.46e-07, + "loss": 25171.5625, + "step": 1730 + }, + { + "epoch": 0.0035149100869839244, + "grad_norm": 92509.15625, + "learning_rate": 3.48e-07, + "loss": 52769.95, + "step": 1740 + }, + { + "epoch": 0.00353511071966774, + "grad_norm": 29000.017578125, + "learning_rate": 3.5000000000000004e-07, + "loss": 21108.3844, + "step": 1750 + }, + { + "epoch": 0.0035553113523515557, + "grad_norm": 29716.587890625, + "learning_rate": 3.5200000000000003e-07, + "loss": 51525.1062, + "step": 1760 + }, + { + "epoch": 0.0035755119850353714, + "grad_norm": 311533.59375, + "learning_rate": 3.54e-07, + "loss": 78616.5938, + "step": 1770 + }, + { + "epoch": 0.003595712617719187, + "grad_norm": 115813.2265625, + "learning_rate": 3.56e-07, + "loss": 60808.5437, + "step": 1780 + }, + { + "epoch": 0.0036159132504030027, + "grad_norm": 56772.15234375, + "learning_rate": 3.58e-07, + "loss": 20267.6188, + "step": 1790 + }, + { + "epoch": 0.0036361138830868184, + "grad_norm": 299846.15625, + "learning_rate": 3.6e-07, + "loss": 55841.175, + "step": 1800 + }, + { + "epoch": 0.003656314515770634, + "grad_norm": 45274.81640625, + "learning_rate": 3.6200000000000004e-07, + "loss": 20212.05, + "step": 1810 + }, + { + "epoch": 0.0036765151484544497, + "grad_norm": 28560.935546875, + "learning_rate": 3.6400000000000003e-07, + "loss": 40275.3719, + "step": 1820 + }, + { + "epoch": 0.0036967157811382653, + "grad_norm": 13348.90234375, + "learning_rate": 3.66e-07, + "loss": 102117.3188, + "step": 1830 + }, + { + "epoch": 0.003716916413822081, + "grad_norm": 18491.677734375, + "learning_rate": 3.68e-07, + "loss": 73917.3625, + "step": 1840 + }, + { + "epoch": 0.0037371170465058967, + "grad_norm": 7520.74560546875, + "learning_rate": 3.7e-07, + "loss": 25389.6953, + "step": 1850 + }, + { + "epoch": 0.0037573176791897123, + "grad_norm": 131936.71875, + "learning_rate": 3.72e-07, + "loss": 30900.3969, + "step": 1860 + }, + { + "epoch": 0.003777518311873528, + "grad_norm": 51321.96484375, + "learning_rate": 3.7400000000000004e-07, + "loss": 44297.8469, + "step": 1870 + }, + { + "epoch": 0.0037977189445573436, + "grad_norm": 146183.265625, + "learning_rate": 3.7600000000000003e-07, + "loss": 75706.975, + "step": 1880 + }, + { + "epoch": 0.0038179195772411593, + "grad_norm": 22144.74609375, + "learning_rate": 3.78e-07, + "loss": 50223.325, + "step": 1890 + }, + { + "epoch": 0.003838120209924975, + "grad_norm": 1945.9853515625, + "learning_rate": 3.8e-07, + "loss": 16478.7344, + "step": 1900 + }, + { + "epoch": 0.0038583208426087906, + "grad_norm": 7563.63232421875, + "learning_rate": 3.82e-07, + "loss": 20342.375, + "step": 1910 + }, + { + "epoch": 0.0038785214752926063, + "grad_norm": 52642.4140625, + "learning_rate": 3.84e-07, + "loss": 38028.5281, + "step": 1920 + }, + { + "epoch": 0.003898722107976422, + "grad_norm": 17312.896484375, + "learning_rate": 3.8600000000000004e-07, + "loss": 38809.3812, + "step": 1930 + }, + { + "epoch": 0.003918922740660237, + "grad_norm": 427092.71875, + "learning_rate": 3.8800000000000003e-07, + "loss": 35652.75, + "step": 1940 + }, + { + "epoch": 0.003939123373344053, + "grad_norm": 81278.125, + "learning_rate": 3.9e-07, + "loss": 13489.1203, + "step": 1950 + }, + { + "epoch": 0.0039593240060278685, + "grad_norm": 242387.015625, + "learning_rate": 3.92e-07, + "loss": 51381.4875, + "step": 1960 + }, + { + "epoch": 0.003979524638711684, + "grad_norm": 67081.8203125, + "learning_rate": 3.94e-07, + "loss": 19989.1453, + "step": 1970 + }, + { + "epoch": 0.0039997252713955, + "grad_norm": 31269.76171875, + "learning_rate": 3.9600000000000005e-07, + "loss": 22923.5469, + "step": 1980 + }, + { + "epoch": 0.0040199259040793155, + "grad_norm": 140772.1875, + "learning_rate": 3.9800000000000004e-07, + "loss": 22611.4031, + "step": 1990 + }, + { + "epoch": 0.004040126536763131, + "grad_norm": 195576.703125, + "learning_rate": 4.0000000000000003e-07, + "loss": 16202.9156, + "step": 2000 + }, + { + "epoch": 0.004060327169446947, + "grad_norm": 159208.53125, + "learning_rate": 4.02e-07, + "loss": 25542.9156, + "step": 2010 + }, + { + "epoch": 0.0040805278021307624, + "grad_norm": 29524.46875, + "learning_rate": 4.04e-07, + "loss": 29154.2938, + "step": 2020 + }, + { + "epoch": 0.004100728434814578, + "grad_norm": 404085.53125, + "learning_rate": 4.06e-07, + "loss": 39527.2812, + "step": 2030 + }, + { + "epoch": 0.004120929067498394, + "grad_norm": 44774.05859375, + "learning_rate": 4.0800000000000005e-07, + "loss": 32177.0844, + "step": 2040 + }, + { + "epoch": 0.004141129700182209, + "grad_norm": 163404.0, + "learning_rate": 4.1000000000000004e-07, + "loss": 53384.7875, + "step": 2050 + }, + { + "epoch": 0.004161330332866025, + "grad_norm": 39732.23828125, + "learning_rate": 4.1200000000000004e-07, + "loss": 14402.2938, + "step": 2060 + }, + { + "epoch": 0.004181530965549841, + "grad_norm": 285907.78125, + "learning_rate": 4.1400000000000003e-07, + "loss": 28739.2938, + "step": 2070 + }, + { + "epoch": 0.004201731598233656, + "grad_norm": 30470.197265625, + "learning_rate": 4.16e-07, + "loss": 38195.8781, + "step": 2080 + }, + { + "epoch": 0.004221932230917472, + "grad_norm": 34287.5859375, + "learning_rate": 4.18e-07, + "loss": 43357.425, + "step": 2090 + }, + { + "epoch": 0.004242132863601288, + "grad_norm": 5667.95703125, + "learning_rate": 4.2000000000000006e-07, + "loss": 11454.4945, + "step": 2100 + }, + { + "epoch": 0.004262333496285103, + "grad_norm": 137563.15625, + "learning_rate": 4.2200000000000005e-07, + "loss": 22601.0219, + "step": 2110 + }, + { + "epoch": 0.004282534128968919, + "grad_norm": 642829.4375, + "learning_rate": 4.2400000000000004e-07, + "loss": 39363.2688, + "step": 2120 + }, + { + "epoch": 0.004302734761652735, + "grad_norm": 420346.96875, + "learning_rate": 4.2600000000000003e-07, + "loss": 34232.5, + "step": 2130 + }, + { + "epoch": 0.00432293539433655, + "grad_norm": 319800.65625, + "learning_rate": 4.28e-07, + "loss": 45630.4719, + "step": 2140 + }, + { + "epoch": 0.004343136027020366, + "grad_norm": 5137.02978515625, + "learning_rate": 4.3e-07, + "loss": 12048.4562, + "step": 2150 + }, + { + "epoch": 0.004363336659704182, + "grad_norm": 28943.734375, + "learning_rate": 4.3200000000000006e-07, + "loss": 27421.1719, + "step": 2160 + }, + { + "epoch": 0.004383537292387997, + "grad_norm": 11191.4208984375, + "learning_rate": 4.3400000000000005e-07, + "loss": 29307.0094, + "step": 2170 + }, + { + "epoch": 0.004403737925071813, + "grad_norm": 668323.1875, + "learning_rate": 4.3600000000000004e-07, + "loss": 44995.9062, + "step": 2180 + }, + { + "epoch": 0.004423938557755629, + "grad_norm": 6032.32421875, + "learning_rate": 4.3800000000000003e-07, + "loss": 10335.557, + "step": 2190 + }, + { + "epoch": 0.004444139190439444, + "grad_norm": 37681.12890625, + "learning_rate": 4.4e-07, + "loss": 7949.168, + "step": 2200 + }, + { + "epoch": 0.00446433982312326, + "grad_norm": 138103.234375, + "learning_rate": 4.4200000000000007e-07, + "loss": 30452.0375, + "step": 2210 + }, + { + "epoch": 0.004484540455807076, + "grad_norm": 69653.5703125, + "learning_rate": 4.4400000000000006e-07, + "loss": 18676.6562, + "step": 2220 + }, + { + "epoch": 0.004504741088490891, + "grad_norm": 21980.90234375, + "learning_rate": 4.4600000000000005e-07, + "loss": 18408.0938, + "step": 2230 + }, + { + "epoch": 0.004524941721174707, + "grad_norm": 32741.60546875, + "learning_rate": 4.4800000000000004e-07, + "loss": 17522.1719, + "step": 2240 + }, + { + "epoch": 0.004545142353858523, + "grad_norm": 31634.638671875, + "learning_rate": 4.5000000000000003e-07, + "loss": 16577.3578, + "step": 2250 + }, + { + "epoch": 0.004565342986542338, + "grad_norm": 414.2900390625, + "learning_rate": 4.52e-07, + "loss": 19048.7562, + "step": 2260 + }, + { + "epoch": 0.004585543619226154, + "grad_norm": 67860.2109375, + "learning_rate": 4.5400000000000007e-07, + "loss": 25588.6781, + "step": 2270 + }, + { + "epoch": 0.00460574425190997, + "grad_norm": 250171.484375, + "learning_rate": 4.5600000000000006e-07, + "loss": 16886.4031, + "step": 2280 + }, + { + "epoch": 0.004625944884593785, + "grad_norm": 232578.765625, + "learning_rate": 4.5800000000000005e-07, + "loss": 16352.8188, + "step": 2290 + }, + { + "epoch": 0.004646145517277601, + "grad_norm": 56584.0703125, + "learning_rate": 4.6000000000000004e-07, + "loss": 21029.1609, + "step": 2300 + }, + { + "epoch": 0.004666346149961417, + "grad_norm": 286053.625, + "learning_rate": 4.6200000000000003e-07, + "loss": 38059.3063, + "step": 2310 + }, + { + "epoch": 0.004686546782645232, + "grad_norm": 15319.60546875, + "learning_rate": 4.64e-07, + "loss": 8820.8711, + "step": 2320 + }, + { + "epoch": 0.004706747415329048, + "grad_norm": 0.0, + "learning_rate": 4.6600000000000007e-07, + "loss": 6327.2996, + "step": 2330 + }, + { + "epoch": 0.0047269480480128636, + "grad_norm": 318668.0, + "learning_rate": 4.6800000000000006e-07, + "loss": 19358.4156, + "step": 2340 + }, + { + "epoch": 0.004747148680696679, + "grad_norm": 323314.71875, + "learning_rate": 4.7000000000000005e-07, + "loss": 22548.5812, + "step": 2350 + }, + { + "epoch": 0.004767349313380495, + "grad_norm": 43722.01171875, + "learning_rate": 4.7200000000000004e-07, + "loss": 5630.8824, + "step": 2360 + }, + { + "epoch": 0.0047875499460643105, + "grad_norm": 162153.578125, + "learning_rate": 4.7400000000000004e-07, + "loss": 28175.9375, + "step": 2370 + }, + { + "epoch": 0.004807750578748126, + "grad_norm": 99130.1015625, + "learning_rate": 4.760000000000001e-07, + "loss": 13653.0562, + "step": 2380 + }, + { + "epoch": 0.004827951211431942, + "grad_norm": 50870.140625, + "learning_rate": 4.78e-07, + "loss": 18426.3844, + "step": 2390 + }, + { + "epoch": 0.0048481518441157575, + "grad_norm": 2599.94921875, + "learning_rate": 4.800000000000001e-07, + "loss": 13747.85, + "step": 2400 + }, + { + "epoch": 0.004868352476799573, + "grad_norm": 171588.96875, + "learning_rate": 4.82e-07, + "loss": 13209.7141, + "step": 2410 + }, + { + "epoch": 0.004888553109483389, + "grad_norm": 6463.8017578125, + "learning_rate": 4.84e-07, + "loss": 11902.5484, + "step": 2420 + }, + { + "epoch": 0.0049087537421672045, + "grad_norm": 17299.404296875, + "learning_rate": 4.86e-07, + "loss": 6133.3605, + "step": 2430 + }, + { + "epoch": 0.00492895437485102, + "grad_norm": 32834.93359375, + "learning_rate": 4.88e-07, + "loss": 19574.4641, + "step": 2440 + }, + { + "epoch": 0.004949155007534836, + "grad_norm": 17722.2421875, + "learning_rate": 4.900000000000001e-07, + "loss": 15487.6141, + "step": 2450 + }, + { + "epoch": 0.0049693556402186515, + "grad_norm": 54100.19921875, + "learning_rate": 4.92e-07, + "loss": 12047.875, + "step": 2460 + }, + { + "epoch": 0.004989556272902467, + "grad_norm": 12383.744140625, + "learning_rate": 4.940000000000001e-07, + "loss": 19039.5062, + "step": 2470 + }, + { + "epoch": 0.005009756905586283, + "grad_norm": 33060.32421875, + "learning_rate": 4.96e-07, + "loss": 13212.2219, + "step": 2480 + }, + { + "epoch": 0.0050299575382700985, + "grad_norm": 24754.240234375, + "learning_rate": 4.98e-07, + "loss": 26668.5375, + "step": 2490 + }, + { + "epoch": 0.005050158170953914, + "grad_norm": 76614.6484375, + "learning_rate": 5.000000000000001e-07, + "loss": 20981.0687, + "step": 2500 + }, + { + "epoch": 0.00507035880363773, + "grad_norm": 94899.109375, + "learning_rate": 5.02e-07, + "loss": 12422.5359, + "step": 2510 + }, + { + "epoch": 0.0050905594363215454, + "grad_norm": 46776.2734375, + "learning_rate": 5.040000000000001e-07, + "loss": 14437.3984, + "step": 2520 + }, + { + "epoch": 0.005110760069005361, + "grad_norm": 41882.94921875, + "learning_rate": 5.06e-07, + "loss": 23298.3469, + "step": 2530 + }, + { + "epoch": 0.005130960701689177, + "grad_norm": 60419.1796875, + "learning_rate": 5.08e-07, + "loss": 31690.0406, + "step": 2540 + }, + { + "epoch": 0.005151161334372992, + "grad_norm": 13863.61328125, + "learning_rate": 5.1e-07, + "loss": 8781.8617, + "step": 2550 + }, + { + "epoch": 0.005171361967056808, + "grad_norm": 71163.0, + "learning_rate": 5.12e-07, + "loss": 15933.0516, + "step": 2560 + }, + { + "epoch": 0.005191562599740624, + "grad_norm": 7024.83544921875, + "learning_rate": 5.140000000000001e-07, + "loss": 12636.3367, + "step": 2570 + }, + { + "epoch": 0.005211763232424439, + "grad_norm": 8875.0224609375, + "learning_rate": 5.16e-07, + "loss": 29434.9031, + "step": 2580 + }, + { + "epoch": 0.005231963865108255, + "grad_norm": 48908.40625, + "learning_rate": 5.180000000000001e-07, + "loss": 16208.3969, + "step": 2590 + }, + { + "epoch": 0.005252164497792071, + "grad_norm": 103019.8671875, + "learning_rate": 5.2e-07, + "loss": 9421.6641, + "step": 2600 + }, + { + "epoch": 0.005272365130475886, + "grad_norm": 426906.21875, + "learning_rate": 5.22e-07, + "loss": 16878.2328, + "step": 2610 + }, + { + "epoch": 0.005292565763159702, + "grad_norm": 254.477294921875, + "learning_rate": 5.240000000000001e-07, + "loss": 15685.3563, + "step": 2620 + }, + { + "epoch": 0.005312766395843518, + "grad_norm": 104315.8359375, + "learning_rate": 5.26e-07, + "loss": 47048.3406, + "step": 2630 + }, + { + "epoch": 0.005332967028527333, + "grad_norm": 116708.25, + "learning_rate": 5.280000000000001e-07, + "loss": 18093.9797, + "step": 2640 + }, + { + "epoch": 0.005353167661211149, + "grad_norm": 192981.125, + "learning_rate": 5.3e-07, + "loss": 25835.0375, + "step": 2650 + }, + { + "epoch": 0.005373368293894965, + "grad_norm": 27211.9609375, + "learning_rate": 5.32e-07, + "loss": 20473.4188, + "step": 2660 + }, + { + "epoch": 0.00539356892657878, + "grad_norm": 80115.7578125, + "learning_rate": 5.340000000000001e-07, + "loss": 19815.3812, + "step": 2670 + }, + { + "epoch": 0.005413769559262596, + "grad_norm": 111606.5859375, + "learning_rate": 5.36e-07, + "loss": 14562.9234, + "step": 2680 + }, + { + "epoch": 0.005433970191946412, + "grad_norm": 60238.03125, + "learning_rate": 5.380000000000001e-07, + "loss": 5127.1016, + "step": 2690 + }, + { + "epoch": 0.005454170824630227, + "grad_norm": 14125.95703125, + "learning_rate": 5.4e-07, + "loss": 20656.9969, + "step": 2700 + }, + { + "epoch": 0.005474371457314043, + "grad_norm": 37463.40234375, + "learning_rate": 5.420000000000001e-07, + "loss": 17069.8406, + "step": 2710 + }, + { + "epoch": 0.005494572089997859, + "grad_norm": 3470.5966796875, + "learning_rate": 5.44e-07, + "loss": 9280.2563, + "step": 2720 + }, + { + "epoch": 0.005514772722681674, + "grad_norm": 225770.671875, + "learning_rate": 5.46e-07, + "loss": 22984.2875, + "step": 2730 + }, + { + "epoch": 0.00553497335536549, + "grad_norm": 54704.65625, + "learning_rate": 5.480000000000001e-07, + "loss": 23321.6578, + "step": 2740 + }, + { + "epoch": 0.005555173988049306, + "grad_norm": 31240.861328125, + "learning_rate": 5.5e-07, + "loss": 13835.5719, + "step": 2750 + }, + { + "epoch": 0.005575374620733121, + "grad_norm": 88557.9609375, + "learning_rate": 5.520000000000001e-07, + "loss": 27802.0187, + "step": 2760 + }, + { + "epoch": 0.005595575253416937, + "grad_norm": 61604.984375, + "learning_rate": 5.54e-07, + "loss": 12860.8969, + "step": 2770 + }, + { + "epoch": 0.005615775886100753, + "grad_norm": 74221.921875, + "learning_rate": 5.560000000000001e-07, + "loss": 31568.4375, + "step": 2780 + }, + { + "epoch": 0.005635976518784568, + "grad_norm": 4843.03466796875, + "learning_rate": 5.580000000000001e-07, + "loss": 24289.7734, + "step": 2790 + }, + { + "epoch": 0.005656177151468384, + "grad_norm": 8147.189453125, + "learning_rate": 5.6e-07, + "loss": 13324.5625, + "step": 2800 + }, + { + "epoch": 0.0056763777841522, + "grad_norm": 26549.251953125, + "learning_rate": 5.620000000000001e-07, + "loss": 9605.4602, + "step": 2810 + }, + { + "epoch": 0.005696578416836015, + "grad_norm": 2258.267333984375, + "learning_rate": 5.64e-07, + "loss": 31237.1406, + "step": 2820 + }, + { + "epoch": 0.005716779049519831, + "grad_norm": 12592.0205078125, + "learning_rate": 5.660000000000001e-07, + "loss": 17896.9656, + "step": 2830 + }, + { + "epoch": 0.0057369796822036466, + "grad_norm": 48901.77734375, + "learning_rate": 5.680000000000001e-07, + "loss": 27059.5969, + "step": 2840 + }, + { + "epoch": 0.005757180314887462, + "grad_norm": 97220.4375, + "learning_rate": 5.7e-07, + "loss": 21296.7344, + "step": 2850 + }, + { + "epoch": 0.005777380947571278, + "grad_norm": 72453.7578125, + "learning_rate": 5.720000000000001e-07, + "loss": 13806.5109, + "step": 2860 + }, + { + "epoch": 0.0057975815802550935, + "grad_norm": 6266.21435546875, + "learning_rate": 5.74e-07, + "loss": 26352.4813, + "step": 2870 + }, + { + "epoch": 0.005817782212938909, + "grad_norm": 153699.703125, + "learning_rate": 5.760000000000001e-07, + "loss": 21797.725, + "step": 2880 + }, + { + "epoch": 0.005837982845622725, + "grad_norm": 102588.703125, + "learning_rate": 5.78e-07, + "loss": 13873.9219, + "step": 2890 + }, + { + "epoch": 0.0058581834783065405, + "grad_norm": 4029.48828125, + "learning_rate": 5.800000000000001e-07, + "loss": 14034.7219, + "step": 2900 + }, + { + "epoch": 0.005878384110990356, + "grad_norm": 62459.578125, + "learning_rate": 5.820000000000001e-07, + "loss": 15893.4328, + "step": 2910 + }, + { + "epoch": 0.005898584743674172, + "grad_norm": 52890.59375, + "learning_rate": 5.84e-07, + "loss": 25137.0938, + "step": 2920 + }, + { + "epoch": 0.0059187853763579875, + "grad_norm": 31984.859375, + "learning_rate": 5.860000000000001e-07, + "loss": 7028.8516, + "step": 2930 + }, + { + "epoch": 0.005938986009041803, + "grad_norm": 627.6958618164062, + "learning_rate": 5.88e-07, + "loss": 7233.932, + "step": 2940 + }, + { + "epoch": 0.005959186641725619, + "grad_norm": 115948.8515625, + "learning_rate": 5.900000000000001e-07, + "loss": 31958.3281, + "step": 2950 + }, + { + "epoch": 0.0059793872744094345, + "grad_norm": 27290.228515625, + "learning_rate": 5.920000000000001e-07, + "loss": 18963.5187, + "step": 2960 + }, + { + "epoch": 0.00599958790709325, + "grad_norm": 8175.99365234375, + "learning_rate": 5.94e-07, + "loss": 18708.1203, + "step": 2970 + }, + { + "epoch": 0.006019788539777066, + "grad_norm": 35170.98828125, + "learning_rate": 5.960000000000001e-07, + "loss": 11847.6375, + "step": 2980 + }, + { + "epoch": 0.0060399891724608815, + "grad_norm": 53265.26171875, + "learning_rate": 5.98e-07, + "loss": 8330.9086, + "step": 2990 + }, + { + "epoch": 0.006060189805144697, + "grad_norm": 10648.859375, + "learning_rate": 6.000000000000001e-07, + "loss": 5959.052, + "step": 3000 + }, + { + "epoch": 0.006080390437828513, + "grad_norm": 1069.5797119140625, + "learning_rate": 6.02e-07, + "loss": 10041.7406, + "step": 3010 + }, + { + "epoch": 0.0061005910705123284, + "grad_norm": 318948.9375, + "learning_rate": 6.040000000000001e-07, + "loss": 12852.8531, + "step": 3020 + }, + { + "epoch": 0.006120791703196144, + "grad_norm": 123815.6484375, + "learning_rate": 6.060000000000001e-07, + "loss": 18336.9281, + "step": 3030 + }, + { + "epoch": 0.00614099233587996, + "grad_norm": 45311.125, + "learning_rate": 6.08e-07, + "loss": 8765.4781, + "step": 3040 + }, + { + "epoch": 0.006161192968563775, + "grad_norm": 168305.09375, + "learning_rate": 6.100000000000001e-07, + "loss": 12504.5578, + "step": 3050 + }, + { + "epoch": 0.006181393601247591, + "grad_norm": 79777.53125, + "learning_rate": 6.12e-07, + "loss": 21589.2906, + "step": 3060 + }, + { + "epoch": 0.006201594233931407, + "grad_norm": 28739.474609375, + "learning_rate": 6.140000000000001e-07, + "loss": 15506.2078, + "step": 3070 + }, + { + "epoch": 0.006221794866615222, + "grad_norm": 0.0, + "learning_rate": 6.160000000000001e-07, + "loss": 6696.3016, + "step": 3080 + }, + { + "epoch": 0.006241995499299038, + "grad_norm": 39363.3203125, + "learning_rate": 6.180000000000001e-07, + "loss": 14708.2781, + "step": 3090 + }, + { + "epoch": 0.006262196131982854, + "grad_norm": 32419.970703125, + "learning_rate": 6.200000000000001e-07, + "loss": 10171.1266, + "step": 3100 + }, + { + "epoch": 0.006282396764666669, + "grad_norm": 20183.353515625, + "learning_rate": 6.22e-07, + "loss": 16366.6578, + "step": 3110 + }, + { + "epoch": 0.006302597397350485, + "grad_norm": 712.5150146484375, + "learning_rate": 6.24e-07, + "loss": 7859.8023, + "step": 3120 + }, + { + "epoch": 0.006322798030034301, + "grad_norm": 6775.84765625, + "learning_rate": 6.260000000000001e-07, + "loss": 8690.6266, + "step": 3130 + }, + { + "epoch": 0.006342998662718116, + "grad_norm": 24645.04296875, + "learning_rate": 6.28e-07, + "loss": 25688.4516, + "step": 3140 + }, + { + "epoch": 0.006363199295401932, + "grad_norm": 2599.060302734375, + "learning_rate": 6.3e-07, + "loss": 9759.5227, + "step": 3150 + }, + { + "epoch": 0.006383399928085748, + "grad_norm": 240540.0, + "learning_rate": 6.320000000000002e-07, + "loss": 35375.1125, + "step": 3160 + }, + { + "epoch": 0.006403600560769563, + "grad_norm": 62608.94921875, + "learning_rate": 6.34e-07, + "loss": 16429.7422, + "step": 3170 + }, + { + "epoch": 0.006423801193453379, + "grad_norm": 17942.103515625, + "learning_rate": 6.360000000000001e-07, + "loss": 13598.0031, + "step": 3180 + }, + { + "epoch": 0.006444001826137195, + "grad_norm": 24618.7734375, + "learning_rate": 6.38e-07, + "loss": 12776.6156, + "step": 3190 + }, + { + "epoch": 0.00646420245882101, + "grad_norm": 2894.0986328125, + "learning_rate": 6.4e-07, + "loss": 11273.2242, + "step": 3200 + }, + { + "epoch": 0.006484403091504826, + "grad_norm": 6186.994140625, + "learning_rate": 6.42e-07, + "loss": 15184.8109, + "step": 3210 + }, + { + "epoch": 0.006504603724188642, + "grad_norm": 11202.0048828125, + "learning_rate": 6.44e-07, + "loss": 16402.4422, + "step": 3220 + }, + { + "epoch": 0.006524804356872457, + "grad_norm": 11999.2587890625, + "learning_rate": 6.460000000000001e-07, + "loss": 19748.0344, + "step": 3230 + }, + { + "epoch": 0.006545004989556273, + "grad_norm": 90543.4453125, + "learning_rate": 6.48e-07, + "loss": 21494.7766, + "step": 3240 + }, + { + "epoch": 0.006565205622240089, + "grad_norm": 240466.140625, + "learning_rate": 6.5e-07, + "loss": 22486.95, + "step": 3250 + }, + { + "epoch": 0.006585406254923904, + "grad_norm": 1855.9111328125, + "learning_rate": 6.52e-07, + "loss": 20967.5328, + "step": 3260 + }, + { + "epoch": 0.00660560688760772, + "grad_norm": 1702.35107421875, + "learning_rate": 6.54e-07, + "loss": 11257.2641, + "step": 3270 + }, + { + "epoch": 0.006625807520291536, + "grad_norm": 95006.828125, + "learning_rate": 6.560000000000002e-07, + "loss": 12190.9609, + "step": 3280 + }, + { + "epoch": 0.006646008152975351, + "grad_norm": 29812.98828125, + "learning_rate": 6.58e-07, + "loss": 18243.7219, + "step": 3290 + }, + { + "epoch": 0.006666208785659167, + "grad_norm": 140497.78125, + "learning_rate": 6.6e-07, + "loss": 22007.475, + "step": 3300 + }, + { + "epoch": 0.006686409418342983, + "grad_norm": 7080.90673828125, + "learning_rate": 6.62e-07, + "loss": 26077.775, + "step": 3310 + }, + { + "epoch": 0.006706610051026798, + "grad_norm": 39328.61328125, + "learning_rate": 6.64e-07, + "loss": 5631.7977, + "step": 3320 + }, + { + "epoch": 0.006726810683710614, + "grad_norm": 173130.78125, + "learning_rate": 6.660000000000002e-07, + "loss": 18533.6031, + "step": 3330 + }, + { + "epoch": 0.0067470113163944296, + "grad_norm": 6683.4814453125, + "learning_rate": 6.68e-07, + "loss": 12803.1547, + "step": 3340 + }, + { + "epoch": 0.006767211949078245, + "grad_norm": 11253.9609375, + "learning_rate": 6.7e-07, + "loss": 17899.2922, + "step": 3350 + }, + { + "epoch": 0.006787412581762061, + "grad_norm": 338358.125, + "learning_rate": 6.72e-07, + "loss": 14914.0922, + "step": 3360 + }, + { + "epoch": 0.0068076132144458765, + "grad_norm": 2257.998779296875, + "learning_rate": 6.74e-07, + "loss": 8360.9336, + "step": 3370 + }, + { + "epoch": 0.006827813847129692, + "grad_norm": 25969.142578125, + "learning_rate": 6.76e-07, + "loss": 8907.7219, + "step": 3380 + }, + { + "epoch": 0.006848014479813508, + "grad_norm": 6400.46044921875, + "learning_rate": 6.78e-07, + "loss": 24799.7188, + "step": 3390 + }, + { + "epoch": 0.0068682151124973235, + "grad_norm": 13913.5068359375, + "learning_rate": 6.800000000000001e-07, + "loss": 17362.9047, + "step": 3400 + }, + { + "epoch": 0.006888415745181139, + "grad_norm": 177074.734375, + "learning_rate": 6.82e-07, + "loss": 10985.5625, + "step": 3410 + }, + { + "epoch": 0.006908616377864955, + "grad_norm": 46989.08984375, + "learning_rate": 6.84e-07, + "loss": 12395.8844, + "step": 3420 + }, + { + "epoch": 0.0069288170105487705, + "grad_norm": 320524.75, + "learning_rate": 6.86e-07, + "loss": 16117.9094, + "step": 3430 + }, + { + "epoch": 0.006949017643232586, + "grad_norm": 373948.21875, + "learning_rate": 6.88e-07, + "loss": 39712.1438, + "step": 3440 + }, + { + "epoch": 0.006969218275916402, + "grad_norm": 1878.5328369140625, + "learning_rate": 6.900000000000001e-07, + "loss": 16970.3328, + "step": 3450 + }, + { + "epoch": 0.0069894189086002175, + "grad_norm": 6404.05908203125, + "learning_rate": 6.92e-07, + "loss": 13985.0797, + "step": 3460 + }, + { + "epoch": 0.007009619541284033, + "grad_norm": 9278.55859375, + "learning_rate": 6.94e-07, + "loss": 20362.2734, + "step": 3470 + }, + { + "epoch": 0.007029820173967849, + "grad_norm": 4454.8564453125, + "learning_rate": 6.96e-07, + "loss": 18331.8375, + "step": 3480 + }, + { + "epoch": 0.0070500208066516645, + "grad_norm": 27826.6171875, + "learning_rate": 6.98e-07, + "loss": 8314.0312, + "step": 3490 + }, + { + "epoch": 0.00707022143933548, + "grad_norm": 175139.046875, + "learning_rate": 7.000000000000001e-07, + "loss": 21167.8063, + "step": 3500 + }, + { + "epoch": 0.007090422072019296, + "grad_norm": 382.7084655761719, + "learning_rate": 7.02e-07, + "loss": 24634.9031, + "step": 3510 + }, + { + "epoch": 0.0071106227047031114, + "grad_norm": 17931.603515625, + "learning_rate": 7.040000000000001e-07, + "loss": 7655.7578, + "step": 3520 + }, + { + "epoch": 0.007130823337386927, + "grad_norm": 1468.9346923828125, + "learning_rate": 7.06e-07, + "loss": 5755.6609, + "step": 3530 + }, + { + "epoch": 0.007151023970070743, + "grad_norm": 58617.44921875, + "learning_rate": 7.08e-07, + "loss": 8167.6687, + "step": 3540 + }, + { + "epoch": 0.007171224602754558, + "grad_norm": 23248.482421875, + "learning_rate": 7.1e-07, + "loss": 15170.9109, + "step": 3550 + }, + { + "epoch": 0.007191425235438374, + "grad_norm": 196856.765625, + "learning_rate": 7.12e-07, + "loss": 40314.8875, + "step": 3560 + }, + { + "epoch": 0.00721162586812219, + "grad_norm": 16817.931640625, + "learning_rate": 7.140000000000001e-07, + "loss": 10548.9797, + "step": 3570 + }, + { + "epoch": 0.007231826500806005, + "grad_norm": 71226.015625, + "learning_rate": 7.16e-07, + "loss": 6758.7891, + "step": 3580 + }, + { + "epoch": 0.007252027133489821, + "grad_norm": 773.783203125, + "learning_rate": 7.18e-07, + "loss": 18227.0453, + "step": 3590 + }, + { + "epoch": 0.007272227766173637, + "grad_norm": 20403.48828125, + "learning_rate": 7.2e-07, + "loss": 22545.1219, + "step": 3600 + }, + { + "epoch": 0.007292428398857452, + "grad_norm": 19539.134765625, + "learning_rate": 7.22e-07, + "loss": 18158.2469, + "step": 3610 + }, + { + "epoch": 0.007312629031541268, + "grad_norm": 9027.505859375, + "learning_rate": 7.240000000000001e-07, + "loss": 8793.7664, + "step": 3620 + }, + { + "epoch": 0.007332829664225084, + "grad_norm": 209721.796875, + "learning_rate": 7.26e-07, + "loss": 30009.0719, + "step": 3630 + }, + { + "epoch": 0.007353030296908899, + "grad_norm": 9954.90234375, + "learning_rate": 7.280000000000001e-07, + "loss": 9931.6898, + "step": 3640 + }, + { + "epoch": 0.007373230929592715, + "grad_norm": 7605.5322265625, + "learning_rate": 7.3e-07, + "loss": 24024.8078, + "step": 3650 + }, + { + "epoch": 0.007393431562276531, + "grad_norm": 45263.1328125, + "learning_rate": 7.32e-07, + "loss": 26464.5281, + "step": 3660 + }, + { + "epoch": 0.007413632194960346, + "grad_norm": 62082.2890625, + "learning_rate": 7.340000000000001e-07, + "loss": 12899.7406, + "step": 3670 + }, + { + "epoch": 0.007433832827644162, + "grad_norm": 9398.4052734375, + "learning_rate": 7.36e-07, + "loss": 9049.4445, + "step": 3680 + }, + { + "epoch": 0.007454033460327978, + "grad_norm": 4885.99951171875, + "learning_rate": 7.380000000000001e-07, + "loss": 12697.3773, + "step": 3690 + }, + { + "epoch": 0.007474234093011793, + "grad_norm": 10202.4521484375, + "learning_rate": 7.4e-07, + "loss": 19078.6125, + "step": 3700 + }, + { + "epoch": 0.007494434725695609, + "grad_norm": 104868.4296875, + "learning_rate": 7.420000000000001e-07, + "loss": 32891.2937, + "step": 3710 + }, + { + "epoch": 0.007514635358379425, + "grad_norm": 19291.474609375, + "learning_rate": 7.44e-07, + "loss": 18784.0344, + "step": 3720 + }, + { + "epoch": 0.00753483599106324, + "grad_norm": 10573.443359375, + "learning_rate": 7.46e-07, + "loss": 22859.9313, + "step": 3730 + }, + { + "epoch": 0.007555036623747056, + "grad_norm": 0.0, + "learning_rate": 7.480000000000001e-07, + "loss": 14792.5891, + "step": 3740 + }, + { + "epoch": 0.007575237256430872, + "grad_norm": 15768.1513671875, + "learning_rate": 7.5e-07, + "loss": 6010.4773, + "step": 3750 + }, + { + "epoch": 0.007595437889114687, + "grad_norm": 441226.96875, + "learning_rate": 7.520000000000001e-07, + "loss": 23060.3688, + "step": 3760 + }, + { + "epoch": 0.007615638521798503, + "grad_norm": 3727.006103515625, + "learning_rate": 7.54e-07, + "loss": 8751.2172, + "step": 3770 + }, + { + "epoch": 0.007635839154482319, + "grad_norm": 435302.59375, + "learning_rate": 7.56e-07, + "loss": 18983.2359, + "step": 3780 + }, + { + "epoch": 0.007656039787166134, + "grad_norm": 10264.9287109375, + "learning_rate": 7.580000000000001e-07, + "loss": 22978.1969, + "step": 3790 + }, + { + "epoch": 0.00767624041984995, + "grad_norm": 14036.7978515625, + "learning_rate": 7.6e-07, + "loss": 24890.2219, + "step": 3800 + }, + { + "epoch": 0.007696441052533766, + "grad_norm": 4639.4169921875, + "learning_rate": 7.620000000000001e-07, + "loss": 17235.8563, + "step": 3810 + }, + { + "epoch": 0.007716641685217581, + "grad_norm": 271073.71875, + "learning_rate": 7.64e-07, + "loss": 17006.1594, + "step": 3820 + }, + { + "epoch": 0.007736842317901397, + "grad_norm": 71233.5703125, + "learning_rate": 7.660000000000001e-07, + "loss": 11100.5359, + "step": 3830 + }, + { + "epoch": 0.0077570429505852126, + "grad_norm": 4885.40576171875, + "learning_rate": 7.68e-07, + "loss": 28004.4875, + "step": 3840 + }, + { + "epoch": 0.007777243583269028, + "grad_norm": 3231.140869140625, + "learning_rate": 7.7e-07, + "loss": 14864.6313, + "step": 3850 + }, + { + "epoch": 0.007797444215952844, + "grad_norm": 130595.25, + "learning_rate": 7.720000000000001e-07, + "loss": 11785.7328, + "step": 3860 + }, + { + "epoch": 0.00781764484863666, + "grad_norm": 46750.3046875, + "learning_rate": 7.74e-07, + "loss": 23304.3797, + "step": 3870 + }, + { + "epoch": 0.007837845481320474, + "grad_norm": 13727.7412109375, + "learning_rate": 7.760000000000001e-07, + "loss": 18299.2453, + "step": 3880 + }, + { + "epoch": 0.00785804611400429, + "grad_norm": 80635.875, + "learning_rate": 7.78e-07, + "loss": 18928.8109, + "step": 3890 + }, + { + "epoch": 0.007878246746688106, + "grad_norm": 6433.1279296875, + "learning_rate": 7.8e-07, + "loss": 13656.825, + "step": 3900 + }, + { + "epoch": 0.007898447379371922, + "grad_norm": 122192.59375, + "learning_rate": 7.820000000000001e-07, + "loss": 27131.1906, + "step": 3910 + }, + { + "epoch": 0.007918648012055737, + "grad_norm": 29559.505859375, + "learning_rate": 7.84e-07, + "loss": 8754.9813, + "step": 3920 + }, + { + "epoch": 0.007938848644739553, + "grad_norm": 47494.53125, + "learning_rate": 7.860000000000001e-07, + "loss": 15851.7203, + "step": 3930 + }, + { + "epoch": 0.007959049277423368, + "grad_norm": 522401.03125, + "learning_rate": 7.88e-07, + "loss": 25441.2172, + "step": 3940 + }, + { + "epoch": 0.007979249910107185, + "grad_norm": 36609.14453125, + "learning_rate": 7.900000000000001e-07, + "loss": 15986.1281, + "step": 3950 + }, + { + "epoch": 0.007999450542791, + "grad_norm": 1221.531982421875, + "learning_rate": 7.920000000000001e-07, + "loss": 3846.6012, + "step": 3960 + }, + { + "epoch": 0.008019651175474816, + "grad_norm": 12306.3212890625, + "learning_rate": 7.94e-07, + "loss": 12358.6422, + "step": 3970 + }, + { + "epoch": 0.008039851808158631, + "grad_norm": 70250.0, + "learning_rate": 7.960000000000001e-07, + "loss": 9718.825, + "step": 3980 + }, + { + "epoch": 0.008060052440842447, + "grad_norm": 1329.7894287109375, + "learning_rate": 7.98e-07, + "loss": 26231.5625, + "step": 3990 + }, + { + "epoch": 0.008080253073526262, + "grad_norm": 101419.0390625, + "learning_rate": 8.000000000000001e-07, + "loss": 21129.8031, + "step": 4000 + }, + { + "epoch": 0.008100453706210079, + "grad_norm": 13730.8798828125, + "learning_rate": 8.02e-07, + "loss": 10409.4219, + "step": 4010 + }, + { + "epoch": 0.008120654338893894, + "grad_norm": 214713.546875, + "learning_rate": 8.04e-07, + "loss": 20159.1484, + "step": 4020 + }, + { + "epoch": 0.00814085497157771, + "grad_norm": 9771.5947265625, + "learning_rate": 8.060000000000001e-07, + "loss": 9320.0602, + "step": 4030 + }, + { + "epoch": 0.008161055604261525, + "grad_norm": 447240.71875, + "learning_rate": 8.08e-07, + "loss": 19111.8031, + "step": 4040 + }, + { + "epoch": 0.008181256236945341, + "grad_norm": 22635.025390625, + "learning_rate": 8.100000000000001e-07, + "loss": 18934.525, + "step": 4050 + }, + { + "epoch": 0.008201456869629156, + "grad_norm": 344.34930419921875, + "learning_rate": 8.12e-07, + "loss": 15426.7219, + "step": 4060 + }, + { + "epoch": 0.008221657502312973, + "grad_norm": 37353.6875, + "learning_rate": 8.140000000000001e-07, + "loss": 11698.5375, + "step": 4070 + }, + { + "epoch": 0.008241858134996788, + "grad_norm": 2551.460205078125, + "learning_rate": 8.160000000000001e-07, + "loss": 12538.4555, + "step": 4080 + }, + { + "epoch": 0.008262058767680604, + "grad_norm": 26223.171875, + "learning_rate": 8.18e-07, + "loss": 3905.6133, + "step": 4090 + }, + { + "epoch": 0.008282259400364419, + "grad_norm": 174157.046875, + "learning_rate": 8.200000000000001e-07, + "loss": 11935.7891, + "step": 4100 + }, + { + "epoch": 0.008302460033048235, + "grad_norm": 95488.703125, + "learning_rate": 8.22e-07, + "loss": 15060.0438, + "step": 4110 + }, + { + "epoch": 0.00832266066573205, + "grad_norm": 156348.984375, + "learning_rate": 8.240000000000001e-07, + "loss": 21907.3625, + "step": 4120 + }, + { + "epoch": 0.008342861298415867, + "grad_norm": 6896.138671875, + "learning_rate": 8.260000000000001e-07, + "loss": 13047.4312, + "step": 4130 + }, + { + "epoch": 0.008363061931099681, + "grad_norm": 90049.109375, + "learning_rate": 8.280000000000001e-07, + "loss": 11974.4555, + "step": 4140 + }, + { + "epoch": 0.008383262563783498, + "grad_norm": 15338.91796875, + "learning_rate": 8.300000000000001e-07, + "loss": 29198.2938, + "step": 4150 + }, + { + "epoch": 0.008403463196467313, + "grad_norm": 26100.8828125, + "learning_rate": 8.32e-07, + "loss": 8049.6844, + "step": 4160 + }, + { + "epoch": 0.00842366382915113, + "grad_norm": 73724.1875, + "learning_rate": 8.340000000000001e-07, + "loss": 18171.9047, + "step": 4170 + }, + { + "epoch": 0.008443864461834944, + "grad_norm": 49232.53125, + "learning_rate": 8.36e-07, + "loss": 32805.1312, + "step": 4180 + }, + { + "epoch": 0.00846406509451876, + "grad_norm": 4021.312255859375, + "learning_rate": 8.380000000000001e-07, + "loss": 12269.5789, + "step": 4190 + }, + { + "epoch": 0.008484265727202575, + "grad_norm": 1500.0428466796875, + "learning_rate": 8.400000000000001e-07, + "loss": 17195.5438, + "step": 4200 + }, + { + "epoch": 0.008504466359886392, + "grad_norm": 53687.9375, + "learning_rate": 8.42e-07, + "loss": 27653.4656, + "step": 4210 + }, + { + "epoch": 0.008524666992570207, + "grad_norm": 10267.7109375, + "learning_rate": 8.440000000000001e-07, + "loss": 11514.4438, + "step": 4220 + }, + { + "epoch": 0.008544867625254023, + "grad_norm": 140527.0625, + "learning_rate": 8.46e-07, + "loss": 20187.9156, + "step": 4230 + }, + { + "epoch": 0.008565068257937838, + "grad_norm": 2118.849853515625, + "learning_rate": 8.480000000000001e-07, + "loss": 7624.9008, + "step": 4240 + }, + { + "epoch": 0.008585268890621655, + "grad_norm": 41565.04296875, + "learning_rate": 8.500000000000001e-07, + "loss": 11878.5438, + "step": 4250 + }, + { + "epoch": 0.00860546952330547, + "grad_norm": 30431.865234375, + "learning_rate": 8.520000000000001e-07, + "loss": 4866.0418, + "step": 4260 + }, + { + "epoch": 0.008625670155989286, + "grad_norm": 42802.20703125, + "learning_rate": 8.540000000000001e-07, + "loss": 20688.9984, + "step": 4270 + }, + { + "epoch": 0.0086458707886731, + "grad_norm": 47751.72265625, + "learning_rate": 8.56e-07, + "loss": 16787.0203, + "step": 4280 + }, + { + "epoch": 0.008666071421356917, + "grad_norm": 67298.90625, + "learning_rate": 8.580000000000001e-07, + "loss": 9453.7469, + "step": 4290 + }, + { + "epoch": 0.008686272054040732, + "grad_norm": 9526.2021484375, + "learning_rate": 8.6e-07, + "loss": 7538.9187, + "step": 4300 + }, + { + "epoch": 0.008706472686724549, + "grad_norm": 15340.333984375, + "learning_rate": 8.620000000000001e-07, + "loss": 22261.175, + "step": 4310 + }, + { + "epoch": 0.008726673319408363, + "grad_norm": 107383.2578125, + "learning_rate": 8.640000000000001e-07, + "loss": 28152.175, + "step": 4320 + }, + { + "epoch": 0.00874687395209218, + "grad_norm": 51218.140625, + "learning_rate": 8.66e-07, + "loss": 17940.6453, + "step": 4330 + }, + { + "epoch": 0.008767074584775995, + "grad_norm": 23877.4140625, + "learning_rate": 8.680000000000001e-07, + "loss": 19704.0125, + "step": 4340 + }, + { + "epoch": 0.008787275217459811, + "grad_norm": 253370.71875, + "learning_rate": 8.7e-07, + "loss": 16135.5781, + "step": 4350 + }, + { + "epoch": 0.008807475850143626, + "grad_norm": 6619.11181640625, + "learning_rate": 8.720000000000001e-07, + "loss": 10785.4383, + "step": 4360 + }, + { + "epoch": 0.008827676482827443, + "grad_norm": 11134.771484375, + "learning_rate": 8.740000000000001e-07, + "loss": 5805.3684, + "step": 4370 + }, + { + "epoch": 0.008847877115511257, + "grad_norm": 76705.03125, + "learning_rate": 8.760000000000001e-07, + "loss": 11842.0953, + "step": 4380 + }, + { + "epoch": 0.008868077748195074, + "grad_norm": 125204.8203125, + "learning_rate": 8.780000000000001e-07, + "loss": 10773.5977, + "step": 4390 + }, + { + "epoch": 0.008888278380878889, + "grad_norm": 34642.34765625, + "learning_rate": 8.8e-07, + "loss": 21008.2766, + "step": 4400 + }, + { + "epoch": 0.008908479013562705, + "grad_norm": 67304.9921875, + "learning_rate": 8.820000000000001e-07, + "loss": 10828.8234, + "step": 4410 + }, + { + "epoch": 0.00892867964624652, + "grad_norm": 24249.85546875, + "learning_rate": 8.840000000000001e-07, + "loss": 11090.4133, + "step": 4420 + }, + { + "epoch": 0.008948880278930336, + "grad_norm": 545.22265625, + "learning_rate": 8.860000000000001e-07, + "loss": 10709.8906, + "step": 4430 + }, + { + "epoch": 0.008969080911614151, + "grad_norm": 12249.2568359375, + "learning_rate": 8.880000000000001e-07, + "loss": 15913.6437, + "step": 4440 + }, + { + "epoch": 0.008989281544297968, + "grad_norm": 186081.671875, + "learning_rate": 8.900000000000001e-07, + "loss": 15328.6797, + "step": 4450 + }, + { + "epoch": 0.009009482176981783, + "grad_norm": 39593.234375, + "learning_rate": 8.920000000000001e-07, + "loss": 22442.2359, + "step": 4460 + }, + { + "epoch": 0.0090296828096656, + "grad_norm": 157382.625, + "learning_rate": 8.94e-07, + "loss": 25779.5281, + "step": 4470 + }, + { + "epoch": 0.009049883442349414, + "grad_norm": 51718.86328125, + "learning_rate": 8.960000000000001e-07, + "loss": 18292.8734, + "step": 4480 + }, + { + "epoch": 0.00907008407503323, + "grad_norm": 174211.296875, + "learning_rate": 8.980000000000001e-07, + "loss": 12719.6672, + "step": 4490 + }, + { + "epoch": 0.009090284707717045, + "grad_norm": 36377.33984375, + "learning_rate": 9.000000000000001e-07, + "loss": 18421.1813, + "step": 4500 + }, + { + "epoch": 0.009110485340400862, + "grad_norm": 32912.82421875, + "learning_rate": 9.020000000000001e-07, + "loss": 7643.2109, + "step": 4510 + }, + { + "epoch": 0.009130685973084677, + "grad_norm": 326654.40625, + "learning_rate": 9.04e-07, + "loss": 18397.1922, + "step": 4520 + }, + { + "epoch": 0.009150886605768493, + "grad_norm": 36873.53515625, + "learning_rate": 9.060000000000001e-07, + "loss": 7409.8352, + "step": 4530 + }, + { + "epoch": 0.009171087238452308, + "grad_norm": 209855.9375, + "learning_rate": 9.080000000000001e-07, + "loss": 17906.6797, + "step": 4540 + }, + { + "epoch": 0.009191287871136124, + "grad_norm": 62610.6015625, + "learning_rate": 9.100000000000001e-07, + "loss": 8424.4047, + "step": 4550 + }, + { + "epoch": 0.00921148850381994, + "grad_norm": 19179.712890625, + "learning_rate": 9.120000000000001e-07, + "loss": 11043.2625, + "step": 4560 + }, + { + "epoch": 0.009231689136503756, + "grad_norm": 33409.73828125, + "learning_rate": 9.140000000000001e-07, + "loss": 5997.2844, + "step": 4570 + }, + { + "epoch": 0.00925188976918757, + "grad_norm": 17323.7265625, + "learning_rate": 9.160000000000001e-07, + "loss": 25414.8828, + "step": 4580 + }, + { + "epoch": 0.009272090401871387, + "grad_norm": 34895.8046875, + "learning_rate": 9.180000000000001e-07, + "loss": 4603.7688, + "step": 4590 + }, + { + "epoch": 0.009292291034555202, + "grad_norm": 24996.859375, + "learning_rate": 9.200000000000001e-07, + "loss": 18408.25, + "step": 4600 + }, + { + "epoch": 0.009312491667239018, + "grad_norm": 200959.6875, + "learning_rate": 9.220000000000001e-07, + "loss": 16407.1047, + "step": 4610 + }, + { + "epoch": 0.009332692299922833, + "grad_norm": 18809.796875, + "learning_rate": 9.240000000000001e-07, + "loss": 22280.4156, + "step": 4620 + }, + { + "epoch": 0.00935289293260665, + "grad_norm": 320747.65625, + "learning_rate": 9.260000000000001e-07, + "loss": 14981.1156, + "step": 4630 + }, + { + "epoch": 0.009373093565290464, + "grad_norm": 1956.9820556640625, + "learning_rate": 9.28e-07, + "loss": 6701.7961, + "step": 4640 + }, + { + "epoch": 0.009393294197974281, + "grad_norm": 10395.0615234375, + "learning_rate": 9.300000000000001e-07, + "loss": 10008.9758, + "step": 4650 + }, + { + "epoch": 0.009413494830658096, + "grad_norm": 121769.625, + "learning_rate": 9.320000000000001e-07, + "loss": 19805.6781, + "step": 4660 + }, + { + "epoch": 0.009433695463341912, + "grad_norm": 534600.1875, + "learning_rate": 9.340000000000001e-07, + "loss": 24132.7594, + "step": 4670 + }, + { + "epoch": 0.009453896096025727, + "grad_norm": 3357.00341796875, + "learning_rate": 9.360000000000001e-07, + "loss": 8175.8836, + "step": 4680 + }, + { + "epoch": 0.009474096728709544, + "grad_norm": 36136.92578125, + "learning_rate": 9.380000000000001e-07, + "loss": 23143.6828, + "step": 4690 + }, + { + "epoch": 0.009494297361393358, + "grad_norm": 2892.894775390625, + "learning_rate": 9.400000000000001e-07, + "loss": 10018.2188, + "step": 4700 + }, + { + "epoch": 0.009514497994077175, + "grad_norm": 21657.37109375, + "learning_rate": 9.420000000000002e-07, + "loss": 27993.0875, + "step": 4710 + }, + { + "epoch": 0.00953469862676099, + "grad_norm": 18192.474609375, + "learning_rate": 9.440000000000001e-07, + "loss": 16071.0109, + "step": 4720 + }, + { + "epoch": 0.009554899259444806, + "grad_norm": 356419.75, + "learning_rate": 9.460000000000001e-07, + "loss": 26253.3219, + "step": 4730 + }, + { + "epoch": 0.009575099892128621, + "grad_norm": 214843.84375, + "learning_rate": 9.480000000000001e-07, + "loss": 10928.5531, + "step": 4740 + }, + { + "epoch": 0.009595300524812438, + "grad_norm": 17945.451171875, + "learning_rate": 9.500000000000001e-07, + "loss": 11343.7039, + "step": 4750 + }, + { + "epoch": 0.009615501157496252, + "grad_norm": 92573.0078125, + "learning_rate": 9.520000000000002e-07, + "loss": 12407.45, + "step": 4760 + }, + { + "epoch": 0.009635701790180069, + "grad_norm": 22648.951171875, + "learning_rate": 9.54e-07, + "loss": 23956.5344, + "step": 4770 + }, + { + "epoch": 0.009655902422863884, + "grad_norm": 1144.9775390625, + "learning_rate": 9.56e-07, + "loss": 5699.5621, + "step": 4780 + }, + { + "epoch": 0.0096761030555477, + "grad_norm": 615.1077270507812, + "learning_rate": 9.58e-07, + "loss": 20422.3328, + "step": 4790 + }, + { + "epoch": 0.009696303688231515, + "grad_norm": 4486.64306640625, + "learning_rate": 9.600000000000001e-07, + "loss": 35658.1719, + "step": 4800 + }, + { + "epoch": 0.009716504320915332, + "grad_norm": 386149.0625, + "learning_rate": 9.62e-07, + "loss": 14565.6313, + "step": 4810 + }, + { + "epoch": 0.009736704953599146, + "grad_norm": 2645.639892578125, + "learning_rate": 9.64e-07, + "loss": 8256.2805, + "step": 4820 + }, + { + "epoch": 0.009756905586282963, + "grad_norm": 4177.47412109375, + "learning_rate": 9.660000000000002e-07, + "loss": 20948.3828, + "step": 4830 + }, + { + "epoch": 0.009777106218966778, + "grad_norm": 4455.91943359375, + "learning_rate": 9.68e-07, + "loss": 11324.1414, + "step": 4840 + }, + { + "epoch": 0.009797306851650594, + "grad_norm": 12810.568359375, + "learning_rate": 9.7e-07, + "loss": 12510.1148, + "step": 4850 + }, + { + "epoch": 0.009817507484334409, + "grad_norm": 3605.182373046875, + "learning_rate": 9.72e-07, + "loss": 28451.1781, + "step": 4860 + }, + { + "epoch": 0.009837708117018226, + "grad_norm": 2413.83251953125, + "learning_rate": 9.740000000000001e-07, + "loss": 19872.1969, + "step": 4870 + }, + { + "epoch": 0.00985790874970204, + "grad_norm": 5755.9609375, + "learning_rate": 9.76e-07, + "loss": 5114.0129, + "step": 4880 + }, + { + "epoch": 0.009878109382385857, + "grad_norm": 14544.984375, + "learning_rate": 9.78e-07, + "loss": 10810.7375, + "step": 4890 + }, + { + "epoch": 0.009898310015069672, + "grad_norm": 44065.046875, + "learning_rate": 9.800000000000001e-07, + "loss": 12384.8953, + "step": 4900 + }, + { + "epoch": 0.009918510647753488, + "grad_norm": 1717.6693115234375, + "learning_rate": 9.82e-07, + "loss": 12779.8641, + "step": 4910 + }, + { + "epoch": 0.009938711280437303, + "grad_norm": 76248.9765625, + "learning_rate": 9.84e-07, + "loss": 4103.3781, + "step": 4920 + }, + { + "epoch": 0.00995891191312112, + "grad_norm": 3435.300048828125, + "learning_rate": 9.86e-07, + "loss": 17310.5031, + "step": 4930 + }, + { + "epoch": 0.009979112545804934, + "grad_norm": 1764.0357666015625, + "learning_rate": 9.880000000000001e-07, + "loss": 12670.4984, + "step": 4940 + }, + { + "epoch": 0.00999931317848875, + "grad_norm": 619.042724609375, + "learning_rate": 9.9e-07, + "loss": 11573.6383, + "step": 4950 + }, + { + "epoch": 0.010019513811172566, + "grad_norm": 55950.39453125, + "learning_rate": 9.92e-07, + "loss": 11606.5344, + "step": 4960 + }, + { + "epoch": 0.010039714443856382, + "grad_norm": 87145.140625, + "learning_rate": 9.940000000000001e-07, + "loss": 30012.8438, + "step": 4970 + }, + { + "epoch": 0.010059915076540197, + "grad_norm": 4116.0888671875, + "learning_rate": 9.96e-07, + "loss": 20051.3844, + "step": 4980 + }, + { + "epoch": 0.010080115709224013, + "grad_norm": 5663.4267578125, + "learning_rate": 9.98e-07, + "loss": 21777.5422, + "step": 4990 + }, + { + "epoch": 0.010100316341907828, + "grad_norm": 7582.4169921875, + "learning_rate": 1.0000000000000002e-06, + "loss": 3765.9789, + "step": 5000 + }, + { + "epoch": 0.010120516974591645, + "grad_norm": 27853.033203125, + "learning_rate": 1.002e-06, + "loss": 25481.9562, + "step": 5010 + }, + { + "epoch": 0.01014071760727546, + "grad_norm": 10955.7919921875, + "learning_rate": 1.004e-06, + "loss": 5566.2383, + "step": 5020 + }, + { + "epoch": 0.010160918239959276, + "grad_norm": 37881.40625, + "learning_rate": 1.006e-06, + "loss": 6663.8008, + "step": 5030 + }, + { + "epoch": 0.010181118872643091, + "grad_norm": 24759.21875, + "learning_rate": 1.0080000000000001e-06, + "loss": 13978.4094, + "step": 5040 + }, + { + "epoch": 0.010201319505326907, + "grad_norm": 181236.765625, + "learning_rate": 1.01e-06, + "loss": 10625.9258, + "step": 5050 + }, + { + "epoch": 0.010221520138010722, + "grad_norm": 31320.72265625, + "learning_rate": 1.012e-06, + "loss": 7890.9656, + "step": 5060 + }, + { + "epoch": 0.010241720770694539, + "grad_norm": 108446.734375, + "learning_rate": 1.0140000000000002e-06, + "loss": 11903.1367, + "step": 5070 + }, + { + "epoch": 0.010261921403378354, + "grad_norm": 34168.71875, + "learning_rate": 1.016e-06, + "loss": 11506.0695, + "step": 5080 + }, + { + "epoch": 0.01028212203606217, + "grad_norm": 44073.58984375, + "learning_rate": 1.018e-06, + "loss": 18388.0938, + "step": 5090 + }, + { + "epoch": 0.010302322668745985, + "grad_norm": 44816.62109375, + "learning_rate": 1.02e-06, + "loss": 14483.0406, + "step": 5100 + }, + { + "epoch": 0.010322523301429801, + "grad_norm": 0.0, + "learning_rate": 1.0220000000000001e-06, + "loss": 7314.1383, + "step": 5110 + }, + { + "epoch": 0.010342723934113616, + "grad_norm": 10833.2880859375, + "learning_rate": 1.024e-06, + "loss": 12888.6367, + "step": 5120 + }, + { + "epoch": 0.010362924566797433, + "grad_norm": 33675.21484375, + "learning_rate": 1.026e-06, + "loss": 11841.3594, + "step": 5130 + }, + { + "epoch": 0.010383125199481247, + "grad_norm": 228794.578125, + "learning_rate": 1.0280000000000002e-06, + "loss": 16666.1969, + "step": 5140 + }, + { + "epoch": 0.010403325832165064, + "grad_norm": 534849.9375, + "learning_rate": 1.03e-06, + "loss": 34278.775, + "step": 5150 + }, + { + "epoch": 0.010423526464848879, + "grad_norm": 39371.65625, + "learning_rate": 1.032e-06, + "loss": 13702.1859, + "step": 5160 + }, + { + "epoch": 0.010443727097532695, + "grad_norm": 76268.5625, + "learning_rate": 1.0340000000000002e-06, + "loss": 10933.9594, + "step": 5170 + }, + { + "epoch": 0.01046392773021651, + "grad_norm": 12381.3994140625, + "learning_rate": 1.0360000000000001e-06, + "loss": 14281.85, + "step": 5180 + }, + { + "epoch": 0.010484128362900327, + "grad_norm": 35658.42578125, + "learning_rate": 1.038e-06, + "loss": 18028.2844, + "step": 5190 + }, + { + "epoch": 0.010504328995584141, + "grad_norm": 33430.06640625, + "learning_rate": 1.04e-06, + "loss": 14247.6156, + "step": 5200 + }, + { + "epoch": 0.010524529628267958, + "grad_norm": 6129.23046875, + "learning_rate": 1.0420000000000001e-06, + "loss": 13850.7375, + "step": 5210 + }, + { + "epoch": 0.010544730260951773, + "grad_norm": 10648.818359375, + "learning_rate": 1.044e-06, + "loss": 21576.0875, + "step": 5220 + }, + { + "epoch": 0.01056493089363559, + "grad_norm": 3327.794677734375, + "learning_rate": 1.046e-06, + "loss": 19406.5406, + "step": 5230 + }, + { + "epoch": 0.010585131526319404, + "grad_norm": 72311.15625, + "learning_rate": 1.0480000000000002e-06, + "loss": 19713.2438, + "step": 5240 + }, + { + "epoch": 0.01060533215900322, + "grad_norm": 1741.361328125, + "learning_rate": 1.0500000000000001e-06, + "loss": 22314.1828, + "step": 5250 + }, + { + "epoch": 0.010625532791687035, + "grad_norm": 3498.150634765625, + "learning_rate": 1.052e-06, + "loss": 23976.6031, + "step": 5260 + }, + { + "epoch": 0.010645733424370852, + "grad_norm": 82219.84375, + "learning_rate": 1.054e-06, + "loss": 15888.5781, + "step": 5270 + }, + { + "epoch": 0.010665934057054667, + "grad_norm": 7894.01025390625, + "learning_rate": 1.0560000000000001e-06, + "loss": 23960.9922, + "step": 5280 + }, + { + "epoch": 0.010686134689738483, + "grad_norm": 29842.41796875, + "learning_rate": 1.058e-06, + "loss": 7760.075, + "step": 5290 + }, + { + "epoch": 0.010706335322422298, + "grad_norm": 364546.5, + "learning_rate": 1.06e-06, + "loss": 26794.7062, + "step": 5300 + }, + { + "epoch": 0.010726535955106115, + "grad_norm": 499.27117919921875, + "learning_rate": 1.0620000000000002e-06, + "loss": 22033.0344, + "step": 5310 + }, + { + "epoch": 0.01074673658778993, + "grad_norm": 5294.3837890625, + "learning_rate": 1.064e-06, + "loss": 8560.5219, + "step": 5320 + }, + { + "epoch": 0.010766937220473746, + "grad_norm": 100056.203125, + "learning_rate": 1.066e-06, + "loss": 18931.9969, + "step": 5330 + }, + { + "epoch": 0.01078713785315756, + "grad_norm": 801.2661743164062, + "learning_rate": 1.0680000000000002e-06, + "loss": 13702.7859, + "step": 5340 + }, + { + "epoch": 0.010807338485841377, + "grad_norm": 5510.7294921875, + "learning_rate": 1.0700000000000001e-06, + "loss": 4377.9586, + "step": 5350 + }, + { + "epoch": 0.010827539118525192, + "grad_norm": 3436.544921875, + "learning_rate": 1.072e-06, + "loss": 47433.8531, + "step": 5360 + }, + { + "epoch": 0.010847739751209009, + "grad_norm": 1919.5628662109375, + "learning_rate": 1.074e-06, + "loss": 18344.2562, + "step": 5370 + }, + { + "epoch": 0.010867940383892823, + "grad_norm": 143652.421875, + "learning_rate": 1.0760000000000002e-06, + "loss": 31453.1781, + "step": 5380 + }, + { + "epoch": 0.01088814101657664, + "grad_norm": 1470.7537841796875, + "learning_rate": 1.078e-06, + "loss": 17771.8156, + "step": 5390 + }, + { + "epoch": 0.010908341649260455, + "grad_norm": 81239.921875, + "learning_rate": 1.08e-06, + "loss": 12848.8188, + "step": 5400 + }, + { + "epoch": 0.010928542281944271, + "grad_norm": 38639.13671875, + "learning_rate": 1.0820000000000002e-06, + "loss": 7559.2953, + "step": 5410 + }, + { + "epoch": 0.010948742914628086, + "grad_norm": 8112.00146484375, + "learning_rate": 1.0840000000000001e-06, + "loss": 18786.9406, + "step": 5420 + }, + { + "epoch": 0.010968943547311902, + "grad_norm": 69353.4453125, + "learning_rate": 1.086e-06, + "loss": 23695.3297, + "step": 5430 + }, + { + "epoch": 0.010989144179995717, + "grad_norm": 11517.8037109375, + "learning_rate": 1.088e-06, + "loss": 9925.0016, + "step": 5440 + }, + { + "epoch": 0.011009344812679534, + "grad_norm": 74804.3203125, + "learning_rate": 1.0900000000000002e-06, + "loss": 10572.9836, + "step": 5450 + }, + { + "epoch": 0.011029545445363349, + "grad_norm": 26380.28515625, + "learning_rate": 1.092e-06, + "loss": 21151.9078, + "step": 5460 + }, + { + "epoch": 0.011049746078047165, + "grad_norm": 1416.6256103515625, + "learning_rate": 1.094e-06, + "loss": 8203.6656, + "step": 5470 + }, + { + "epoch": 0.01106994671073098, + "grad_norm": 160510.890625, + "learning_rate": 1.0960000000000002e-06, + "loss": 21101.95, + "step": 5480 + }, + { + "epoch": 0.011090147343414796, + "grad_norm": 34927.15625, + "learning_rate": 1.0980000000000001e-06, + "loss": 3430.9953, + "step": 5490 + }, + { + "epoch": 0.011110347976098611, + "grad_norm": 15234.2568359375, + "learning_rate": 1.1e-06, + "loss": 12458.0938, + "step": 5500 + }, + { + "epoch": 0.011130548608782428, + "grad_norm": 94129.90625, + "learning_rate": 1.1020000000000002e-06, + "loss": 16887.6672, + "step": 5510 + }, + { + "epoch": 0.011150749241466243, + "grad_norm": 9846.66015625, + "learning_rate": 1.1040000000000001e-06, + "loss": 28645.225, + "step": 5520 + }, + { + "epoch": 0.011170949874150059, + "grad_norm": 31460.53125, + "learning_rate": 1.106e-06, + "loss": 11961.6883, + "step": 5530 + }, + { + "epoch": 0.011191150506833874, + "grad_norm": 82737.9375, + "learning_rate": 1.108e-06, + "loss": 10054.0328, + "step": 5540 + }, + { + "epoch": 0.01121135113951769, + "grad_norm": 66389.5546875, + "learning_rate": 1.1100000000000002e-06, + "loss": 16057.0344, + "step": 5550 + }, + { + "epoch": 0.011231551772201505, + "grad_norm": 3344.353271484375, + "learning_rate": 1.1120000000000001e-06, + "loss": 8793.0539, + "step": 5560 + }, + { + "epoch": 0.011251752404885322, + "grad_norm": 117917.640625, + "learning_rate": 1.114e-06, + "loss": 12484.775, + "step": 5570 + }, + { + "epoch": 0.011271953037569137, + "grad_norm": 3019.362060546875, + "learning_rate": 1.1160000000000002e-06, + "loss": 10219.2508, + "step": 5580 + }, + { + "epoch": 0.011292153670252953, + "grad_norm": 143684.21875, + "learning_rate": 1.1180000000000001e-06, + "loss": 21637.1641, + "step": 5590 + }, + { + "epoch": 0.011312354302936768, + "grad_norm": 251.64846801757812, + "learning_rate": 1.12e-06, + "loss": 12254.525, + "step": 5600 + }, + { + "epoch": 0.011332554935620584, + "grad_norm": 28985.96484375, + "learning_rate": 1.122e-06, + "loss": 15970.0672, + "step": 5610 + }, + { + "epoch": 0.0113527555683044, + "grad_norm": 36913.453125, + "learning_rate": 1.1240000000000002e-06, + "loss": 15036.6437, + "step": 5620 + }, + { + "epoch": 0.011372956200988216, + "grad_norm": 24031.478515625, + "learning_rate": 1.126e-06, + "loss": 7894.4852, + "step": 5630 + }, + { + "epoch": 0.01139315683367203, + "grad_norm": 6255.70263671875, + "learning_rate": 1.128e-06, + "loss": 7917.657, + "step": 5640 + }, + { + "epoch": 0.011413357466355847, + "grad_norm": 207117.984375, + "learning_rate": 1.1300000000000002e-06, + "loss": 25344.075, + "step": 5650 + }, + { + "epoch": 0.011433558099039662, + "grad_norm": 197209.859375, + "learning_rate": 1.1320000000000001e-06, + "loss": 10412.9719, + "step": 5660 + }, + { + "epoch": 0.011453758731723478, + "grad_norm": 48560.23046875, + "learning_rate": 1.134e-06, + "loss": 20832.4672, + "step": 5670 + }, + { + "epoch": 0.011473959364407293, + "grad_norm": 816.8687744140625, + "learning_rate": 1.1360000000000002e-06, + "loss": 10900.3617, + "step": 5680 + }, + { + "epoch": 0.01149415999709111, + "grad_norm": 145682.5, + "learning_rate": 1.1380000000000002e-06, + "loss": 15259.2938, + "step": 5690 + }, + { + "epoch": 0.011514360629774924, + "grad_norm": 11706.9912109375, + "learning_rate": 1.14e-06, + "loss": 5335.4133, + "step": 5700 + }, + { + "epoch": 0.011534561262458741, + "grad_norm": 350.41754150390625, + "learning_rate": 1.142e-06, + "loss": 14387.5734, + "step": 5710 + }, + { + "epoch": 0.011554761895142556, + "grad_norm": 2927.037841796875, + "learning_rate": 1.1440000000000002e-06, + "loss": 24064.5563, + "step": 5720 + }, + { + "epoch": 0.011574962527826372, + "grad_norm": 26759.54296875, + "learning_rate": 1.1460000000000001e-06, + "loss": 13552.6172, + "step": 5730 + }, + { + "epoch": 0.011595163160510187, + "grad_norm": 38653.3125, + "learning_rate": 1.148e-06, + "loss": 15552.3547, + "step": 5740 + }, + { + "epoch": 0.011615363793194004, + "grad_norm": 7402.4599609375, + "learning_rate": 1.1500000000000002e-06, + "loss": 4720.9914, + "step": 5750 + }, + { + "epoch": 0.011635564425877818, + "grad_norm": 18212.04296875, + "learning_rate": 1.1520000000000002e-06, + "loss": 15217.6609, + "step": 5760 + }, + { + "epoch": 0.011655765058561635, + "grad_norm": 3391.678466796875, + "learning_rate": 1.154e-06, + "loss": 10566.6648, + "step": 5770 + }, + { + "epoch": 0.01167596569124545, + "grad_norm": 45592.8515625, + "learning_rate": 1.156e-06, + "loss": 20316.5828, + "step": 5780 + }, + { + "epoch": 0.011696166323929266, + "grad_norm": 165522.96875, + "learning_rate": 1.1580000000000002e-06, + "loss": 24717.0828, + "step": 5790 + }, + { + "epoch": 0.011716366956613081, + "grad_norm": 1959.2056884765625, + "learning_rate": 1.1600000000000001e-06, + "loss": 15901.0234, + "step": 5800 + }, + { + "epoch": 0.011736567589296898, + "grad_norm": 10222.0068359375, + "learning_rate": 1.162e-06, + "loss": 26663.025, + "step": 5810 + }, + { + "epoch": 0.011756768221980712, + "grad_norm": 14248.8837890625, + "learning_rate": 1.1640000000000002e-06, + "loss": 6976.7758, + "step": 5820 + }, + { + "epoch": 0.011776968854664529, + "grad_norm": 3360.869384765625, + "learning_rate": 1.1660000000000001e-06, + "loss": 14049.3172, + "step": 5830 + }, + { + "epoch": 0.011797169487348344, + "grad_norm": 3128.62255859375, + "learning_rate": 1.168e-06, + "loss": 5973.1, + "step": 5840 + }, + { + "epoch": 0.01181737012003216, + "grad_norm": 72855.3984375, + "learning_rate": 1.1700000000000002e-06, + "loss": 11630.7953, + "step": 5850 + }, + { + "epoch": 0.011837570752715975, + "grad_norm": 14373.0810546875, + "learning_rate": 1.1720000000000002e-06, + "loss": 7168.2812, + "step": 5860 + }, + { + "epoch": 0.011857771385399792, + "grad_norm": 166529.609375, + "learning_rate": 1.1740000000000001e-06, + "loss": 21492.2031, + "step": 5870 + }, + { + "epoch": 0.011877972018083606, + "grad_norm": 13754.033203125, + "learning_rate": 1.176e-06, + "loss": 7930.2063, + "step": 5880 + }, + { + "epoch": 0.011898172650767423, + "grad_norm": 1858.7001953125, + "learning_rate": 1.1780000000000002e-06, + "loss": 11885.7805, + "step": 5890 + }, + { + "epoch": 0.011918373283451238, + "grad_norm": 182396.09375, + "learning_rate": 1.1800000000000001e-06, + "loss": 49554.0062, + "step": 5900 + }, + { + "epoch": 0.011938573916135054, + "grad_norm": 3051.433837890625, + "learning_rate": 1.182e-06, + "loss": 26190.2234, + "step": 5910 + }, + { + "epoch": 0.011958774548818869, + "grad_norm": 214122.203125, + "learning_rate": 1.1840000000000002e-06, + "loss": 15595.1734, + "step": 5920 + }, + { + "epoch": 0.011978975181502685, + "grad_norm": 15737.2900390625, + "learning_rate": 1.1860000000000002e-06, + "loss": 20354.6188, + "step": 5930 + }, + { + "epoch": 0.0119991758141865, + "grad_norm": 749443.6875, + "learning_rate": 1.188e-06, + "loss": 35290.1594, + "step": 5940 + }, + { + "epoch": 0.012019376446870317, + "grad_norm": 10347.375, + "learning_rate": 1.19e-06, + "loss": 21005.275, + "step": 5950 + }, + { + "epoch": 0.012039577079554132, + "grad_norm": 3748.6337890625, + "learning_rate": 1.1920000000000002e-06, + "loss": 20068.0781, + "step": 5960 + }, + { + "epoch": 0.012059777712237948, + "grad_norm": 22553.638671875, + "learning_rate": 1.1940000000000001e-06, + "loss": 13706.4297, + "step": 5970 + }, + { + "epoch": 0.012079978344921763, + "grad_norm": 21190.9609375, + "learning_rate": 1.196e-06, + "loss": 20361.0281, + "step": 5980 + }, + { + "epoch": 0.01210017897760558, + "grad_norm": 9108.6171875, + "learning_rate": 1.1980000000000002e-06, + "loss": 24575.3453, + "step": 5990 + }, + { + "epoch": 0.012120379610289394, + "grad_norm": 4027.6748046875, + "learning_rate": 1.2000000000000002e-06, + "loss": 5930.3949, + "step": 6000 + }, + { + "epoch": 0.01214058024297321, + "grad_norm": 267680.75, + "learning_rate": 1.202e-06, + "loss": 14484.9406, + "step": 6010 + }, + { + "epoch": 0.012160780875657026, + "grad_norm": 90715.2421875, + "learning_rate": 1.204e-06, + "loss": 16868.6469, + "step": 6020 + }, + { + "epoch": 0.012180981508340842, + "grad_norm": 14685.91796875, + "learning_rate": 1.2060000000000002e-06, + "loss": 4984.3938, + "step": 6030 + }, + { + "epoch": 0.012201182141024657, + "grad_norm": 1735.1435546875, + "learning_rate": 1.2080000000000001e-06, + "loss": 7450.3727, + "step": 6040 + }, + { + "epoch": 0.012221382773708473, + "grad_norm": 13322.9169921875, + "learning_rate": 1.21e-06, + "loss": 17728.1766, + "step": 6050 + }, + { + "epoch": 0.012241583406392288, + "grad_norm": 64941.97265625, + "learning_rate": 1.2120000000000002e-06, + "loss": 17426.5281, + "step": 6060 + }, + { + "epoch": 0.012261784039076105, + "grad_norm": 30488.32421875, + "learning_rate": 1.214e-06, + "loss": 37703.3906, + "step": 6070 + }, + { + "epoch": 0.01228198467175992, + "grad_norm": 12084.6201171875, + "learning_rate": 1.216e-06, + "loss": 20386.4953, + "step": 6080 + }, + { + "epoch": 0.012302185304443734, + "grad_norm": 1851.4454345703125, + "learning_rate": 1.2180000000000002e-06, + "loss": 13418.0297, + "step": 6090 + }, + { + "epoch": 0.01232238593712755, + "grad_norm": 4988.72509765625, + "learning_rate": 1.2200000000000002e-06, + "loss": 23210.8531, + "step": 6100 + }, + { + "epoch": 0.012342586569811366, + "grad_norm": 42636.3125, + "learning_rate": 1.2220000000000001e-06, + "loss": 18765.2844, + "step": 6110 + }, + { + "epoch": 0.012362787202495182, + "grad_norm": 62963.3125, + "learning_rate": 1.224e-06, + "loss": 19066.4516, + "step": 6120 + }, + { + "epoch": 0.012382987835178997, + "grad_norm": 247887.890625, + "learning_rate": 1.2260000000000002e-06, + "loss": 13484.4, + "step": 6130 + }, + { + "epoch": 0.012403188467862813, + "grad_norm": 96677.53125, + "learning_rate": 1.2280000000000001e-06, + "loss": 11780.5477, + "step": 6140 + }, + { + "epoch": 0.012423389100546628, + "grad_norm": 0.0, + "learning_rate": 1.23e-06, + "loss": 6985.193, + "step": 6150 + }, + { + "epoch": 0.012443589733230445, + "grad_norm": 73377.5703125, + "learning_rate": 1.2320000000000002e-06, + "loss": 9011.6391, + "step": 6160 + }, + { + "epoch": 0.01246379036591426, + "grad_norm": 4710.33154296875, + "learning_rate": 1.234e-06, + "loss": 19982.975, + "step": 6170 + }, + { + "epoch": 0.012483990998598076, + "grad_norm": 159648.59375, + "learning_rate": 1.2360000000000001e-06, + "loss": 17345.0922, + "step": 6180 + }, + { + "epoch": 0.012504191631281891, + "grad_norm": 1952.4061279296875, + "learning_rate": 1.238e-06, + "loss": 23448.3859, + "step": 6190 + }, + { + "epoch": 0.012524392263965707, + "grad_norm": 2572.1171875, + "learning_rate": 1.2400000000000002e-06, + "loss": 17860.4375, + "step": 6200 + }, + { + "epoch": 0.012544592896649522, + "grad_norm": 78835.7265625, + "learning_rate": 1.2420000000000001e-06, + "loss": 15290.1266, + "step": 6210 + }, + { + "epoch": 0.012564793529333339, + "grad_norm": 25783.2421875, + "learning_rate": 1.244e-06, + "loss": 22456.5703, + "step": 6220 + }, + { + "epoch": 0.012584994162017154, + "grad_norm": 418482.8125, + "learning_rate": 1.2460000000000002e-06, + "loss": 37077.075, + "step": 6230 + }, + { + "epoch": 0.01260519479470097, + "grad_norm": 4121.7490234375, + "learning_rate": 1.248e-06, + "loss": 13426.9656, + "step": 6240 + }, + { + "epoch": 0.012625395427384785, + "grad_norm": 11838.5595703125, + "learning_rate": 1.25e-06, + "loss": 10688.6336, + "step": 6250 + }, + { + "epoch": 0.012645596060068601, + "grad_norm": 21941.9296875, + "learning_rate": 1.2520000000000003e-06, + "loss": 11199.6766, + "step": 6260 + }, + { + "epoch": 0.012665796692752416, + "grad_norm": 7407.28662109375, + "learning_rate": 1.2540000000000002e-06, + "loss": 9169.3578, + "step": 6270 + }, + { + "epoch": 0.012685997325436233, + "grad_norm": 12334.8203125, + "learning_rate": 1.256e-06, + "loss": 16464.9219, + "step": 6280 + }, + { + "epoch": 0.012706197958120048, + "grad_norm": 1572.96044921875, + "learning_rate": 1.258e-06, + "loss": 14761.9625, + "step": 6290 + }, + { + "epoch": 0.012726398590803864, + "grad_norm": 382818.21875, + "learning_rate": 1.26e-06, + "loss": 23616.1531, + "step": 6300 + }, + { + "epoch": 0.012746599223487679, + "grad_norm": 49837.37109375, + "learning_rate": 1.2620000000000002e-06, + "loss": 15748.5203, + "step": 6310 + }, + { + "epoch": 0.012766799856171495, + "grad_norm": 89261.2890625, + "learning_rate": 1.2640000000000003e-06, + "loss": 8609.2336, + "step": 6320 + }, + { + "epoch": 0.01278700048885531, + "grad_norm": 21076.07421875, + "learning_rate": 1.266e-06, + "loss": 7949.8883, + "step": 6330 + }, + { + "epoch": 0.012807201121539127, + "grad_norm": 24423.455078125, + "learning_rate": 1.268e-06, + "loss": 7817.7094, + "step": 6340 + }, + { + "epoch": 0.012827401754222941, + "grad_norm": 6075.142578125, + "learning_rate": 1.2700000000000001e-06, + "loss": 27769.4875, + "step": 6350 + }, + { + "epoch": 0.012847602386906758, + "grad_norm": 29507.548828125, + "learning_rate": 1.2720000000000003e-06, + "loss": 16610.1031, + "step": 6360 + }, + { + "epoch": 0.012867803019590573, + "grad_norm": 1511.0750732421875, + "learning_rate": 1.2740000000000002e-06, + "loss": 7655.9859, + "step": 6370 + }, + { + "epoch": 0.01288800365227439, + "grad_norm": 14754.13671875, + "learning_rate": 1.276e-06, + "loss": 11080.6281, + "step": 6380 + }, + { + "epoch": 0.012908204284958204, + "grad_norm": 110098.3359375, + "learning_rate": 1.278e-06, + "loss": 13328.0203, + "step": 6390 + }, + { + "epoch": 0.01292840491764202, + "grad_norm": 257890.890625, + "learning_rate": 1.28e-06, + "loss": 25792.875, + "step": 6400 + }, + { + "epoch": 0.012948605550325835, + "grad_norm": 46371.46875, + "learning_rate": 1.2820000000000002e-06, + "loss": 17696.4047, + "step": 6410 + }, + { + "epoch": 0.012968806183009652, + "grad_norm": 35709.03515625, + "learning_rate": 1.284e-06, + "loss": 15143.7578, + "step": 6420 + }, + { + "epoch": 0.012989006815693467, + "grad_norm": 225.7156982421875, + "learning_rate": 1.286e-06, + "loss": 11281.5453, + "step": 6430 + }, + { + "epoch": 0.013009207448377283, + "grad_norm": 2728.095458984375, + "learning_rate": 1.288e-06, + "loss": 9664.7008, + "step": 6440 + }, + { + "epoch": 0.013029408081061098, + "grad_norm": 51334.125, + "learning_rate": 1.2900000000000001e-06, + "loss": 18704.9297, + "step": 6450 + }, + { + "epoch": 0.013049608713744915, + "grad_norm": 37199.16015625, + "learning_rate": 1.2920000000000003e-06, + "loss": 15023.9516, + "step": 6460 + }, + { + "epoch": 0.01306980934642873, + "grad_norm": 1387.2667236328125, + "learning_rate": 1.294e-06, + "loss": 17621.2625, + "step": 6470 + }, + { + "epoch": 0.013090009979112546, + "grad_norm": 4743.09814453125, + "learning_rate": 1.296e-06, + "loss": 22846.9313, + "step": 6480 + }, + { + "epoch": 0.01311021061179636, + "grad_norm": 16740.259765625, + "learning_rate": 1.2980000000000001e-06, + "loss": 17491.2453, + "step": 6490 + }, + { + "epoch": 0.013130411244480177, + "grad_norm": 6913.3427734375, + "learning_rate": 1.3e-06, + "loss": 25299.5297, + "step": 6500 + }, + { + "epoch": 0.013150611877163992, + "grad_norm": 41665.5390625, + "learning_rate": 1.3020000000000002e-06, + "loss": 18500.2297, + "step": 6510 + }, + { + "epoch": 0.013170812509847809, + "grad_norm": 129957.6796875, + "learning_rate": 1.304e-06, + "loss": 15455.4641, + "step": 6520 + }, + { + "epoch": 0.013191013142531623, + "grad_norm": 7750.5439453125, + "learning_rate": 1.306e-06, + "loss": 33675.6188, + "step": 6530 + }, + { + "epoch": 0.01321121377521544, + "grad_norm": 2216.72119140625, + "learning_rate": 1.308e-06, + "loss": 7992.0656, + "step": 6540 + }, + { + "epoch": 0.013231414407899255, + "grad_norm": 8618.84375, + "learning_rate": 1.3100000000000002e-06, + "loss": 13772.6328, + "step": 6550 + }, + { + "epoch": 0.013251615040583071, + "grad_norm": 8929.0537109375, + "learning_rate": 1.3120000000000003e-06, + "loss": 29050.0375, + "step": 6560 + }, + { + "epoch": 0.013271815673266886, + "grad_norm": 159729.421875, + "learning_rate": 1.314e-06, + "loss": 44360.8875, + "step": 6570 + }, + { + "epoch": 0.013292016305950703, + "grad_norm": 408752.625, + "learning_rate": 1.316e-06, + "loss": 16158.2484, + "step": 6580 + }, + { + "epoch": 0.013312216938634517, + "grad_norm": 58536.109375, + "learning_rate": 1.3180000000000001e-06, + "loss": 19676.15, + "step": 6590 + }, + { + "epoch": 0.013332417571318334, + "grad_norm": 11471.8447265625, + "learning_rate": 1.32e-06, + "loss": 19829.3031, + "step": 6600 + }, + { + "epoch": 0.013352618204002149, + "grad_norm": 1333.2705078125, + "learning_rate": 1.3220000000000002e-06, + "loss": 12406.9906, + "step": 6610 + }, + { + "epoch": 0.013372818836685965, + "grad_norm": 10976.0458984375, + "learning_rate": 1.324e-06, + "loss": 32106.8781, + "step": 6620 + }, + { + "epoch": 0.01339301946936978, + "grad_norm": 11658.36328125, + "learning_rate": 1.326e-06, + "loss": 7945.1102, + "step": 6630 + }, + { + "epoch": 0.013413220102053596, + "grad_norm": 5023.14111328125, + "learning_rate": 1.328e-06, + "loss": 24616.5484, + "step": 6640 + }, + { + "epoch": 0.013433420734737411, + "grad_norm": 52339.8359375, + "learning_rate": 1.3300000000000002e-06, + "loss": 11980.3891, + "step": 6650 + }, + { + "epoch": 0.013453621367421228, + "grad_norm": 158755.59375, + "learning_rate": 1.3320000000000003e-06, + "loss": 19977.7547, + "step": 6660 + }, + { + "epoch": 0.013473822000105043, + "grad_norm": 5178.3583984375, + "learning_rate": 1.334e-06, + "loss": 13749.9656, + "step": 6670 + }, + { + "epoch": 0.013494022632788859, + "grad_norm": 20589.2734375, + "learning_rate": 1.336e-06, + "loss": 30086.0969, + "step": 6680 + }, + { + "epoch": 0.013514223265472674, + "grad_norm": 5240.51708984375, + "learning_rate": 1.3380000000000001e-06, + "loss": 5454.3859, + "step": 6690 + }, + { + "epoch": 0.01353442389815649, + "grad_norm": 44156.46484375, + "learning_rate": 1.34e-06, + "loss": 12539.0867, + "step": 6700 + }, + { + "epoch": 0.013554624530840305, + "grad_norm": 6263.9765625, + "learning_rate": 1.3420000000000002e-06, + "loss": 8781.8695, + "step": 6710 + }, + { + "epoch": 0.013574825163524122, + "grad_norm": 28901.16015625, + "learning_rate": 1.344e-06, + "loss": 6029.6641, + "step": 6720 + }, + { + "epoch": 0.013595025796207937, + "grad_norm": 7473.57080078125, + "learning_rate": 1.3460000000000001e-06, + "loss": 26680.2687, + "step": 6730 + }, + { + "epoch": 0.013615226428891753, + "grad_norm": 32746.974609375, + "learning_rate": 1.348e-06, + "loss": 8930.3992, + "step": 6740 + }, + { + "epoch": 0.013635427061575568, + "grad_norm": 1488.567626953125, + "learning_rate": 1.3500000000000002e-06, + "loss": 3920.2133, + "step": 6750 + }, + { + "epoch": 0.013655627694259384, + "grad_norm": 73434.2890625, + "learning_rate": 1.352e-06, + "loss": 7468.3781, + "step": 6760 + }, + { + "epoch": 0.0136758283269432, + "grad_norm": 224274.515625, + "learning_rate": 1.354e-06, + "loss": 16628.8734, + "step": 6770 + }, + { + "epoch": 0.013696028959627016, + "grad_norm": 188551.71875, + "learning_rate": 1.356e-06, + "loss": 14320.4875, + "step": 6780 + }, + { + "epoch": 0.01371622959231083, + "grad_norm": 43169.09375, + "learning_rate": 1.3580000000000002e-06, + "loss": 6269.452, + "step": 6790 + }, + { + "epoch": 0.013736430224994647, + "grad_norm": 12901.5439453125, + "learning_rate": 1.3600000000000001e-06, + "loss": 11890.9984, + "step": 6800 + }, + { + "epoch": 0.013756630857678462, + "grad_norm": 35727.2421875, + "learning_rate": 1.362e-06, + "loss": 30316.2, + "step": 6810 + }, + { + "epoch": 0.013776831490362278, + "grad_norm": 108671.140625, + "learning_rate": 1.364e-06, + "loss": 6676.0016, + "step": 6820 + }, + { + "epoch": 0.013797032123046093, + "grad_norm": 19104.642578125, + "learning_rate": 1.3660000000000001e-06, + "loss": 16240.7812, + "step": 6830 + }, + { + "epoch": 0.01381723275572991, + "grad_norm": 121576.3828125, + "learning_rate": 1.368e-06, + "loss": 32759.2, + "step": 6840 + }, + { + "epoch": 0.013837433388413724, + "grad_norm": 78405.1875, + "learning_rate": 1.3700000000000002e-06, + "loss": 19409.2531, + "step": 6850 + }, + { + "epoch": 0.013857634021097541, + "grad_norm": 55331.2109375, + "learning_rate": 1.372e-06, + "loss": 17641.3125, + "step": 6860 + }, + { + "epoch": 0.013877834653781356, + "grad_norm": 3504.864013671875, + "learning_rate": 1.374e-06, + "loss": 5042.4816, + "step": 6870 + }, + { + "epoch": 0.013898035286465172, + "grad_norm": 26177.78515625, + "learning_rate": 1.376e-06, + "loss": 8298.3203, + "step": 6880 + }, + { + "epoch": 0.013918235919148987, + "grad_norm": 41438.36328125, + "learning_rate": 1.3780000000000002e-06, + "loss": 24058.5641, + "step": 6890 + }, + { + "epoch": 0.013938436551832804, + "grad_norm": 30273.14453125, + "learning_rate": 1.3800000000000001e-06, + "loss": 13656.9859, + "step": 6900 + }, + { + "epoch": 0.013958637184516618, + "grad_norm": 83375.203125, + "learning_rate": 1.382e-06, + "loss": 11249.5922, + "step": 6910 + }, + { + "epoch": 0.013978837817200435, + "grad_norm": 13275.79296875, + "learning_rate": 1.384e-06, + "loss": 27667.6937, + "step": 6920 + }, + { + "epoch": 0.01399903844988425, + "grad_norm": 17866.685546875, + "learning_rate": 1.3860000000000002e-06, + "loss": 7108.9875, + "step": 6930 + }, + { + "epoch": 0.014019239082568066, + "grad_norm": 188594.28125, + "learning_rate": 1.388e-06, + "loss": 13781.5469, + "step": 6940 + }, + { + "epoch": 0.014039439715251881, + "grad_norm": 19604.240234375, + "learning_rate": 1.3900000000000002e-06, + "loss": 7253.8102, + "step": 6950 + }, + { + "epoch": 0.014059640347935698, + "grad_norm": 293817.34375, + "learning_rate": 1.392e-06, + "loss": 27029.2938, + "step": 6960 + }, + { + "epoch": 0.014079840980619512, + "grad_norm": 60799.51953125, + "learning_rate": 1.3940000000000001e-06, + "loss": 17216.0406, + "step": 6970 + }, + { + "epoch": 0.014100041613303329, + "grad_norm": 125074.8203125, + "learning_rate": 1.396e-06, + "loss": 11996.9945, + "step": 6980 + }, + { + "epoch": 0.014120242245987144, + "grad_norm": 4032.80712890625, + "learning_rate": 1.3980000000000002e-06, + "loss": 5477.7262, + "step": 6990 + }, + { + "epoch": 0.01414044287867096, + "grad_norm": 2326.66845703125, + "learning_rate": 1.4000000000000001e-06, + "loss": 8423.6234, + "step": 7000 + }, + { + "epoch": 0.014160643511354775, + "grad_norm": 51620.51171875, + "learning_rate": 1.402e-06, + "loss": 8680.4219, + "step": 7010 + }, + { + "epoch": 0.014180844144038592, + "grad_norm": 67504.9296875, + "learning_rate": 1.404e-06, + "loss": 29956.8563, + "step": 7020 + }, + { + "epoch": 0.014201044776722406, + "grad_norm": 24694.431640625, + "learning_rate": 1.4060000000000002e-06, + "loss": 12818.1289, + "step": 7030 + }, + { + "epoch": 0.014221245409406223, + "grad_norm": 23701.880859375, + "learning_rate": 1.4080000000000001e-06, + "loss": 15917.1359, + "step": 7040 + }, + { + "epoch": 0.014241446042090038, + "grad_norm": 31768.447265625, + "learning_rate": 1.41e-06, + "loss": 8356.0961, + "step": 7050 + }, + { + "epoch": 0.014261646674773854, + "grad_norm": 28170.0625, + "learning_rate": 1.412e-06, + "loss": 6661.2977, + "step": 7060 + }, + { + "epoch": 0.014281847307457669, + "grad_norm": 22834.1640625, + "learning_rate": 1.4140000000000001e-06, + "loss": 10985.0828, + "step": 7070 + }, + { + "epoch": 0.014302047940141486, + "grad_norm": 65524.83203125, + "learning_rate": 1.416e-06, + "loss": 15868.0234, + "step": 7080 + }, + { + "epoch": 0.0143222485728253, + "grad_norm": 4871.06103515625, + "learning_rate": 1.4180000000000002e-06, + "loss": 7731.0797, + "step": 7090 + }, + { + "epoch": 0.014342449205509117, + "grad_norm": 46414.8046875, + "learning_rate": 1.42e-06, + "loss": 7633.2797, + "step": 7100 + }, + { + "epoch": 0.014362649838192932, + "grad_norm": 124104.984375, + "learning_rate": 1.4220000000000001e-06, + "loss": 31059.8219, + "step": 7110 + }, + { + "epoch": 0.014382850470876748, + "grad_norm": 5895.1884765625, + "learning_rate": 1.424e-06, + "loss": 11081.7563, + "step": 7120 + }, + { + "epoch": 0.014403051103560563, + "grad_norm": 1388.494873046875, + "learning_rate": 1.4260000000000002e-06, + "loss": 11112.9977, + "step": 7130 + }, + { + "epoch": 0.01442325173624438, + "grad_norm": 45176.203125, + "learning_rate": 1.4280000000000001e-06, + "loss": 8333.868, + "step": 7140 + }, + { + "epoch": 0.014443452368928194, + "grad_norm": 30283.33984375, + "learning_rate": 1.43e-06, + "loss": 2436.277, + "step": 7150 + }, + { + "epoch": 0.01446365300161201, + "grad_norm": 22589.083984375, + "learning_rate": 1.432e-06, + "loss": 9638.2047, + "step": 7160 + }, + { + "epoch": 0.014483853634295826, + "grad_norm": 26312.59765625, + "learning_rate": 1.4340000000000002e-06, + "loss": 19552.5125, + "step": 7170 + }, + { + "epoch": 0.014504054266979642, + "grad_norm": 1388.655517578125, + "learning_rate": 1.436e-06, + "loss": 9620.9484, + "step": 7180 + }, + { + "epoch": 0.014524254899663457, + "grad_norm": 9867.5849609375, + "learning_rate": 1.4380000000000003e-06, + "loss": 13561.1281, + "step": 7190 + }, + { + "epoch": 0.014544455532347273, + "grad_norm": 7633.67431640625, + "learning_rate": 1.44e-06, + "loss": 17560.9281, + "step": 7200 + }, + { + "epoch": 0.014564656165031088, + "grad_norm": 144.49697875976562, + "learning_rate": 1.4420000000000001e-06, + "loss": 11014.3383, + "step": 7210 + }, + { + "epoch": 0.014584856797714905, + "grad_norm": 69013.3125, + "learning_rate": 1.444e-06, + "loss": 17344.5062, + "step": 7220 + }, + { + "epoch": 0.01460505743039872, + "grad_norm": 173776.703125, + "learning_rate": 1.4460000000000002e-06, + "loss": 15244.2141, + "step": 7230 + }, + { + "epoch": 0.014625258063082536, + "grad_norm": 21598.5078125, + "learning_rate": 1.4480000000000002e-06, + "loss": 5885.6738, + "step": 7240 + }, + { + "epoch": 0.014645458695766351, + "grad_norm": 3801.53173828125, + "learning_rate": 1.45e-06, + "loss": 7521.0133, + "step": 7250 + }, + { + "epoch": 0.014665659328450167, + "grad_norm": 6827.3330078125, + "learning_rate": 1.452e-06, + "loss": 11458.1336, + "step": 7260 + }, + { + "epoch": 0.014685859961133982, + "grad_norm": 61073.86328125, + "learning_rate": 1.4540000000000002e-06, + "loss": 20056.6922, + "step": 7270 + }, + { + "epoch": 0.014706060593817799, + "grad_norm": 63308.8203125, + "learning_rate": 1.4560000000000001e-06, + "loss": 25891.0766, + "step": 7280 + }, + { + "epoch": 0.014726261226501614, + "grad_norm": 6393.193359375, + "learning_rate": 1.4580000000000003e-06, + "loss": 7875.7398, + "step": 7290 + }, + { + "epoch": 0.01474646185918543, + "grad_norm": 33021.26953125, + "learning_rate": 1.46e-06, + "loss": 19021.1219, + "step": 7300 + }, + { + "epoch": 0.014766662491869245, + "grad_norm": 19365.966796875, + "learning_rate": 1.4620000000000001e-06, + "loss": 20090.9328, + "step": 7310 + }, + { + "epoch": 0.014786863124553061, + "grad_norm": 1474.2152099609375, + "learning_rate": 1.464e-06, + "loss": 20874.2984, + "step": 7320 + }, + { + "epoch": 0.014807063757236876, + "grad_norm": 55864.90625, + "learning_rate": 1.4660000000000002e-06, + "loss": 14054.9344, + "step": 7330 + }, + { + "epoch": 0.014827264389920693, + "grad_norm": 2079.494384765625, + "learning_rate": 1.4680000000000002e-06, + "loss": 25079.7266, + "step": 7340 + }, + { + "epoch": 0.014847465022604507, + "grad_norm": 16635.931640625, + "learning_rate": 1.4700000000000001e-06, + "loss": 12540.3328, + "step": 7350 + }, + { + "epoch": 0.014867665655288324, + "grad_norm": 405221.125, + "learning_rate": 1.472e-06, + "loss": 25279.8187, + "step": 7360 + }, + { + "epoch": 0.014887866287972139, + "grad_norm": 382.17193603515625, + "learning_rate": 1.4740000000000002e-06, + "loss": 10247.4531, + "step": 7370 + }, + { + "epoch": 0.014908066920655955, + "grad_norm": 19988.833984375, + "learning_rate": 1.4760000000000001e-06, + "loss": 30705.3812, + "step": 7380 + }, + { + "epoch": 0.01492826755333977, + "grad_norm": 180768.6875, + "learning_rate": 1.478e-06, + "loss": 12860.8188, + "step": 7390 + }, + { + "epoch": 0.014948468186023587, + "grad_norm": 186730.90625, + "learning_rate": 1.48e-06, + "loss": 20007.8203, + "step": 7400 + }, + { + "epoch": 0.014968668818707401, + "grad_norm": 8008.4248046875, + "learning_rate": 1.4820000000000002e-06, + "loss": 23303.5594, + "step": 7410 + }, + { + "epoch": 0.014988869451391218, + "grad_norm": 2948.936767578125, + "learning_rate": 1.4840000000000001e-06, + "loss": 10149.6219, + "step": 7420 + }, + { + "epoch": 0.015009070084075033, + "grad_norm": 2840.257568359375, + "learning_rate": 1.4860000000000003e-06, + "loss": 11405.1164, + "step": 7430 + }, + { + "epoch": 0.01502927071675885, + "grad_norm": 3011.02734375, + "learning_rate": 1.488e-06, + "loss": 8638.1406, + "step": 7440 + }, + { + "epoch": 0.015049471349442664, + "grad_norm": 17134.943359375, + "learning_rate": 1.4900000000000001e-06, + "loss": 14044.4188, + "step": 7450 + }, + { + "epoch": 0.01506967198212648, + "grad_norm": 9995.5517578125, + "learning_rate": 1.492e-06, + "loss": 12212.1773, + "step": 7460 + }, + { + "epoch": 0.015089872614810295, + "grad_norm": 4345.95361328125, + "learning_rate": 1.4940000000000002e-06, + "loss": 10979.0977, + "step": 7470 + }, + { + "epoch": 0.015110073247494112, + "grad_norm": 46439.84765625, + "learning_rate": 1.4960000000000002e-06, + "loss": 21017.4344, + "step": 7480 + }, + { + "epoch": 0.015130273880177927, + "grad_norm": 8257.5185546875, + "learning_rate": 1.498e-06, + "loss": 23491.0875, + "step": 7490 + }, + { + "epoch": 0.015150474512861743, + "grad_norm": 12976.3017578125, + "learning_rate": 1.5e-06, + "loss": 34219.1406, + "step": 7500 + }, + { + "epoch": 0.015170675145545558, + "grad_norm": 27194.572265625, + "learning_rate": 1.5020000000000002e-06, + "loss": 12687.4937, + "step": 7510 + }, + { + "epoch": 0.015190875778229375, + "grad_norm": 18502.626953125, + "learning_rate": 1.5040000000000001e-06, + "loss": 8749.1641, + "step": 7520 + }, + { + "epoch": 0.01521107641091319, + "grad_norm": 22476.64453125, + "learning_rate": 1.5060000000000003e-06, + "loss": 22952.5922, + "step": 7530 + }, + { + "epoch": 0.015231277043597006, + "grad_norm": 270217.84375, + "learning_rate": 1.508e-06, + "loss": 14447.0266, + "step": 7540 + }, + { + "epoch": 0.01525147767628082, + "grad_norm": 95372.5546875, + "learning_rate": 1.5100000000000002e-06, + "loss": 7640.693, + "step": 7550 + }, + { + "epoch": 0.015271678308964637, + "grad_norm": 47438.609375, + "learning_rate": 1.512e-06, + "loss": 14052.2797, + "step": 7560 + }, + { + "epoch": 0.015291878941648452, + "grad_norm": 177834.875, + "learning_rate": 1.5140000000000002e-06, + "loss": 26431.3281, + "step": 7570 + }, + { + "epoch": 0.015312079574332269, + "grad_norm": 206648.859375, + "learning_rate": 1.5160000000000002e-06, + "loss": 13817.6625, + "step": 7580 + }, + { + "epoch": 0.015332280207016083, + "grad_norm": 79978.65625, + "learning_rate": 1.5180000000000001e-06, + "loss": 6433.2492, + "step": 7590 + }, + { + "epoch": 0.0153524808396999, + "grad_norm": 47938.13671875, + "learning_rate": 1.52e-06, + "loss": 6174.0719, + "step": 7600 + }, + { + "epoch": 0.015372681472383715, + "grad_norm": 379534.75, + "learning_rate": 1.5220000000000002e-06, + "loss": 25185.2672, + "step": 7610 + }, + { + "epoch": 0.015392882105067531, + "grad_norm": 6644.5771484375, + "learning_rate": 1.5240000000000001e-06, + "loss": 10203.5469, + "step": 7620 + }, + { + "epoch": 0.015413082737751346, + "grad_norm": 6582.5693359375, + "learning_rate": 1.5260000000000003e-06, + "loss": 19420.2906, + "step": 7630 + }, + { + "epoch": 0.015433283370435162, + "grad_norm": 9998.0859375, + "learning_rate": 1.528e-06, + "loss": 5107.3813, + "step": 7640 + }, + { + "epoch": 0.015453484003118977, + "grad_norm": 8445.6923828125, + "learning_rate": 1.5300000000000002e-06, + "loss": 13606.1266, + "step": 7650 + }, + { + "epoch": 0.015473684635802794, + "grad_norm": 94890.8125, + "learning_rate": 1.5320000000000001e-06, + "loss": 25219.6234, + "step": 7660 + }, + { + "epoch": 0.015493885268486609, + "grad_norm": 8073.33837890625, + "learning_rate": 1.5340000000000003e-06, + "loss": 7977.2563, + "step": 7670 + }, + { + "epoch": 0.015514085901170425, + "grad_norm": 387521.25, + "learning_rate": 1.536e-06, + "loss": 36563.9437, + "step": 7680 + }, + { + "epoch": 0.01553428653385424, + "grad_norm": 45709.1171875, + "learning_rate": 1.5380000000000001e-06, + "loss": 15488.3156, + "step": 7690 + }, + { + "epoch": 0.015554487166538056, + "grad_norm": 48193.35546875, + "learning_rate": 1.54e-06, + "loss": 13242.0672, + "step": 7700 + }, + { + "epoch": 0.015574687799221871, + "grad_norm": 3012.248046875, + "learning_rate": 1.5420000000000002e-06, + "loss": 11054.8766, + "step": 7710 + }, + { + "epoch": 0.015594888431905688, + "grad_norm": 26581.96875, + "learning_rate": 1.5440000000000002e-06, + "loss": 20315.4531, + "step": 7720 + }, + { + "epoch": 0.015615089064589503, + "grad_norm": 707524.4375, + "learning_rate": 1.546e-06, + "loss": 24993.6875, + "step": 7730 + }, + { + "epoch": 0.01563528969727332, + "grad_norm": 3509.196533203125, + "learning_rate": 1.548e-06, + "loss": 5891.5637, + "step": 7740 + }, + { + "epoch": 0.015655490329957136, + "grad_norm": 69065.359375, + "learning_rate": 1.5500000000000002e-06, + "loss": 11127.1281, + "step": 7750 + }, + { + "epoch": 0.01567569096264095, + "grad_norm": 4999.98828125, + "learning_rate": 1.5520000000000001e-06, + "loss": 17136.7109, + "step": 7760 + }, + { + "epoch": 0.015695891595324765, + "grad_norm": 27204.638671875, + "learning_rate": 1.5540000000000003e-06, + "loss": 25995.8594, + "step": 7770 + }, + { + "epoch": 0.01571609222800858, + "grad_norm": 59627.609375, + "learning_rate": 1.556e-06, + "loss": 8510.8297, + "step": 7780 + }, + { + "epoch": 0.015736292860692398, + "grad_norm": 62858.0703125, + "learning_rate": 1.5580000000000002e-06, + "loss": 20550.3797, + "step": 7790 + }, + { + "epoch": 0.01575649349337621, + "grad_norm": 59877.54296875, + "learning_rate": 1.56e-06, + "loss": 21786.6828, + "step": 7800 + }, + { + "epoch": 0.015776694126060028, + "grad_norm": 44970.84375, + "learning_rate": 1.5620000000000002e-06, + "loss": 14799.3, + "step": 7810 + }, + { + "epoch": 0.015796894758743844, + "grad_norm": 32797.1875, + "learning_rate": 1.5640000000000002e-06, + "loss": 8278.1023, + "step": 7820 + }, + { + "epoch": 0.01581709539142766, + "grad_norm": 50935.42578125, + "learning_rate": 1.566e-06, + "loss": 2548.6527, + "step": 7830 + }, + { + "epoch": 0.015837296024111474, + "grad_norm": 15591.6103515625, + "learning_rate": 1.568e-06, + "loss": 35680.9437, + "step": 7840 + }, + { + "epoch": 0.01585749665679529, + "grad_norm": 473.6663513183594, + "learning_rate": 1.5700000000000002e-06, + "loss": 8432.7461, + "step": 7850 + }, + { + "epoch": 0.015877697289479107, + "grad_norm": 22984.583984375, + "learning_rate": 1.5720000000000002e-06, + "loss": 23707.6234, + "step": 7860 + }, + { + "epoch": 0.015897897922162924, + "grad_norm": 35284.1953125, + "learning_rate": 1.5740000000000003e-06, + "loss": 9363.207, + "step": 7870 + }, + { + "epoch": 0.015918098554846737, + "grad_norm": 282275.6875, + "learning_rate": 1.576e-06, + "loss": 15207.9125, + "step": 7880 + }, + { + "epoch": 0.015938299187530553, + "grad_norm": 108339.8046875, + "learning_rate": 1.5780000000000002e-06, + "loss": 27300.2031, + "step": 7890 + }, + { + "epoch": 0.01595849982021437, + "grad_norm": 56655.76953125, + "learning_rate": 1.5800000000000001e-06, + "loss": 13281.0984, + "step": 7900 + }, + { + "epoch": 0.015978700452898186, + "grad_norm": 4969.9111328125, + "learning_rate": 1.5820000000000003e-06, + "loss": 7841.5641, + "step": 7910 + }, + { + "epoch": 0.015998901085582, + "grad_norm": 28825.724609375, + "learning_rate": 1.5840000000000002e-06, + "loss": 18123.5156, + "step": 7920 + }, + { + "epoch": 0.016019101718265816, + "grad_norm": 33547.66015625, + "learning_rate": 1.586e-06, + "loss": 23015.8187, + "step": 7930 + }, + { + "epoch": 0.016039302350949632, + "grad_norm": 7082.48876953125, + "learning_rate": 1.588e-06, + "loss": 24177.2969, + "step": 7940 + }, + { + "epoch": 0.01605950298363345, + "grad_norm": 111331.203125, + "learning_rate": 1.5900000000000002e-06, + "loss": 18262.2578, + "step": 7950 + }, + { + "epoch": 0.016079703616317262, + "grad_norm": 6212.82861328125, + "learning_rate": 1.5920000000000002e-06, + "loss": 13108.6234, + "step": 7960 + }, + { + "epoch": 0.01609990424900108, + "grad_norm": 30816.033203125, + "learning_rate": 1.594e-06, + "loss": 12134.4844, + "step": 7970 + }, + { + "epoch": 0.016120104881684895, + "grad_norm": 1693.0535888671875, + "learning_rate": 1.596e-06, + "loss": 12963.6258, + "step": 7980 + }, + { + "epoch": 0.01614030551436871, + "grad_norm": 100402.640625, + "learning_rate": 1.5980000000000002e-06, + "loss": 12246.7523, + "step": 7990 + }, + { + "epoch": 0.016160506147052525, + "grad_norm": 10500.3984375, + "learning_rate": 1.6000000000000001e-06, + "loss": 11982.7242, + "step": 8000 + }, + { + "epoch": 0.01618070677973634, + "grad_norm": 36285.34765625, + "learning_rate": 1.6020000000000003e-06, + "loss": 17776.2188, + "step": 8010 + }, + { + "epoch": 0.016200907412420158, + "grad_norm": 8388.0859375, + "learning_rate": 1.604e-06, + "loss": 4827.291, + "step": 8020 + }, + { + "epoch": 0.016221108045103974, + "grad_norm": 4566.99609375, + "learning_rate": 1.606e-06, + "loss": 15205.2078, + "step": 8030 + }, + { + "epoch": 0.016241308677787787, + "grad_norm": 186901.3125, + "learning_rate": 1.608e-06, + "loss": 34904.3063, + "step": 8040 + }, + { + "epoch": 0.016261509310471604, + "grad_norm": 3603.91015625, + "learning_rate": 1.6100000000000003e-06, + "loss": 23089.0563, + "step": 8050 + }, + { + "epoch": 0.01628170994315542, + "grad_norm": 72576.1015625, + "learning_rate": 1.6120000000000002e-06, + "loss": 9161.1078, + "step": 8060 + }, + { + "epoch": 0.016301910575839237, + "grad_norm": 71085.71875, + "learning_rate": 1.614e-06, + "loss": 34732.4344, + "step": 8070 + }, + { + "epoch": 0.01632211120852305, + "grad_norm": 29081.185546875, + "learning_rate": 1.616e-06, + "loss": 24521.7406, + "step": 8080 + }, + { + "epoch": 0.016342311841206866, + "grad_norm": 6928.611328125, + "learning_rate": 1.6180000000000002e-06, + "loss": 5536.607, + "step": 8090 + }, + { + "epoch": 0.016362512473890683, + "grad_norm": 44989.94140625, + "learning_rate": 1.6200000000000002e-06, + "loss": 24191.5281, + "step": 8100 + }, + { + "epoch": 0.0163827131065745, + "grad_norm": 127265.796875, + "learning_rate": 1.6220000000000003e-06, + "loss": 20864.2125, + "step": 8110 + }, + { + "epoch": 0.016402913739258312, + "grad_norm": 78548.2890625, + "learning_rate": 1.624e-06, + "loss": 20310.1594, + "step": 8120 + }, + { + "epoch": 0.01642311437194213, + "grad_norm": 338059.84375, + "learning_rate": 1.626e-06, + "loss": 18908.4328, + "step": 8130 + }, + { + "epoch": 0.016443315004625945, + "grad_norm": 24857.833984375, + "learning_rate": 1.6280000000000001e-06, + "loss": 12864.4562, + "step": 8140 + }, + { + "epoch": 0.016463515637309762, + "grad_norm": 29954.40625, + "learning_rate": 1.6300000000000003e-06, + "loss": 20422.7375, + "step": 8150 + }, + { + "epoch": 0.016483716269993575, + "grad_norm": 17649.640625, + "learning_rate": 1.6320000000000002e-06, + "loss": 6661.4266, + "step": 8160 + }, + { + "epoch": 0.01650391690267739, + "grad_norm": 76565.6015625, + "learning_rate": 1.634e-06, + "loss": 26073.8125, + "step": 8170 + }, + { + "epoch": 0.016524117535361208, + "grad_norm": 14915.697265625, + "learning_rate": 1.636e-06, + "loss": 15576.4109, + "step": 8180 + }, + { + "epoch": 0.016544318168045025, + "grad_norm": 3946.53369140625, + "learning_rate": 1.6380000000000002e-06, + "loss": 11705.0242, + "step": 8190 + }, + { + "epoch": 0.016564518800728838, + "grad_norm": 5438.3046875, + "learning_rate": 1.6400000000000002e-06, + "loss": 19728.8109, + "step": 8200 + }, + { + "epoch": 0.016584719433412654, + "grad_norm": 14046.2568359375, + "learning_rate": 1.6420000000000003e-06, + "loss": 6943.7773, + "step": 8210 + }, + { + "epoch": 0.01660492006609647, + "grad_norm": 1289.6888427734375, + "learning_rate": 1.644e-06, + "loss": 6915.8406, + "step": 8220 + }, + { + "epoch": 0.016625120698780287, + "grad_norm": 21007.798828125, + "learning_rate": 1.646e-06, + "loss": 18950.725, + "step": 8230 + }, + { + "epoch": 0.0166453213314641, + "grad_norm": 7116.79052734375, + "learning_rate": 1.6480000000000001e-06, + "loss": 18018.1109, + "step": 8240 + }, + { + "epoch": 0.016665521964147917, + "grad_norm": 776.9560546875, + "learning_rate": 1.6500000000000003e-06, + "loss": 6975.0938, + "step": 8250 + }, + { + "epoch": 0.016685722596831733, + "grad_norm": 6495.59228515625, + "learning_rate": 1.6520000000000002e-06, + "loss": 13780.9953, + "step": 8260 + }, + { + "epoch": 0.01670592322951555, + "grad_norm": 7925.15087890625, + "learning_rate": 1.654e-06, + "loss": 9083.9797, + "step": 8270 + }, + { + "epoch": 0.016726123862199363, + "grad_norm": 35554.390625, + "learning_rate": 1.6560000000000001e-06, + "loss": 17314.4719, + "step": 8280 + }, + { + "epoch": 0.01674632449488318, + "grad_norm": 23247.4921875, + "learning_rate": 1.6580000000000003e-06, + "loss": 24720.8453, + "step": 8290 + }, + { + "epoch": 0.016766525127566996, + "grad_norm": 36798.72265625, + "learning_rate": 1.6600000000000002e-06, + "loss": 21330.6234, + "step": 8300 + }, + { + "epoch": 0.016786725760250813, + "grad_norm": 21632.177734375, + "learning_rate": 1.662e-06, + "loss": 23943.7844, + "step": 8310 + }, + { + "epoch": 0.016806926392934626, + "grad_norm": 347.73321533203125, + "learning_rate": 1.664e-06, + "loss": 12062.457, + "step": 8320 + }, + { + "epoch": 0.016827127025618442, + "grad_norm": 62908.69140625, + "learning_rate": 1.666e-06, + "loss": 17520.35, + "step": 8330 + }, + { + "epoch": 0.01684732765830226, + "grad_norm": 14732.880859375, + "learning_rate": 1.6680000000000002e-06, + "loss": 16994.6703, + "step": 8340 + }, + { + "epoch": 0.016867528290986075, + "grad_norm": 36801.72265625, + "learning_rate": 1.6700000000000003e-06, + "loss": 18838.0062, + "step": 8350 + }, + { + "epoch": 0.016887728923669888, + "grad_norm": 74101.484375, + "learning_rate": 1.672e-06, + "loss": 12375.0086, + "step": 8360 + }, + { + "epoch": 0.016907929556353705, + "grad_norm": 4289.52978515625, + "learning_rate": 1.674e-06, + "loss": 11961.2953, + "step": 8370 + }, + { + "epoch": 0.01692813018903752, + "grad_norm": 1771.85400390625, + "learning_rate": 1.6760000000000001e-06, + "loss": 6116.3938, + "step": 8380 + }, + { + "epoch": 0.016948330821721338, + "grad_norm": 7522.46337890625, + "learning_rate": 1.6780000000000003e-06, + "loss": 7218.4492, + "step": 8390 + }, + { + "epoch": 0.01696853145440515, + "grad_norm": 9449.8232421875, + "learning_rate": 1.6800000000000002e-06, + "loss": 8706.1969, + "step": 8400 + }, + { + "epoch": 0.016988732087088967, + "grad_norm": 10755.6025390625, + "learning_rate": 1.682e-06, + "loss": 12318.2523, + "step": 8410 + }, + { + "epoch": 0.017008932719772784, + "grad_norm": 5160.26220703125, + "learning_rate": 1.684e-06, + "loss": 11767.0609, + "step": 8420 + }, + { + "epoch": 0.0170291333524566, + "grad_norm": 42028.4921875, + "learning_rate": 1.686e-06, + "loss": 11667.0516, + "step": 8430 + }, + { + "epoch": 0.017049333985140414, + "grad_norm": 1600.973388671875, + "learning_rate": 1.6880000000000002e-06, + "loss": 6666.9602, + "step": 8440 + }, + { + "epoch": 0.01706953461782423, + "grad_norm": 27729.75, + "learning_rate": 1.6900000000000003e-06, + "loss": 13334.1703, + "step": 8450 + }, + { + "epoch": 0.017089735250508047, + "grad_norm": 36061.89453125, + "learning_rate": 1.692e-06, + "loss": 18552.2344, + "step": 8460 + }, + { + "epoch": 0.017109935883191863, + "grad_norm": 27357.47265625, + "learning_rate": 1.694e-06, + "loss": 35550.525, + "step": 8470 + }, + { + "epoch": 0.017130136515875676, + "grad_norm": 1577.767578125, + "learning_rate": 1.6960000000000002e-06, + "loss": 15026.7, + "step": 8480 + }, + { + "epoch": 0.017150337148559493, + "grad_norm": 14798.3818359375, + "learning_rate": 1.6980000000000003e-06, + "loss": 18012.7687, + "step": 8490 + }, + { + "epoch": 0.01717053778124331, + "grad_norm": 78593.7578125, + "learning_rate": 1.7000000000000002e-06, + "loss": 12636.1719, + "step": 8500 + }, + { + "epoch": 0.017190738413927126, + "grad_norm": 56956.671875, + "learning_rate": 1.702e-06, + "loss": 19664.6594, + "step": 8510 + }, + { + "epoch": 0.01721093904661094, + "grad_norm": 36313.49609375, + "learning_rate": 1.7040000000000001e-06, + "loss": 13359.4937, + "step": 8520 + }, + { + "epoch": 0.017231139679294755, + "grad_norm": 2114.220458984375, + "learning_rate": 1.706e-06, + "loss": 6685.3594, + "step": 8530 + }, + { + "epoch": 0.017251340311978572, + "grad_norm": 114415.953125, + "learning_rate": 1.7080000000000002e-06, + "loss": 23796.0828, + "step": 8540 + }, + { + "epoch": 0.01727154094466239, + "grad_norm": 441749.84375, + "learning_rate": 1.7100000000000004e-06, + "loss": 24015.2031, + "step": 8550 + }, + { + "epoch": 0.0172917415773462, + "grad_norm": 105464.5703125, + "learning_rate": 1.712e-06, + "loss": 14302.3312, + "step": 8560 + }, + { + "epoch": 0.017311942210030018, + "grad_norm": 162179.40625, + "learning_rate": 1.714e-06, + "loss": 17441.4625, + "step": 8570 + }, + { + "epoch": 0.017332142842713835, + "grad_norm": 7524.65380859375, + "learning_rate": 1.7160000000000002e-06, + "loss": 5110.5762, + "step": 8580 + }, + { + "epoch": 0.01735234347539765, + "grad_norm": 36814.98046875, + "learning_rate": 1.7180000000000003e-06, + "loss": 5640.9402, + "step": 8590 + }, + { + "epoch": 0.017372544108081464, + "grad_norm": 57213.35546875, + "learning_rate": 1.72e-06, + "loss": 23453.6141, + "step": 8600 + }, + { + "epoch": 0.01739274474076528, + "grad_norm": 21268.857421875, + "learning_rate": 1.722e-06, + "loss": 26455.4969, + "step": 8610 + }, + { + "epoch": 0.017412945373449097, + "grad_norm": 122390.1640625, + "learning_rate": 1.7240000000000001e-06, + "loss": 22409.7547, + "step": 8620 + }, + { + "epoch": 0.017433146006132914, + "grad_norm": 25622.80859375, + "learning_rate": 1.726e-06, + "loss": 15545.6875, + "step": 8630 + }, + { + "epoch": 0.017453346638816727, + "grad_norm": 99507.5625, + "learning_rate": 1.7280000000000002e-06, + "loss": 32603.4219, + "step": 8640 + }, + { + "epoch": 0.017473547271500543, + "grad_norm": 86075.5390625, + "learning_rate": 1.73e-06, + "loss": 8089.3758, + "step": 8650 + }, + { + "epoch": 0.01749374790418436, + "grad_norm": 6281.654296875, + "learning_rate": 1.732e-06, + "loss": 9604.0, + "step": 8660 + }, + { + "epoch": 0.017513948536868176, + "grad_norm": 37068.42578125, + "learning_rate": 1.734e-06, + "loss": 17980.2922, + "step": 8670 + }, + { + "epoch": 0.01753414916955199, + "grad_norm": 31098.34765625, + "learning_rate": 1.7360000000000002e-06, + "loss": 21015.4562, + "step": 8680 + }, + { + "epoch": 0.017554349802235806, + "grad_norm": 4011.788330078125, + "learning_rate": 1.7380000000000003e-06, + "loss": 4810.9082, + "step": 8690 + }, + { + "epoch": 0.017574550434919622, + "grad_norm": 133353.296875, + "learning_rate": 1.74e-06, + "loss": 19126.8563, + "step": 8700 + }, + { + "epoch": 0.01759475106760344, + "grad_norm": 118925.328125, + "learning_rate": 1.742e-06, + "loss": 14214.9531, + "step": 8710 + }, + { + "epoch": 0.017614951700287252, + "grad_norm": 94544.9921875, + "learning_rate": 1.7440000000000002e-06, + "loss": 4311.3758, + "step": 8720 + }, + { + "epoch": 0.01763515233297107, + "grad_norm": 6562.328125, + "learning_rate": 1.746e-06, + "loss": 7817.0055, + "step": 8730 + }, + { + "epoch": 0.017655352965654885, + "grad_norm": 40556.171875, + "learning_rate": 1.7480000000000002e-06, + "loss": 13012.207, + "step": 8740 + }, + { + "epoch": 0.0176755535983387, + "grad_norm": 3296.880615234375, + "learning_rate": 1.75e-06, + "loss": 16394.0406, + "step": 8750 + }, + { + "epoch": 0.017695754231022515, + "grad_norm": 8397.732421875, + "learning_rate": 1.7520000000000001e-06, + "loss": 8451.1672, + "step": 8760 + }, + { + "epoch": 0.01771595486370633, + "grad_norm": 37572.5078125, + "learning_rate": 1.754e-06, + "loss": 10004.5883, + "step": 8770 + }, + { + "epoch": 0.017736155496390148, + "grad_norm": 101023.6328125, + "learning_rate": 1.7560000000000002e-06, + "loss": 7904.1234, + "step": 8780 + }, + { + "epoch": 0.017756356129073964, + "grad_norm": 6500.74560546875, + "learning_rate": 1.7580000000000004e-06, + "loss": 20434.9297, + "step": 8790 + }, + { + "epoch": 0.017776556761757777, + "grad_norm": 1547.491455078125, + "learning_rate": 1.76e-06, + "loss": 13932.7844, + "step": 8800 + }, + { + "epoch": 0.017796757394441594, + "grad_norm": 4261.4150390625, + "learning_rate": 1.762e-06, + "loss": 5743.2289, + "step": 8810 + }, + { + "epoch": 0.01781695802712541, + "grad_norm": 11384.724609375, + "learning_rate": 1.7640000000000002e-06, + "loss": 8340.9062, + "step": 8820 + }, + { + "epoch": 0.017837158659809227, + "grad_norm": 30608.41796875, + "learning_rate": 1.7660000000000001e-06, + "loss": 10316.5727, + "step": 8830 + }, + { + "epoch": 0.01785735929249304, + "grad_norm": 216002.953125, + "learning_rate": 1.7680000000000003e-06, + "loss": 14282.3766, + "step": 8840 + }, + { + "epoch": 0.017877559925176856, + "grad_norm": 31853.7265625, + "learning_rate": 1.77e-06, + "loss": 14546.0844, + "step": 8850 + }, + { + "epoch": 0.017897760557860673, + "grad_norm": 15802.69921875, + "learning_rate": 1.7720000000000001e-06, + "loss": 4982.4094, + "step": 8860 + }, + { + "epoch": 0.01791796119054449, + "grad_norm": 20033.650390625, + "learning_rate": 1.774e-06, + "loss": 9656.9016, + "step": 8870 + }, + { + "epoch": 0.017938161823228303, + "grad_norm": 61221.71484375, + "learning_rate": 1.7760000000000002e-06, + "loss": 7679.7359, + "step": 8880 + }, + { + "epoch": 0.01795836245591212, + "grad_norm": 67195.5859375, + "learning_rate": 1.7780000000000004e-06, + "loss": 10861.5773, + "step": 8890 + }, + { + "epoch": 0.017978563088595936, + "grad_norm": 12194.9921875, + "learning_rate": 1.7800000000000001e-06, + "loss": 9247.4477, + "step": 8900 + }, + { + "epoch": 0.017998763721279752, + "grad_norm": 39572.05859375, + "learning_rate": 1.782e-06, + "loss": 6484.5961, + "step": 8910 + }, + { + "epoch": 0.018018964353963565, + "grad_norm": 3655.52392578125, + "learning_rate": 1.7840000000000002e-06, + "loss": 14718.9734, + "step": 8920 + }, + { + "epoch": 0.018039164986647382, + "grad_norm": 35591.1328125, + "learning_rate": 1.7860000000000001e-06, + "loss": 12907.8344, + "step": 8930 + }, + { + "epoch": 0.0180593656193312, + "grad_norm": 56747.734375, + "learning_rate": 1.788e-06, + "loss": 8704.8797, + "step": 8940 + }, + { + "epoch": 0.018079566252015015, + "grad_norm": 1011.18115234375, + "learning_rate": 1.79e-06, + "loss": 11302.2273, + "step": 8950 + }, + { + "epoch": 0.018099766884698828, + "grad_norm": 37335.23046875, + "learning_rate": 1.7920000000000002e-06, + "loss": 20566.6672, + "step": 8960 + }, + { + "epoch": 0.018119967517382644, + "grad_norm": 188172.4375, + "learning_rate": 1.794e-06, + "loss": 13321.0547, + "step": 8970 + }, + { + "epoch": 0.01814016815006646, + "grad_norm": 64716.75390625, + "learning_rate": 1.7960000000000003e-06, + "loss": 7287.1781, + "step": 8980 + }, + { + "epoch": 0.018160368782750277, + "grad_norm": 280774.21875, + "learning_rate": 1.798e-06, + "loss": 13311.4875, + "step": 8990 + }, + { + "epoch": 0.01818056941543409, + "grad_norm": 17050.95703125, + "learning_rate": 1.8000000000000001e-06, + "loss": 17053.7219, + "step": 9000 + }, + { + "epoch": 0.018200770048117907, + "grad_norm": 12757.2998046875, + "learning_rate": 1.802e-06, + "loss": 13734.1625, + "step": 9010 + }, + { + "epoch": 0.018220970680801724, + "grad_norm": 173252.421875, + "learning_rate": 1.8040000000000002e-06, + "loss": 11885.6609, + "step": 9020 + }, + { + "epoch": 0.01824117131348554, + "grad_norm": 105546.6640625, + "learning_rate": 1.8060000000000002e-06, + "loss": 18098.3719, + "step": 9030 + }, + { + "epoch": 0.018261371946169353, + "grad_norm": 45804.09375, + "learning_rate": 1.808e-06, + "loss": 12734.05, + "step": 9040 + }, + { + "epoch": 0.01828157257885317, + "grad_norm": 14127.3291015625, + "learning_rate": 1.81e-06, + "loss": 6741.1727, + "step": 9050 + }, + { + "epoch": 0.018301773211536986, + "grad_norm": 55265.3671875, + "learning_rate": 1.8120000000000002e-06, + "loss": 11828.4719, + "step": 9060 + }, + { + "epoch": 0.018321973844220803, + "grad_norm": 33607.90234375, + "learning_rate": 1.8140000000000001e-06, + "loss": 8168.7453, + "step": 9070 + }, + { + "epoch": 0.018342174476904616, + "grad_norm": 1952.7432861328125, + "learning_rate": 1.8160000000000003e-06, + "loss": 4124.8336, + "step": 9080 + }, + { + "epoch": 0.018362375109588432, + "grad_norm": 34107.12109375, + "learning_rate": 1.818e-06, + "loss": 10475.8687, + "step": 9090 + }, + { + "epoch": 0.01838257574227225, + "grad_norm": 1828.3487548828125, + "learning_rate": 1.8200000000000002e-06, + "loss": 25089.0109, + "step": 9100 + }, + { + "epoch": 0.018402776374956065, + "grad_norm": 2380.738525390625, + "learning_rate": 1.822e-06, + "loss": 18292.3812, + "step": 9110 + }, + { + "epoch": 0.01842297700763988, + "grad_norm": 46805.91796875, + "learning_rate": 1.8240000000000002e-06, + "loss": 29699.2812, + "step": 9120 + }, + { + "epoch": 0.018443177640323695, + "grad_norm": 0.0, + "learning_rate": 1.8260000000000002e-06, + "loss": 14292.5172, + "step": 9130 + }, + { + "epoch": 0.01846337827300751, + "grad_norm": 105565.7109375, + "learning_rate": 1.8280000000000001e-06, + "loss": 14340.7578, + "step": 9140 + }, + { + "epoch": 0.018483578905691325, + "grad_norm": 114032.5859375, + "learning_rate": 1.83e-06, + "loss": 21718.5625, + "step": 9150 + }, + { + "epoch": 0.01850377953837514, + "grad_norm": 3267.939453125, + "learning_rate": 1.8320000000000002e-06, + "loss": 4443.316, + "step": 9160 + }, + { + "epoch": 0.018523980171058958, + "grad_norm": 27140.021484375, + "learning_rate": 1.8340000000000001e-06, + "loss": 15865.5844, + "step": 9170 + }, + { + "epoch": 0.018544180803742774, + "grad_norm": 3081.318115234375, + "learning_rate": 1.8360000000000003e-06, + "loss": 10098.3008, + "step": 9180 + }, + { + "epoch": 0.018564381436426587, + "grad_norm": 130473.59375, + "learning_rate": 1.838e-06, + "loss": 15714.975, + "step": 9190 + }, + { + "epoch": 0.018584582069110404, + "grad_norm": 35855.9375, + "learning_rate": 1.8400000000000002e-06, + "loss": 27474.9406, + "step": 9200 + }, + { + "epoch": 0.01860478270179422, + "grad_norm": 44820.796875, + "learning_rate": 1.8420000000000001e-06, + "loss": 21417.2891, + "step": 9210 + }, + { + "epoch": 0.018624983334478037, + "grad_norm": 8466.6357421875, + "learning_rate": 1.8440000000000003e-06, + "loss": 12796.1922, + "step": 9220 + }, + { + "epoch": 0.01864518396716185, + "grad_norm": 10022.8056640625, + "learning_rate": 1.846e-06, + "loss": 11545.5648, + "step": 9230 + }, + { + "epoch": 0.018665384599845666, + "grad_norm": 797.6505737304688, + "learning_rate": 1.8480000000000001e-06, + "loss": 9365.9562, + "step": 9240 + }, + { + "epoch": 0.018685585232529483, + "grad_norm": 57274.890625, + "learning_rate": 1.85e-06, + "loss": 9034.0477, + "step": 9250 + }, + { + "epoch": 0.0187057858652133, + "grad_norm": 17805.453125, + "learning_rate": 1.8520000000000002e-06, + "loss": 6518.9414, + "step": 9260 + }, + { + "epoch": 0.018725986497897112, + "grad_norm": 20171.54296875, + "learning_rate": 1.8540000000000002e-06, + "loss": 9015.8484, + "step": 9270 + }, + { + "epoch": 0.01874618713058093, + "grad_norm": 112563.5703125, + "learning_rate": 1.856e-06, + "loss": 9224.3797, + "step": 9280 + }, + { + "epoch": 0.018766387763264746, + "grad_norm": 66742.390625, + "learning_rate": 1.858e-06, + "loss": 17995.8, + "step": 9290 + }, + { + "epoch": 0.018786588395948562, + "grad_norm": 77701.4375, + "learning_rate": 1.8600000000000002e-06, + "loss": 19777.9469, + "step": 9300 + }, + { + "epoch": 0.018806789028632375, + "grad_norm": 56533.734375, + "learning_rate": 1.8620000000000001e-06, + "loss": 19583.7453, + "step": 9310 + }, + { + "epoch": 0.01882698966131619, + "grad_norm": 48316.12890625, + "learning_rate": 1.8640000000000003e-06, + "loss": 15982.2594, + "step": 9320 + }, + { + "epoch": 0.018847190294000008, + "grad_norm": 30509.46484375, + "learning_rate": 1.866e-06, + "loss": 11389.632, + "step": 9330 + }, + { + "epoch": 0.018867390926683825, + "grad_norm": 21668.52734375, + "learning_rate": 1.8680000000000002e-06, + "loss": 22580.5922, + "step": 9340 + }, + { + "epoch": 0.018887591559367638, + "grad_norm": 4078.613037109375, + "learning_rate": 1.87e-06, + "loss": 4624.1828, + "step": 9350 + }, + { + "epoch": 0.018907792192051454, + "grad_norm": 22914.55078125, + "learning_rate": 1.8720000000000002e-06, + "loss": 10873.8937, + "step": 9360 + }, + { + "epoch": 0.01892799282473527, + "grad_norm": 14259.6083984375, + "learning_rate": 1.8740000000000002e-06, + "loss": 14685.4172, + "step": 9370 + }, + { + "epoch": 0.018948193457419087, + "grad_norm": 222177.890625, + "learning_rate": 1.8760000000000001e-06, + "loss": 10622.3219, + "step": 9380 + }, + { + "epoch": 0.0189683940901029, + "grad_norm": 5978.0927734375, + "learning_rate": 1.878e-06, + "loss": 9771.2414, + "step": 9390 + }, + { + "epoch": 0.018988594722786717, + "grad_norm": 80206.1171875, + "learning_rate": 1.8800000000000002e-06, + "loss": 16134.0031, + "step": 9400 + }, + { + "epoch": 0.019008795355470533, + "grad_norm": 39605.77734375, + "learning_rate": 1.8820000000000001e-06, + "loss": 20598.0031, + "step": 9410 + }, + { + "epoch": 0.01902899598815435, + "grad_norm": 49570.20703125, + "learning_rate": 1.8840000000000003e-06, + "loss": 18757.3281, + "step": 9420 + }, + { + "epoch": 0.019049196620838163, + "grad_norm": 19803.54296875, + "learning_rate": 1.886e-06, + "loss": 19535.1625, + "step": 9430 + }, + { + "epoch": 0.01906939725352198, + "grad_norm": 0.0, + "learning_rate": 1.8880000000000002e-06, + "loss": 7279.1984, + "step": 9440 + }, + { + "epoch": 0.019089597886205796, + "grad_norm": 5013.3701171875, + "learning_rate": 1.8900000000000001e-06, + "loss": 8356.9617, + "step": 9450 + }, + { + "epoch": 0.019109798518889613, + "grad_norm": 21797.5, + "learning_rate": 1.8920000000000003e-06, + "loss": 16583.575, + "step": 9460 + }, + { + "epoch": 0.019129999151573426, + "grad_norm": 57297.15625, + "learning_rate": 1.8940000000000002e-06, + "loss": 16544.8266, + "step": 9470 + }, + { + "epoch": 0.019150199784257242, + "grad_norm": 103634.640625, + "learning_rate": 1.8960000000000001e-06, + "loss": 12514.6766, + "step": 9480 + }, + { + "epoch": 0.01917040041694106, + "grad_norm": 454405.21875, + "learning_rate": 1.898e-06, + "loss": 25875.8438, + "step": 9490 + }, + { + "epoch": 0.019190601049624875, + "grad_norm": 56303.640625, + "learning_rate": 1.9000000000000002e-06, + "loss": 13404.3953, + "step": 9500 + }, + { + "epoch": 0.01921080168230869, + "grad_norm": 9778.6923828125, + "learning_rate": 1.9020000000000002e-06, + "loss": 9763.8438, + "step": 9510 + }, + { + "epoch": 0.019231002314992505, + "grad_norm": 17689.013671875, + "learning_rate": 1.9040000000000003e-06, + "loss": 28407.8312, + "step": 9520 + }, + { + "epoch": 0.01925120294767632, + "grad_norm": 22796.755859375, + "learning_rate": 1.906e-06, + "loss": 27397.05, + "step": 9530 + }, + { + "epoch": 0.019271403580360138, + "grad_norm": 25413.19140625, + "learning_rate": 1.908e-06, + "loss": 9115.75, + "step": 9540 + }, + { + "epoch": 0.01929160421304395, + "grad_norm": 57803.55859375, + "learning_rate": 1.9100000000000003e-06, + "loss": 12081.7508, + "step": 9550 + }, + { + "epoch": 0.019311804845727767, + "grad_norm": 32141.400390625, + "learning_rate": 1.912e-06, + "loss": 13197.55, + "step": 9560 + }, + { + "epoch": 0.019332005478411584, + "grad_norm": 2522.803466796875, + "learning_rate": 1.9140000000000002e-06, + "loss": 6442.3309, + "step": 9570 + }, + { + "epoch": 0.0193522061110954, + "grad_norm": 5419.42724609375, + "learning_rate": 1.916e-06, + "loss": 6441.0391, + "step": 9580 + }, + { + "epoch": 0.019372406743779214, + "grad_norm": 0.0, + "learning_rate": 1.918e-06, + "loss": 15337.0172, + "step": 9590 + }, + { + "epoch": 0.01939260737646303, + "grad_norm": 20183.28125, + "learning_rate": 1.9200000000000003e-06, + "loss": 6845.782, + "step": 9600 + }, + { + "epoch": 0.019412808009146847, + "grad_norm": 77743.9609375, + "learning_rate": 1.9220000000000004e-06, + "loss": 9753.3289, + "step": 9610 + }, + { + "epoch": 0.019433008641830663, + "grad_norm": 19934.73828125, + "learning_rate": 1.924e-06, + "loss": 6234.175, + "step": 9620 + }, + { + "epoch": 0.019453209274514476, + "grad_norm": 8846.2919921875, + "learning_rate": 1.9260000000000003e-06, + "loss": 20301.7656, + "step": 9630 + }, + { + "epoch": 0.019473409907198293, + "grad_norm": 126589.640625, + "learning_rate": 1.928e-06, + "loss": 10572.1273, + "step": 9640 + }, + { + "epoch": 0.01949361053988211, + "grad_norm": 872.2177734375, + "learning_rate": 1.93e-06, + "loss": 9965.3641, + "step": 9650 + }, + { + "epoch": 0.019513811172565926, + "grad_norm": 218300.234375, + "learning_rate": 1.9320000000000003e-06, + "loss": 17513.9219, + "step": 9660 + }, + { + "epoch": 0.01953401180524974, + "grad_norm": 25419.0703125, + "learning_rate": 1.934e-06, + "loss": 8296.2875, + "step": 9670 + }, + { + "epoch": 0.019554212437933555, + "grad_norm": 52334.0, + "learning_rate": 1.936e-06, + "loss": 6550.7469, + "step": 9680 + }, + { + "epoch": 0.019574413070617372, + "grad_norm": 3520.184814453125, + "learning_rate": 1.9380000000000003e-06, + "loss": 19590.5312, + "step": 9690 + }, + { + "epoch": 0.01959461370330119, + "grad_norm": 18193.005859375, + "learning_rate": 1.94e-06, + "loss": 24007.5875, + "step": 9700 + }, + { + "epoch": 0.019614814335985, + "grad_norm": 8535.96875, + "learning_rate": 1.942e-06, + "loss": 6228.6723, + "step": 9710 + }, + { + "epoch": 0.019635014968668818, + "grad_norm": 856.751953125, + "learning_rate": 1.944e-06, + "loss": 7100.4133, + "step": 9720 + }, + { + "epoch": 0.019655215601352635, + "grad_norm": 18193.998046875, + "learning_rate": 1.946e-06, + "loss": 10770.1664, + "step": 9730 + }, + { + "epoch": 0.01967541623403645, + "grad_norm": 206367.296875, + "learning_rate": 1.9480000000000002e-06, + "loss": 14603.7344, + "step": 9740 + }, + { + "epoch": 0.019695616866720264, + "grad_norm": 33896.82421875, + "learning_rate": 1.9500000000000004e-06, + "loss": 6476.4117, + "step": 9750 + }, + { + "epoch": 0.01971581749940408, + "grad_norm": 6885.41748046875, + "learning_rate": 1.952e-06, + "loss": 8252.8539, + "step": 9760 + }, + { + "epoch": 0.019736018132087897, + "grad_norm": 2897.56787109375, + "learning_rate": 1.9540000000000003e-06, + "loss": 9926.2891, + "step": 9770 + }, + { + "epoch": 0.019756218764771714, + "grad_norm": 37388.37890625, + "learning_rate": 1.956e-06, + "loss": 6989.468, + "step": 9780 + }, + { + "epoch": 0.019776419397455527, + "grad_norm": 5452.5732421875, + "learning_rate": 1.958e-06, + "loss": 32031.775, + "step": 9790 + }, + { + "epoch": 0.019796620030139343, + "grad_norm": 8724.3232421875, + "learning_rate": 1.9600000000000003e-06, + "loss": 10727.6359, + "step": 9800 + }, + { + "epoch": 0.01981682066282316, + "grad_norm": 59824.16015625, + "learning_rate": 1.9620000000000004e-06, + "loss": 18896.9016, + "step": 9810 + }, + { + "epoch": 0.019837021295506976, + "grad_norm": 71291.4609375, + "learning_rate": 1.964e-06, + "loss": 17376.2156, + "step": 9820 + }, + { + "epoch": 0.01985722192819079, + "grad_norm": 49356.70703125, + "learning_rate": 1.9660000000000003e-06, + "loss": 3006.2064, + "step": 9830 + }, + { + "epoch": 0.019877422560874606, + "grad_norm": 2757.724609375, + "learning_rate": 1.968e-06, + "loss": 13280.4953, + "step": 9840 + }, + { + "epoch": 0.019897623193558422, + "grad_norm": 28293.869140625, + "learning_rate": 1.97e-06, + "loss": 10987.0852, + "step": 9850 + }, + { + "epoch": 0.01991782382624224, + "grad_norm": 76282.7734375, + "learning_rate": 1.972e-06, + "loss": 16348.9891, + "step": 9860 + }, + { + "epoch": 0.019938024458926052, + "grad_norm": 32159.00390625, + "learning_rate": 1.974e-06, + "loss": 14772.2016, + "step": 9870 + }, + { + "epoch": 0.01995822509160987, + "grad_norm": 30913.25390625, + "learning_rate": 1.9760000000000002e-06, + "loss": 19321.3047, + "step": 9880 + }, + { + "epoch": 0.019978425724293685, + "grad_norm": 19321.078125, + "learning_rate": 1.9780000000000004e-06, + "loss": 13175.8625, + "step": 9890 + }, + { + "epoch": 0.0199986263569775, + "grad_norm": 2680.1787109375, + "learning_rate": 1.98e-06, + "loss": 12522.8648, + "step": 9900 + }, + { + "epoch": 0.020018826989661315, + "grad_norm": 172524.3125, + "learning_rate": 1.982e-06, + "loss": 7870.9234, + "step": 9910 + }, + { + "epoch": 0.02003902762234513, + "grad_norm": 20942.68359375, + "learning_rate": 1.984e-06, + "loss": 5196.0437, + "step": 9920 + }, + { + "epoch": 0.020059228255028948, + "grad_norm": 2929.58349609375, + "learning_rate": 1.986e-06, + "loss": 20708.9234, + "step": 9930 + }, + { + "epoch": 0.020079428887712764, + "grad_norm": 12528.7236328125, + "learning_rate": 1.9880000000000003e-06, + "loss": 16700.6781, + "step": 9940 + }, + { + "epoch": 0.020099629520396577, + "grad_norm": 11406.9697265625, + "learning_rate": 1.9900000000000004e-06, + "loss": 17126.85, + "step": 9950 + }, + { + "epoch": 0.020119830153080394, + "grad_norm": 2851.818359375, + "learning_rate": 1.992e-06, + "loss": 9085.3789, + "step": 9960 + }, + { + "epoch": 0.02014003078576421, + "grad_norm": 297214.3125, + "learning_rate": 1.9940000000000003e-06, + "loss": 21690.7938, + "step": 9970 + }, + { + "epoch": 0.020160231418448027, + "grad_norm": 33661.8984375, + "learning_rate": 1.996e-06, + "loss": 16905.5859, + "step": 9980 + }, + { + "epoch": 0.02018043205113184, + "grad_norm": 384996.5, + "learning_rate": 1.998e-06, + "loss": 21905.2984, + "step": 9990 + }, + { + "epoch": 0.020200632683815656, + "grad_norm": 89269.3046875, + "learning_rate": 2.0000000000000003e-06, + "loss": 11651.1844, + "step": 10000 + }, + { + "epoch": 0.020220833316499473, + "grad_norm": 72314.6015625, + "learning_rate": 2.002e-06, + "loss": 6714.7688, + "step": 10010 + }, + { + "epoch": 0.02024103394918329, + "grad_norm": 35409.90625, + "learning_rate": 2.004e-06, + "loss": 27951.3625, + "step": 10020 + }, + { + "epoch": 0.020261234581867103, + "grad_norm": 200492.828125, + "learning_rate": 2.0060000000000004e-06, + "loss": 9370.1328, + "step": 10030 + }, + { + "epoch": 0.02028143521455092, + "grad_norm": 26683.103515625, + "learning_rate": 2.008e-06, + "loss": 4088.65, + "step": 10040 + }, + { + "epoch": 0.020301635847234736, + "grad_norm": 27431.7734375, + "learning_rate": 2.0100000000000002e-06, + "loss": 15451.1547, + "step": 10050 + }, + { + "epoch": 0.020321836479918552, + "grad_norm": 107234.8984375, + "learning_rate": 2.012e-06, + "loss": 14083.8031, + "step": 10060 + }, + { + "epoch": 0.020342037112602365, + "grad_norm": 31921.80078125, + "learning_rate": 2.014e-06, + "loss": 13749.4891, + "step": 10070 + }, + { + "epoch": 0.020362237745286182, + "grad_norm": 17831.673828125, + "learning_rate": 2.0160000000000003e-06, + "loss": 17133.3453, + "step": 10080 + }, + { + "epoch": 0.02038243837797, + "grad_norm": 4208.63525390625, + "learning_rate": 2.0180000000000004e-06, + "loss": 4690.5855, + "step": 10090 + }, + { + "epoch": 0.020402639010653815, + "grad_norm": 15338.41796875, + "learning_rate": 2.02e-06, + "loss": 16839.85, + "step": 10100 + }, + { + "epoch": 0.020422839643337628, + "grad_norm": 58610.98046875, + "learning_rate": 2.022e-06, + "loss": 23410.95, + "step": 10110 + }, + { + "epoch": 0.020443040276021444, + "grad_norm": 0.0, + "learning_rate": 2.024e-06, + "loss": 16879.5016, + "step": 10120 + }, + { + "epoch": 0.02046324090870526, + "grad_norm": 4707.7646484375, + "learning_rate": 2.026e-06, + "loss": 6417.4711, + "step": 10130 + }, + { + "epoch": 0.020483441541389077, + "grad_norm": 163619.96875, + "learning_rate": 2.0280000000000003e-06, + "loss": 15146.0094, + "step": 10140 + }, + { + "epoch": 0.02050364217407289, + "grad_norm": 1847.330322265625, + "learning_rate": 2.0300000000000005e-06, + "loss": 14126.0594, + "step": 10150 + }, + { + "epoch": 0.020523842806756707, + "grad_norm": 7015.3046875, + "learning_rate": 2.032e-06, + "loss": 22132.2781, + "step": 10160 + }, + { + "epoch": 0.020544043439440524, + "grad_norm": 12160.0576171875, + "learning_rate": 2.0340000000000003e-06, + "loss": 25985.3688, + "step": 10170 + }, + { + "epoch": 0.02056424407212434, + "grad_norm": 15340.6474609375, + "learning_rate": 2.036e-06, + "loss": 16274.6953, + "step": 10180 + }, + { + "epoch": 0.020584444704808153, + "grad_norm": 80321.4375, + "learning_rate": 2.0380000000000002e-06, + "loss": 20867.8547, + "step": 10190 + }, + { + "epoch": 0.02060464533749197, + "grad_norm": 14842.3427734375, + "learning_rate": 2.04e-06, + "loss": 3842.3152, + "step": 10200 + }, + { + "epoch": 0.020624845970175786, + "grad_norm": 2946.72998046875, + "learning_rate": 2.042e-06, + "loss": 18120.2687, + "step": 10210 + }, + { + "epoch": 0.020645046602859603, + "grad_norm": 21453.494140625, + "learning_rate": 2.0440000000000003e-06, + "loss": 19827.2687, + "step": 10220 + }, + { + "epoch": 0.020665247235543416, + "grad_norm": 3819.943603515625, + "learning_rate": 2.0460000000000004e-06, + "loss": 5483.207, + "step": 10230 + }, + { + "epoch": 0.020685447868227232, + "grad_norm": 6143.220703125, + "learning_rate": 2.048e-06, + "loss": 23672.9984, + "step": 10240 + }, + { + "epoch": 0.02070564850091105, + "grad_norm": 36922.09765625, + "learning_rate": 2.05e-06, + "loss": 9350.5516, + "step": 10250 + }, + { + "epoch": 0.020725849133594865, + "grad_norm": 71350.6796875, + "learning_rate": 2.052e-06, + "loss": 15646.4344, + "step": 10260 + }, + { + "epoch": 0.02074604976627868, + "grad_norm": 5519.83984375, + "learning_rate": 2.054e-06, + "loss": 12832.582, + "step": 10270 + }, + { + "epoch": 0.020766250398962495, + "grad_norm": 38670.7109375, + "learning_rate": 2.0560000000000003e-06, + "loss": 5966.1605, + "step": 10280 + }, + { + "epoch": 0.02078645103164631, + "grad_norm": 31311.2578125, + "learning_rate": 2.0580000000000005e-06, + "loss": 6283.2078, + "step": 10290 + }, + { + "epoch": 0.020806651664330128, + "grad_norm": 3399.40771484375, + "learning_rate": 2.06e-06, + "loss": 7996.5547, + "step": 10300 + }, + { + "epoch": 0.02082685229701394, + "grad_norm": 9792.9853515625, + "learning_rate": 2.062e-06, + "loss": 17343.7281, + "step": 10310 + }, + { + "epoch": 0.020847052929697758, + "grad_norm": 2276.78955078125, + "learning_rate": 2.064e-06, + "loss": 7303.8508, + "step": 10320 + }, + { + "epoch": 0.020867253562381574, + "grad_norm": 5302.24609375, + "learning_rate": 2.066e-06, + "loss": 8293.6477, + "step": 10330 + }, + { + "epoch": 0.02088745419506539, + "grad_norm": 14222.4140625, + "learning_rate": 2.0680000000000004e-06, + "loss": 12199.5375, + "step": 10340 + }, + { + "epoch": 0.020907654827749204, + "grad_norm": 62130.671875, + "learning_rate": 2.07e-06, + "loss": 15187.0906, + "step": 10350 + }, + { + "epoch": 0.02092785546043302, + "grad_norm": 49655.61328125, + "learning_rate": 2.0720000000000002e-06, + "loss": 8131.8891, + "step": 10360 + }, + { + "epoch": 0.020948056093116837, + "grad_norm": 8858.3359375, + "learning_rate": 2.0740000000000004e-06, + "loss": 29139.1406, + "step": 10370 + }, + { + "epoch": 0.020968256725800653, + "grad_norm": 48659.31640625, + "learning_rate": 2.076e-06, + "loss": 15649.9031, + "step": 10380 + }, + { + "epoch": 0.020988457358484466, + "grad_norm": 56645.67578125, + "learning_rate": 2.0780000000000003e-06, + "loss": 22483.0438, + "step": 10390 + }, + { + "epoch": 0.021008657991168283, + "grad_norm": 7049.67578125, + "learning_rate": 2.08e-06, + "loss": 7691.8836, + "step": 10400 + }, + { + "epoch": 0.0210288586238521, + "grad_norm": 355356.875, + "learning_rate": 2.082e-06, + "loss": 19265.4125, + "step": 10410 + }, + { + "epoch": 0.021049059256535916, + "grad_norm": 58395.60546875, + "learning_rate": 2.0840000000000003e-06, + "loss": 13006.6305, + "step": 10420 + }, + { + "epoch": 0.02106925988921973, + "grad_norm": 0.0, + "learning_rate": 2.0860000000000004e-06, + "loss": 2924.9291, + "step": 10430 + }, + { + "epoch": 0.021089460521903546, + "grad_norm": 3852.6484375, + "learning_rate": 2.088e-06, + "loss": 21360.4562, + "step": 10440 + }, + { + "epoch": 0.021109661154587362, + "grad_norm": 10107.515625, + "learning_rate": 2.09e-06, + "loss": 10028.3469, + "step": 10450 + }, + { + "epoch": 0.02112986178727118, + "grad_norm": 6145.81103515625, + "learning_rate": 2.092e-06, + "loss": 19532.0531, + "step": 10460 + }, + { + "epoch": 0.02115006241995499, + "grad_norm": 245584.40625, + "learning_rate": 2.094e-06, + "loss": 31526.0, + "step": 10470 + }, + { + "epoch": 0.021170263052638808, + "grad_norm": 29326.072265625, + "learning_rate": 2.0960000000000003e-06, + "loss": 23780.0047, + "step": 10480 + }, + { + "epoch": 0.021190463685322625, + "grad_norm": 5678.1591796875, + "learning_rate": 2.098e-06, + "loss": 7700.2969, + "step": 10490 + }, + { + "epoch": 0.02121066431800644, + "grad_norm": 84362.484375, + "learning_rate": 2.1000000000000002e-06, + "loss": 23221.1312, + "step": 10500 + }, + { + "epoch": 0.021230864950690254, + "grad_norm": 51417.21875, + "learning_rate": 2.102e-06, + "loss": 18747.4891, + "step": 10510 + }, + { + "epoch": 0.02125106558337407, + "grad_norm": 190696.671875, + "learning_rate": 2.104e-06, + "loss": 16214.475, + "step": 10520 + }, + { + "epoch": 0.021271266216057887, + "grad_norm": 42184.03125, + "learning_rate": 2.1060000000000002e-06, + "loss": 3576.0004, + "step": 10530 + }, + { + "epoch": 0.021291466848741704, + "grad_norm": 21591.703125, + "learning_rate": 2.108e-06, + "loss": 12673.4859, + "step": 10540 + }, + { + "epoch": 0.021311667481425517, + "grad_norm": 139786.34375, + "learning_rate": 2.11e-06, + "loss": 19981.4531, + "step": 10550 + }, + { + "epoch": 0.021331868114109333, + "grad_norm": 9360.927734375, + "learning_rate": 2.1120000000000003e-06, + "loss": 20568.5641, + "step": 10560 + }, + { + "epoch": 0.02135206874679315, + "grad_norm": 11046.1064453125, + "learning_rate": 2.1140000000000004e-06, + "loss": 9942.7633, + "step": 10570 + }, + { + "epoch": 0.021372269379476967, + "grad_norm": 146784.640625, + "learning_rate": 2.116e-06, + "loss": 25355.9062, + "step": 10580 + }, + { + "epoch": 0.02139247001216078, + "grad_norm": 66903.0234375, + "learning_rate": 2.118e-06, + "loss": 15604.3219, + "step": 10590 + }, + { + "epoch": 0.021412670644844596, + "grad_norm": 18348.865234375, + "learning_rate": 2.12e-06, + "loss": 18198.4828, + "step": 10600 + }, + { + "epoch": 0.021432871277528413, + "grad_norm": 5617.10498046875, + "learning_rate": 2.122e-06, + "loss": 23566.6016, + "step": 10610 + }, + { + "epoch": 0.02145307191021223, + "grad_norm": 127323.125, + "learning_rate": 2.1240000000000003e-06, + "loss": 13751.1672, + "step": 10620 + }, + { + "epoch": 0.021473272542896042, + "grad_norm": 4649.94970703125, + "learning_rate": 2.1260000000000005e-06, + "loss": 9263.7828, + "step": 10630 + }, + { + "epoch": 0.02149347317557986, + "grad_norm": 82388.96875, + "learning_rate": 2.128e-06, + "loss": 23842.6531, + "step": 10640 + }, + { + "epoch": 0.021513673808263675, + "grad_norm": 5492.75439453125, + "learning_rate": 2.13e-06, + "loss": 14725.4484, + "step": 10650 + }, + { + "epoch": 0.021533874440947492, + "grad_norm": 29087.056640625, + "learning_rate": 2.132e-06, + "loss": 19471.8156, + "step": 10660 + }, + { + "epoch": 0.021554075073631305, + "grad_norm": 31335.01953125, + "learning_rate": 2.1340000000000002e-06, + "loss": 8914.7242, + "step": 10670 + }, + { + "epoch": 0.02157427570631512, + "grad_norm": 10237.0966796875, + "learning_rate": 2.1360000000000004e-06, + "loss": 23192.9281, + "step": 10680 + }, + { + "epoch": 0.021594476338998938, + "grad_norm": 25468.2890625, + "learning_rate": 2.138e-06, + "loss": 11829.1125, + "step": 10690 + }, + { + "epoch": 0.021614676971682754, + "grad_norm": 77904.6328125, + "learning_rate": 2.1400000000000003e-06, + "loss": 16493.2703, + "step": 10700 + }, + { + "epoch": 0.021634877604366567, + "grad_norm": 165799.84375, + "learning_rate": 2.142e-06, + "loss": 30286.1063, + "step": 10710 + }, + { + "epoch": 0.021655078237050384, + "grad_norm": 18103.724609375, + "learning_rate": 2.144e-06, + "loss": 16938.9266, + "step": 10720 + }, + { + "epoch": 0.0216752788697342, + "grad_norm": 133844.109375, + "learning_rate": 2.1460000000000003e-06, + "loss": 18342.5469, + "step": 10730 + }, + { + "epoch": 0.021695479502418017, + "grad_norm": 8677.6025390625, + "learning_rate": 2.148e-06, + "loss": 17283.3875, + "step": 10740 + }, + { + "epoch": 0.02171568013510183, + "grad_norm": 11487.1005859375, + "learning_rate": 2.15e-06, + "loss": 9688.1727, + "step": 10750 + }, + { + "epoch": 0.021735880767785647, + "grad_norm": 1568.593505859375, + "learning_rate": 2.1520000000000003e-06, + "loss": 9968.9266, + "step": 10760 + }, + { + "epoch": 0.021756081400469463, + "grad_norm": 12236.7568359375, + "learning_rate": 2.1540000000000005e-06, + "loss": 12342.207, + "step": 10770 + }, + { + "epoch": 0.02177628203315328, + "grad_norm": 58687.46875, + "learning_rate": 2.156e-06, + "loss": 14270.4078, + "step": 10780 + }, + { + "epoch": 0.021796482665837093, + "grad_norm": 26472.3828125, + "learning_rate": 2.158e-06, + "loss": 30220.9469, + "step": 10790 + }, + { + "epoch": 0.02181668329852091, + "grad_norm": 253739.34375, + "learning_rate": 2.16e-06, + "loss": 23290.8, + "step": 10800 + }, + { + "epoch": 0.021836883931204726, + "grad_norm": 1467.3016357421875, + "learning_rate": 2.1620000000000002e-06, + "loss": 7794.7445, + "step": 10810 + }, + { + "epoch": 0.021857084563888542, + "grad_norm": 2200.973388671875, + "learning_rate": 2.1640000000000004e-06, + "loss": 9172.2609, + "step": 10820 + }, + { + "epoch": 0.021877285196572355, + "grad_norm": 4932.7822265625, + "learning_rate": 2.166e-06, + "loss": 13820.2766, + "step": 10830 + }, + { + "epoch": 0.021897485829256172, + "grad_norm": 131875.171875, + "learning_rate": 2.1680000000000002e-06, + "loss": 25036.125, + "step": 10840 + }, + { + "epoch": 0.02191768646193999, + "grad_norm": 8741.953125, + "learning_rate": 2.17e-06, + "loss": 3754.5094, + "step": 10850 + }, + { + "epoch": 0.021937887094623805, + "grad_norm": 371661.71875, + "learning_rate": 2.172e-06, + "loss": 24405.3969, + "step": 10860 + }, + { + "epoch": 0.021958087727307618, + "grad_norm": 341696.0625, + "learning_rate": 2.1740000000000003e-06, + "loss": 19350.75, + "step": 10870 + }, + { + "epoch": 0.021978288359991435, + "grad_norm": 4527.36669921875, + "learning_rate": 2.176e-06, + "loss": 14560.9844, + "step": 10880 + }, + { + "epoch": 0.02199848899267525, + "grad_norm": 11115.71484375, + "learning_rate": 2.178e-06, + "loss": 19662.2625, + "step": 10890 + }, + { + "epoch": 0.022018689625359068, + "grad_norm": 423648.46875, + "learning_rate": 2.1800000000000003e-06, + "loss": 45934.0563, + "step": 10900 + }, + { + "epoch": 0.02203889025804288, + "grad_norm": 38468.9609375, + "learning_rate": 2.182e-06, + "loss": 19955.6937, + "step": 10910 + }, + { + "epoch": 0.022059090890726697, + "grad_norm": 24980.365234375, + "learning_rate": 2.184e-06, + "loss": 25966.1906, + "step": 10920 + }, + { + "epoch": 0.022079291523410514, + "grad_norm": 19609.984375, + "learning_rate": 2.186e-06, + "loss": 16027.5031, + "step": 10930 + }, + { + "epoch": 0.02209949215609433, + "grad_norm": 95428.59375, + "learning_rate": 2.188e-06, + "loss": 11829.9312, + "step": 10940 + }, + { + "epoch": 0.022119692788778143, + "grad_norm": 213844.1875, + "learning_rate": 2.19e-06, + "loss": 19148.1969, + "step": 10950 + }, + { + "epoch": 0.02213989342146196, + "grad_norm": 24107.984375, + "learning_rate": 2.1920000000000004e-06, + "loss": 8168.5867, + "step": 10960 + }, + { + "epoch": 0.022160094054145776, + "grad_norm": 1319.4188232421875, + "learning_rate": 2.194e-06, + "loss": 22258.5672, + "step": 10970 + }, + { + "epoch": 0.022180294686829593, + "grad_norm": 239840.0, + "learning_rate": 2.1960000000000002e-06, + "loss": 16248.3016, + "step": 10980 + }, + { + "epoch": 0.022200495319513406, + "grad_norm": 223855.28125, + "learning_rate": 2.198e-06, + "loss": 30339.8656, + "step": 10990 + }, + { + "epoch": 0.022220695952197222, + "grad_norm": 217863.328125, + "learning_rate": 2.2e-06, + "loss": 13169.2719, + "step": 11000 + }, + { + "epoch": 0.02224089658488104, + "grad_norm": 6059.01123046875, + "learning_rate": 2.2020000000000003e-06, + "loss": 4270.6078, + "step": 11010 + }, + { + "epoch": 0.022261097217564856, + "grad_norm": 13117.345703125, + "learning_rate": 2.2040000000000004e-06, + "loss": 9356.532, + "step": 11020 + }, + { + "epoch": 0.02228129785024867, + "grad_norm": 11243.669921875, + "learning_rate": 2.206e-06, + "loss": 15236.0719, + "step": 11030 + }, + { + "epoch": 0.022301498482932485, + "grad_norm": 1764.6810302734375, + "learning_rate": 2.2080000000000003e-06, + "loss": 20429.9766, + "step": 11040 + }, + { + "epoch": 0.0223216991156163, + "grad_norm": 25236.865234375, + "learning_rate": 2.21e-06, + "loss": 3912.6113, + "step": 11050 + }, + { + "epoch": 0.022341899748300118, + "grad_norm": 39480.296875, + "learning_rate": 2.212e-06, + "loss": 11931.9859, + "step": 11060 + }, + { + "epoch": 0.02236210038098393, + "grad_norm": 5684.80859375, + "learning_rate": 2.2140000000000003e-06, + "loss": 12779.9758, + "step": 11070 + }, + { + "epoch": 0.022382301013667748, + "grad_norm": 707674.6875, + "learning_rate": 2.216e-06, + "loss": 33013.0969, + "step": 11080 + }, + { + "epoch": 0.022402501646351564, + "grad_norm": 51977.453125, + "learning_rate": 2.218e-06, + "loss": 28476.6406, + "step": 11090 + }, + { + "epoch": 0.02242270227903538, + "grad_norm": 24614.85546875, + "learning_rate": 2.2200000000000003e-06, + "loss": 4956.5266, + "step": 11100 + }, + { + "epoch": 0.022442902911719194, + "grad_norm": 25240.22265625, + "learning_rate": 2.222e-06, + "loss": 5483.1828, + "step": 11110 + }, + { + "epoch": 0.02246310354440301, + "grad_norm": 1686.88671875, + "learning_rate": 2.2240000000000002e-06, + "loss": 14887.5172, + "step": 11120 + }, + { + "epoch": 0.022483304177086827, + "grad_norm": 24991.578125, + "learning_rate": 2.226e-06, + "loss": 9053.0711, + "step": 11130 + }, + { + "epoch": 0.022503504809770643, + "grad_norm": 6279.3583984375, + "learning_rate": 2.228e-06, + "loss": 5781.0332, + "step": 11140 + }, + { + "epoch": 0.022523705442454457, + "grad_norm": 4717.3447265625, + "learning_rate": 2.2300000000000002e-06, + "loss": 10450.4758, + "step": 11150 + }, + { + "epoch": 0.022543906075138273, + "grad_norm": 2358.759521484375, + "learning_rate": 2.2320000000000004e-06, + "loss": 18911.2547, + "step": 11160 + }, + { + "epoch": 0.02256410670782209, + "grad_norm": 61233.95703125, + "learning_rate": 2.234e-06, + "loss": 18417.7047, + "step": 11170 + }, + { + "epoch": 0.022584307340505906, + "grad_norm": 7310.8037109375, + "learning_rate": 2.2360000000000003e-06, + "loss": 7313.0461, + "step": 11180 + }, + { + "epoch": 0.02260450797318972, + "grad_norm": 99978.0, + "learning_rate": 2.238e-06, + "loss": 8687.3109, + "step": 11190 + }, + { + "epoch": 0.022624708605873536, + "grad_norm": 18746.30078125, + "learning_rate": 2.24e-06, + "loss": 10558.1547, + "step": 11200 + }, + { + "epoch": 0.022644909238557352, + "grad_norm": 21121.255859375, + "learning_rate": 2.2420000000000003e-06, + "loss": 7415.1609, + "step": 11210 + }, + { + "epoch": 0.02266510987124117, + "grad_norm": 687890.5625, + "learning_rate": 2.244e-06, + "loss": 33589.325, + "step": 11220 + }, + { + "epoch": 0.022685310503924982, + "grad_norm": 7186.6015625, + "learning_rate": 2.246e-06, + "loss": 11786.1141, + "step": 11230 + }, + { + "epoch": 0.0227055111366088, + "grad_norm": 175979.75, + "learning_rate": 2.2480000000000003e-06, + "loss": 8788.1859, + "step": 11240 + }, + { + "epoch": 0.022725711769292615, + "grad_norm": 34996.83203125, + "learning_rate": 2.25e-06, + "loss": 41315.8469, + "step": 11250 + }, + { + "epoch": 0.02274591240197643, + "grad_norm": 24373.28125, + "learning_rate": 2.252e-06, + "loss": 8103.6133, + "step": 11260 + }, + { + "epoch": 0.022766113034660244, + "grad_norm": 9437.0673828125, + "learning_rate": 2.254e-06, + "loss": 10354.7195, + "step": 11270 + }, + { + "epoch": 0.02278631366734406, + "grad_norm": 4093.658935546875, + "learning_rate": 2.256e-06, + "loss": 36568.3156, + "step": 11280 + }, + { + "epoch": 0.022806514300027877, + "grad_norm": 24249.865234375, + "learning_rate": 2.2580000000000002e-06, + "loss": 26954.4188, + "step": 11290 + }, + { + "epoch": 0.022826714932711694, + "grad_norm": 162002.25, + "learning_rate": 2.2600000000000004e-06, + "loss": 13018.7828, + "step": 11300 + }, + { + "epoch": 0.022846915565395507, + "grad_norm": 5969.00439453125, + "learning_rate": 2.262e-06, + "loss": 19149.5875, + "step": 11310 + }, + { + "epoch": 0.022867116198079324, + "grad_norm": 37502.22265625, + "learning_rate": 2.2640000000000003e-06, + "loss": 13351.9078, + "step": 11320 + }, + { + "epoch": 0.02288731683076314, + "grad_norm": 75505.9921875, + "learning_rate": 2.266e-06, + "loss": 7682.1227, + "step": 11330 + }, + { + "epoch": 0.022907517463446957, + "grad_norm": 149015.15625, + "learning_rate": 2.268e-06, + "loss": 21129.8969, + "step": 11340 + }, + { + "epoch": 0.02292771809613077, + "grad_norm": 4063.11181640625, + "learning_rate": 2.2700000000000003e-06, + "loss": 9373.6055, + "step": 11350 + }, + { + "epoch": 0.022947918728814586, + "grad_norm": 3563.05029296875, + "learning_rate": 2.2720000000000004e-06, + "loss": 8734.6242, + "step": 11360 + }, + { + "epoch": 0.022968119361498403, + "grad_norm": 2953.644775390625, + "learning_rate": 2.274e-06, + "loss": 17221.6562, + "step": 11370 + }, + { + "epoch": 0.02298831999418222, + "grad_norm": 194035.296875, + "learning_rate": 2.2760000000000003e-06, + "loss": 21826.725, + "step": 11380 + }, + { + "epoch": 0.023008520626866032, + "grad_norm": 30131.28125, + "learning_rate": 2.278e-06, + "loss": 14175.3844, + "step": 11390 + }, + { + "epoch": 0.02302872125954985, + "grad_norm": 32757.58984375, + "learning_rate": 2.28e-06, + "loss": 12336.3078, + "step": 11400 + }, + { + "epoch": 0.023048921892233665, + "grad_norm": 130032.25, + "learning_rate": 2.282e-06, + "loss": 14162.2812, + "step": 11410 + }, + { + "epoch": 0.023069122524917482, + "grad_norm": 19505.322265625, + "learning_rate": 2.284e-06, + "loss": 12345.4469, + "step": 11420 + }, + { + "epoch": 0.023089323157601295, + "grad_norm": 1938.1358642578125, + "learning_rate": 2.2860000000000002e-06, + "loss": 15697.9469, + "step": 11430 + }, + { + "epoch": 0.02310952379028511, + "grad_norm": 7502.39599609375, + "learning_rate": 2.2880000000000004e-06, + "loss": 6721.6219, + "step": 11440 + }, + { + "epoch": 0.023129724422968928, + "grad_norm": 43514.59375, + "learning_rate": 2.29e-06, + "loss": 13021.8727, + "step": 11450 + }, + { + "epoch": 0.023149925055652745, + "grad_norm": 27885.015625, + "learning_rate": 2.2920000000000002e-06, + "loss": 6262.0402, + "step": 11460 + }, + { + "epoch": 0.023170125688336558, + "grad_norm": 77529.296875, + "learning_rate": 2.294e-06, + "loss": 13251.0719, + "step": 11470 + }, + { + "epoch": 0.023190326321020374, + "grad_norm": 613.5460205078125, + "learning_rate": 2.296e-06, + "loss": 14842.9547, + "step": 11480 + }, + { + "epoch": 0.02321052695370419, + "grad_norm": 36265.26171875, + "learning_rate": 2.2980000000000003e-06, + "loss": 15308.15, + "step": 11490 + }, + { + "epoch": 0.023230727586388007, + "grad_norm": 88539.1328125, + "learning_rate": 2.3000000000000004e-06, + "loss": 18338.2109, + "step": 11500 + }, + { + "epoch": 0.02325092821907182, + "grad_norm": 102047.140625, + "learning_rate": 2.302e-06, + "loss": 15924.9469, + "step": 11510 + }, + { + "epoch": 0.023271128851755637, + "grad_norm": 254122.640625, + "learning_rate": 2.3040000000000003e-06, + "loss": 22102.3937, + "step": 11520 + }, + { + "epoch": 0.023291329484439453, + "grad_norm": 15507.9404296875, + "learning_rate": 2.306e-06, + "loss": 10577.2328, + "step": 11530 + }, + { + "epoch": 0.02331153011712327, + "grad_norm": 40521.390625, + "learning_rate": 2.308e-06, + "loss": 12435.4039, + "step": 11540 + }, + { + "epoch": 0.023331730749807083, + "grad_norm": 14070.2607421875, + "learning_rate": 2.3100000000000003e-06, + "loss": 5346.3832, + "step": 11550 + }, + { + "epoch": 0.0233519313824909, + "grad_norm": 10443.48828125, + "learning_rate": 2.312e-06, + "loss": 6041.9312, + "step": 11560 + }, + { + "epoch": 0.023372132015174716, + "grad_norm": 54782.34765625, + "learning_rate": 2.314e-06, + "loss": 24331.7219, + "step": 11570 + }, + { + "epoch": 0.023392332647858533, + "grad_norm": 9443.3349609375, + "learning_rate": 2.3160000000000004e-06, + "loss": 17965.0625, + "step": 11580 + }, + { + "epoch": 0.023412533280542346, + "grad_norm": 7723.7197265625, + "learning_rate": 2.318e-06, + "loss": 28447.175, + "step": 11590 + }, + { + "epoch": 0.023432733913226162, + "grad_norm": 19012.517578125, + "learning_rate": 2.3200000000000002e-06, + "loss": 12479.5734, + "step": 11600 + }, + { + "epoch": 0.02345293454590998, + "grad_norm": 1430.667724609375, + "learning_rate": 2.322e-06, + "loss": 8460.082, + "step": 11610 + }, + { + "epoch": 0.023473135178593795, + "grad_norm": 82557.4921875, + "learning_rate": 2.324e-06, + "loss": 13836.1, + "step": 11620 + }, + { + "epoch": 0.023493335811277608, + "grad_norm": 24892.966796875, + "learning_rate": 2.3260000000000003e-06, + "loss": 14159.7281, + "step": 11630 + }, + { + "epoch": 0.023513536443961425, + "grad_norm": 3299.33740234375, + "learning_rate": 2.3280000000000004e-06, + "loss": 18158.0938, + "step": 11640 + }, + { + "epoch": 0.02353373707664524, + "grad_norm": 20765.8984375, + "learning_rate": 2.33e-06, + "loss": 6390.6078, + "step": 11650 + }, + { + "epoch": 0.023553937709329058, + "grad_norm": 6974.259765625, + "learning_rate": 2.3320000000000003e-06, + "loss": 9966.7852, + "step": 11660 + }, + { + "epoch": 0.02357413834201287, + "grad_norm": 1798.3555908203125, + "learning_rate": 2.334e-06, + "loss": 29947.6375, + "step": 11670 + }, + { + "epoch": 0.023594338974696687, + "grad_norm": 40283.6640625, + "learning_rate": 2.336e-06, + "loss": 15294.9719, + "step": 11680 + }, + { + "epoch": 0.023614539607380504, + "grad_norm": 49792.6171875, + "learning_rate": 2.3380000000000003e-06, + "loss": 11786.393, + "step": 11690 + }, + { + "epoch": 0.02363474024006432, + "grad_norm": 77568.0546875, + "learning_rate": 2.3400000000000005e-06, + "loss": 30178.4313, + "step": 11700 + }, + { + "epoch": 0.023654940872748133, + "grad_norm": 298267.75, + "learning_rate": 2.342e-06, + "loss": 19414.3563, + "step": 11710 + }, + { + "epoch": 0.02367514150543195, + "grad_norm": 70565.015625, + "learning_rate": 2.3440000000000003e-06, + "loss": 20280.0453, + "step": 11720 + }, + { + "epoch": 0.023695342138115767, + "grad_norm": 56803.65234375, + "learning_rate": 2.346e-06, + "loss": 13271.9188, + "step": 11730 + }, + { + "epoch": 0.023715542770799583, + "grad_norm": 231223.46875, + "learning_rate": 2.3480000000000002e-06, + "loss": 19381.25, + "step": 11740 + }, + { + "epoch": 0.023735743403483396, + "grad_norm": 17392.27734375, + "learning_rate": 2.35e-06, + "loss": 15623.4578, + "step": 11750 + }, + { + "epoch": 0.023755944036167213, + "grad_norm": 242.4376220703125, + "learning_rate": 2.352e-06, + "loss": 8526.3648, + "step": 11760 + }, + { + "epoch": 0.02377614466885103, + "grad_norm": 7789.3740234375, + "learning_rate": 2.3540000000000002e-06, + "loss": 7194.9242, + "step": 11770 + }, + { + "epoch": 0.023796345301534846, + "grad_norm": 51304.28515625, + "learning_rate": 2.3560000000000004e-06, + "loss": 19294.6125, + "step": 11780 + }, + { + "epoch": 0.02381654593421866, + "grad_norm": 38041.1484375, + "learning_rate": 2.358e-06, + "loss": 23016.4188, + "step": 11790 + }, + { + "epoch": 0.023836746566902475, + "grad_norm": 221243.78125, + "learning_rate": 2.3600000000000003e-06, + "loss": 18888.5922, + "step": 11800 + }, + { + "epoch": 0.023856947199586292, + "grad_norm": 422474.9375, + "learning_rate": 2.362e-06, + "loss": 24558.7906, + "step": 11810 + }, + { + "epoch": 0.02387714783227011, + "grad_norm": 9636.0859375, + "learning_rate": 2.364e-06, + "loss": 14659.4781, + "step": 11820 + }, + { + "epoch": 0.02389734846495392, + "grad_norm": 11450.96484375, + "learning_rate": 2.3660000000000003e-06, + "loss": 8441.807, + "step": 11830 + }, + { + "epoch": 0.023917549097637738, + "grad_norm": 73587.765625, + "learning_rate": 2.3680000000000005e-06, + "loss": 8543.1828, + "step": 11840 + }, + { + "epoch": 0.023937749730321554, + "grad_norm": 2581.27490234375, + "learning_rate": 2.37e-06, + "loss": 6968.4898, + "step": 11850 + }, + { + "epoch": 0.02395795036300537, + "grad_norm": 14455.408203125, + "learning_rate": 2.3720000000000003e-06, + "loss": 11972.8148, + "step": 11860 + }, + { + "epoch": 0.023978150995689184, + "grad_norm": 1345.4638671875, + "learning_rate": 2.374e-06, + "loss": 10514.6562, + "step": 11870 + }, + { + "epoch": 0.023998351628373, + "grad_norm": 1048.2291259765625, + "learning_rate": 2.376e-06, + "loss": 8991.2102, + "step": 11880 + }, + { + "epoch": 0.024018552261056817, + "grad_norm": 107139.7890625, + "learning_rate": 2.3780000000000004e-06, + "loss": 22650.6516, + "step": 11890 + }, + { + "epoch": 0.024038752893740634, + "grad_norm": 27912.123046875, + "learning_rate": 2.38e-06, + "loss": 12600.2516, + "step": 11900 + }, + { + "epoch": 0.024058953526424447, + "grad_norm": 212286.46875, + "learning_rate": 2.3820000000000002e-06, + "loss": 17272.3641, + "step": 11910 + }, + { + "epoch": 0.024079154159108263, + "grad_norm": 39554.359375, + "learning_rate": 2.3840000000000004e-06, + "loss": 5288.9168, + "step": 11920 + }, + { + "epoch": 0.02409935479179208, + "grad_norm": 35549.73046875, + "learning_rate": 2.386e-06, + "loss": 18034.0906, + "step": 11930 + }, + { + "epoch": 0.024119555424475896, + "grad_norm": 1306.552490234375, + "learning_rate": 2.3880000000000003e-06, + "loss": 11723.1016, + "step": 11940 + }, + { + "epoch": 0.02413975605715971, + "grad_norm": 41810.3984375, + "learning_rate": 2.39e-06, + "loss": 23770.6406, + "step": 11950 + }, + { + "epoch": 0.024159956689843526, + "grad_norm": 5445.5146484375, + "learning_rate": 2.392e-06, + "loss": 13263.5219, + "step": 11960 + }, + { + "epoch": 0.024180157322527342, + "grad_norm": 2206.436279296875, + "learning_rate": 2.3940000000000003e-06, + "loss": 5783.3523, + "step": 11970 + }, + { + "epoch": 0.02420035795521116, + "grad_norm": 59589.94140625, + "learning_rate": 2.3960000000000004e-06, + "loss": 6313.7754, + "step": 11980 + }, + { + "epoch": 0.024220558587894972, + "grad_norm": 114675.5234375, + "learning_rate": 2.398e-06, + "loss": 21288.2281, + "step": 11990 + }, + { + "epoch": 0.02424075922057879, + "grad_norm": 22409.896484375, + "learning_rate": 2.4000000000000003e-06, + "loss": 8459.7773, + "step": 12000 + }, + { + "epoch": 0.024260959853262605, + "grad_norm": 3959.387939453125, + "learning_rate": 2.402e-06, + "loss": 10683.875, + "step": 12010 + }, + { + "epoch": 0.02428116048594642, + "grad_norm": 4413.3759765625, + "learning_rate": 2.404e-06, + "loss": 7211.7398, + "step": 12020 + }, + { + "epoch": 0.024301361118630235, + "grad_norm": 94653.515625, + "learning_rate": 2.4060000000000003e-06, + "loss": 11898.7453, + "step": 12030 + }, + { + "epoch": 0.02432156175131405, + "grad_norm": 60599.58984375, + "learning_rate": 2.408e-06, + "loss": 9229.9836, + "step": 12040 + }, + { + "epoch": 0.024341762383997868, + "grad_norm": 106677.796875, + "learning_rate": 2.4100000000000002e-06, + "loss": 21327.0469, + "step": 12050 + }, + { + "epoch": 0.024361963016681684, + "grad_norm": 77130.2734375, + "learning_rate": 2.4120000000000004e-06, + "loss": 11277.7648, + "step": 12060 + }, + { + "epoch": 0.024382163649365497, + "grad_norm": 12333.6279296875, + "learning_rate": 2.414e-06, + "loss": 6601.6703, + "step": 12070 + }, + { + "epoch": 0.024402364282049314, + "grad_norm": 316.9674072265625, + "learning_rate": 2.4160000000000002e-06, + "loss": 19001.1859, + "step": 12080 + }, + { + "epoch": 0.02442256491473313, + "grad_norm": 4852.3251953125, + "learning_rate": 2.418e-06, + "loss": 19848.7781, + "step": 12090 + }, + { + "epoch": 0.024442765547416947, + "grad_norm": 12897.9912109375, + "learning_rate": 2.42e-06, + "loss": 11141.5469, + "step": 12100 + }, + { + "epoch": 0.02446296618010076, + "grad_norm": 915.7304077148438, + "learning_rate": 2.4220000000000003e-06, + "loss": 23360.5312, + "step": 12110 + }, + { + "epoch": 0.024483166812784576, + "grad_norm": 4069.96923828125, + "learning_rate": 2.4240000000000004e-06, + "loss": 6898.2117, + "step": 12120 + }, + { + "epoch": 0.024503367445468393, + "grad_norm": 3396.974609375, + "learning_rate": 2.426e-06, + "loss": 8741.1023, + "step": 12130 + }, + { + "epoch": 0.02452356807815221, + "grad_norm": 129241.7109375, + "learning_rate": 2.428e-06, + "loss": 20940.2594, + "step": 12140 + }, + { + "epoch": 0.024543768710836023, + "grad_norm": 3553.63134765625, + "learning_rate": 2.43e-06, + "loss": 13925.4937, + "step": 12150 + }, + { + "epoch": 0.02456396934351984, + "grad_norm": 214409.84375, + "learning_rate": 2.432e-06, + "loss": 12426.7406, + "step": 12160 + }, + { + "epoch": 0.024584169976203656, + "grad_norm": 3851.241455078125, + "learning_rate": 2.4340000000000003e-06, + "loss": 37849.6656, + "step": 12170 + }, + { + "epoch": 0.02460437060888747, + "grad_norm": 48595.21484375, + "learning_rate": 2.4360000000000005e-06, + "loss": 8032.6789, + "step": 12180 + }, + { + "epoch": 0.024624571241571285, + "grad_norm": 33816.96875, + "learning_rate": 2.438e-06, + "loss": 15095.275, + "step": 12190 + }, + { + "epoch": 0.0246447718742551, + "grad_norm": 79158.6953125, + "learning_rate": 2.4400000000000004e-06, + "loss": 24339.7875, + "step": 12200 + }, + { + "epoch": 0.024664972506938918, + "grad_norm": 8204.1689453125, + "learning_rate": 2.442e-06, + "loss": 10835.2195, + "step": 12210 + }, + { + "epoch": 0.02468517313962273, + "grad_norm": 8392.2421875, + "learning_rate": 2.4440000000000002e-06, + "loss": 9890.4242, + "step": 12220 + }, + { + "epoch": 0.024705373772306548, + "grad_norm": 28058.205078125, + "learning_rate": 2.4460000000000004e-06, + "loss": 63923.9062, + "step": 12230 + }, + { + "epoch": 0.024725574404990364, + "grad_norm": 12839.4912109375, + "learning_rate": 2.448e-06, + "loss": 7173.5172, + "step": 12240 + }, + { + "epoch": 0.02474577503767418, + "grad_norm": 137291.0, + "learning_rate": 2.4500000000000003e-06, + "loss": 15170.1719, + "step": 12250 + }, + { + "epoch": 0.024765975670357994, + "grad_norm": 30064.498046875, + "learning_rate": 2.4520000000000004e-06, + "loss": 11342.2508, + "step": 12260 + }, + { + "epoch": 0.02478617630304181, + "grad_norm": 17913.841796875, + "learning_rate": 2.454e-06, + "loss": 8221.725, + "step": 12270 + }, + { + "epoch": 0.024806376935725627, + "grad_norm": 7547.79541015625, + "learning_rate": 2.4560000000000003e-06, + "loss": 18221.2641, + "step": 12280 + }, + { + "epoch": 0.024826577568409443, + "grad_norm": 118762.9453125, + "learning_rate": 2.458e-06, + "loss": 18038.0375, + "step": 12290 + }, + { + "epoch": 0.024846778201093257, + "grad_norm": 14720.498046875, + "learning_rate": 2.46e-06, + "loss": 6373.9066, + "step": 12300 + }, + { + "epoch": 0.024866978833777073, + "grad_norm": 2850.217529296875, + "learning_rate": 2.4620000000000003e-06, + "loss": 9515.3672, + "step": 12310 + }, + { + "epoch": 0.02488717946646089, + "grad_norm": 18542.49609375, + "learning_rate": 2.4640000000000005e-06, + "loss": 13293.0594, + "step": 12320 + }, + { + "epoch": 0.024907380099144706, + "grad_norm": 40593.82421875, + "learning_rate": 2.466e-06, + "loss": 13328.0453, + "step": 12330 + }, + { + "epoch": 0.02492758073182852, + "grad_norm": 18167.427734375, + "learning_rate": 2.468e-06, + "loss": 14441.5063, + "step": 12340 + }, + { + "epoch": 0.024947781364512336, + "grad_norm": 155365.75, + "learning_rate": 2.47e-06, + "loss": 24125.8156, + "step": 12350 + }, + { + "epoch": 0.024967981997196152, + "grad_norm": 1135.5247802734375, + "learning_rate": 2.4720000000000002e-06, + "loss": 8645.7867, + "step": 12360 + }, + { + "epoch": 0.02498818262987997, + "grad_norm": 14159.08203125, + "learning_rate": 2.4740000000000004e-06, + "loss": 16465.3531, + "step": 12370 + }, + { + "epoch": 0.025008383262563782, + "grad_norm": 4385.66064453125, + "learning_rate": 2.476e-06, + "loss": 8653.2648, + "step": 12380 + }, + { + "epoch": 0.0250285838952476, + "grad_norm": 170417.515625, + "learning_rate": 2.4780000000000002e-06, + "loss": 12392.2094, + "step": 12390 + }, + { + "epoch": 0.025048784527931415, + "grad_norm": 24686.12109375, + "learning_rate": 2.4800000000000004e-06, + "loss": 18002.5031, + "step": 12400 + }, + { + "epoch": 0.02506898516061523, + "grad_norm": 10025.0576171875, + "learning_rate": 2.482e-06, + "loss": 12300.8094, + "step": 12410 + }, + { + "epoch": 0.025089185793299044, + "grad_norm": 413039.0, + "learning_rate": 2.4840000000000003e-06, + "loss": 20725.2766, + "step": 12420 + }, + { + "epoch": 0.02510938642598286, + "grad_norm": 360916.65625, + "learning_rate": 2.486e-06, + "loss": 22493.0125, + "step": 12430 + }, + { + "epoch": 0.025129587058666678, + "grad_norm": 5796.13720703125, + "learning_rate": 2.488e-06, + "loss": 14747.5406, + "step": 12440 + }, + { + "epoch": 0.025149787691350494, + "grad_norm": 61408.9765625, + "learning_rate": 2.4900000000000003e-06, + "loss": 21913.2266, + "step": 12450 + }, + { + "epoch": 0.025169988324034307, + "grad_norm": 24689.56640625, + "learning_rate": 2.4920000000000005e-06, + "loss": 13133.4344, + "step": 12460 + }, + { + "epoch": 0.025190188956718124, + "grad_norm": 4731.255859375, + "learning_rate": 2.494e-06, + "loss": 17228.825, + "step": 12470 + }, + { + "epoch": 0.02521038958940194, + "grad_norm": 943.9275512695312, + "learning_rate": 2.496e-06, + "loss": 10156.1313, + "step": 12480 + }, + { + "epoch": 0.025230590222085757, + "grad_norm": 37099.56640625, + "learning_rate": 2.498e-06, + "loss": 14066.6344, + "step": 12490 + }, + { + "epoch": 0.02525079085476957, + "grad_norm": 1182.9068603515625, + "learning_rate": 2.5e-06, + "loss": 9393.2281, + "step": 12500 + }, + { + "epoch": 0.025270991487453386, + "grad_norm": 181.1616668701172, + "learning_rate": 2.502e-06, + "loss": 13468.6437, + "step": 12510 + }, + { + "epoch": 0.025291192120137203, + "grad_norm": 2616.606201171875, + "learning_rate": 2.5040000000000005e-06, + "loss": 19069.025, + "step": 12520 + }, + { + "epoch": 0.02531139275282102, + "grad_norm": 23314.8203125, + "learning_rate": 2.5060000000000002e-06, + "loss": 33733.1687, + "step": 12530 + }, + { + "epoch": 0.025331593385504832, + "grad_norm": 27577.30859375, + "learning_rate": 2.5080000000000004e-06, + "loss": 24576.3266, + "step": 12540 + }, + { + "epoch": 0.02535179401818865, + "grad_norm": 97776.2890625, + "learning_rate": 2.51e-06, + "loss": 16733.2313, + "step": 12550 + }, + { + "epoch": 0.025371994650872465, + "grad_norm": 131374.265625, + "learning_rate": 2.512e-06, + "loss": 10693.8922, + "step": 12560 + }, + { + "epoch": 0.025392195283556282, + "grad_norm": 54657.0546875, + "learning_rate": 2.5140000000000004e-06, + "loss": 6133.3824, + "step": 12570 + }, + { + "epoch": 0.025412395916240095, + "grad_norm": 1183.1282958984375, + "learning_rate": 2.516e-06, + "loss": 5531.65, + "step": 12580 + }, + { + "epoch": 0.02543259654892391, + "grad_norm": 0.0, + "learning_rate": 2.5180000000000003e-06, + "loss": 10711.9773, + "step": 12590 + }, + { + "epoch": 0.025452797181607728, + "grad_norm": 88761.109375, + "learning_rate": 2.52e-06, + "loss": 16020.3531, + "step": 12600 + }, + { + "epoch": 0.025472997814291545, + "grad_norm": 26704.283203125, + "learning_rate": 2.522e-06, + "loss": 6198.6711, + "step": 12610 + }, + { + "epoch": 0.025493198446975358, + "grad_norm": 3259.79248046875, + "learning_rate": 2.5240000000000003e-06, + "loss": 8126.9125, + "step": 12620 + }, + { + "epoch": 0.025513399079659174, + "grad_norm": 44885.2734375, + "learning_rate": 2.526e-06, + "loss": 9977.8062, + "step": 12630 + }, + { + "epoch": 0.02553359971234299, + "grad_norm": 437328.1875, + "learning_rate": 2.5280000000000006e-06, + "loss": 26875.8063, + "step": 12640 + }, + { + "epoch": 0.025553800345026807, + "grad_norm": 1026.5933837890625, + "learning_rate": 2.5300000000000003e-06, + "loss": 12060.2992, + "step": 12650 + }, + { + "epoch": 0.02557400097771062, + "grad_norm": 151467.1875, + "learning_rate": 2.532e-06, + "loss": 24021.1391, + "step": 12660 + }, + { + "epoch": 0.025594201610394437, + "grad_norm": 17554.634765625, + "learning_rate": 2.5340000000000002e-06, + "loss": 8225.3305, + "step": 12670 + }, + { + "epoch": 0.025614402243078253, + "grad_norm": 18307.40625, + "learning_rate": 2.536e-06, + "loss": 22615.4547, + "step": 12680 + }, + { + "epoch": 0.02563460287576207, + "grad_norm": 116868.9453125, + "learning_rate": 2.5380000000000005e-06, + "loss": 24227.7313, + "step": 12690 + }, + { + "epoch": 0.025654803508445883, + "grad_norm": 3025.2705078125, + "learning_rate": 2.5400000000000002e-06, + "loss": 14669.0453, + "step": 12700 + }, + { + "epoch": 0.0256750041411297, + "grad_norm": 23074.013671875, + "learning_rate": 2.542e-06, + "loss": 6449.7016, + "step": 12710 + }, + { + "epoch": 0.025695204773813516, + "grad_norm": 47152.2109375, + "learning_rate": 2.5440000000000005e-06, + "loss": 30235.725, + "step": 12720 + }, + { + "epoch": 0.025715405406497333, + "grad_norm": 6364.4921875, + "learning_rate": 2.5460000000000003e-06, + "loss": 5790.5395, + "step": 12730 + }, + { + "epoch": 0.025735606039181146, + "grad_norm": 17181.341796875, + "learning_rate": 2.5480000000000004e-06, + "loss": 9820.2437, + "step": 12740 + }, + { + "epoch": 0.025755806671864962, + "grad_norm": 18937.5234375, + "learning_rate": 2.55e-06, + "loss": 29002.15, + "step": 12750 + }, + { + "epoch": 0.02577600730454878, + "grad_norm": 4640.4501953125, + "learning_rate": 2.552e-06, + "loss": 19944.3719, + "step": 12760 + }, + { + "epoch": 0.025796207937232595, + "grad_norm": 154516.953125, + "learning_rate": 2.5540000000000004e-06, + "loss": 21663.7719, + "step": 12770 + }, + { + "epoch": 0.025816408569916408, + "grad_norm": 70738.1796875, + "learning_rate": 2.556e-06, + "loss": 20294.5359, + "step": 12780 + }, + { + "epoch": 0.025836609202600225, + "grad_norm": 1199.399658203125, + "learning_rate": 2.5580000000000003e-06, + "loss": 24000.0, + "step": 12790 + }, + { + "epoch": 0.02585680983528404, + "grad_norm": 2132.311767578125, + "learning_rate": 2.56e-06, + "loss": 2390.3549, + "step": 12800 + }, + { + "epoch": 0.025877010467967858, + "grad_norm": 44151.91796875, + "learning_rate": 2.562e-06, + "loss": 29079.45, + "step": 12810 + }, + { + "epoch": 0.02589721110065167, + "grad_norm": 20195.056640625, + "learning_rate": 2.5640000000000004e-06, + "loss": 18915.85, + "step": 12820 + }, + { + "epoch": 0.025917411733335487, + "grad_norm": 6710.990234375, + "learning_rate": 2.566e-06, + "loss": 10070.3375, + "step": 12830 + }, + { + "epoch": 0.025937612366019304, + "grad_norm": 206726.1875, + "learning_rate": 2.568e-06, + "loss": 12153.1734, + "step": 12840 + }, + { + "epoch": 0.02595781299870312, + "grad_norm": 47668.74609375, + "learning_rate": 2.5700000000000004e-06, + "loss": 7939.6086, + "step": 12850 + }, + { + "epoch": 0.025978013631386934, + "grad_norm": 9722.228515625, + "learning_rate": 2.572e-06, + "loss": 12151.6516, + "step": 12860 + }, + { + "epoch": 0.02599821426407075, + "grad_norm": 13987.740234375, + "learning_rate": 2.5740000000000003e-06, + "loss": 10240.5742, + "step": 12870 + }, + { + "epoch": 0.026018414896754567, + "grad_norm": 104377.15625, + "learning_rate": 2.576e-06, + "loss": 17755.3484, + "step": 12880 + }, + { + "epoch": 0.026038615529438383, + "grad_norm": 451642.3125, + "learning_rate": 2.578e-06, + "loss": 24314.9344, + "step": 12890 + }, + { + "epoch": 0.026058816162122196, + "grad_norm": 89829.0234375, + "learning_rate": 2.5800000000000003e-06, + "loss": 9979.3531, + "step": 12900 + }, + { + "epoch": 0.026079016794806013, + "grad_norm": 89329.65625, + "learning_rate": 2.582e-06, + "loss": 7716.4906, + "step": 12910 + }, + { + "epoch": 0.02609921742748983, + "grad_norm": 8908.1337890625, + "learning_rate": 2.5840000000000006e-06, + "loss": 5358.4289, + "step": 12920 + }, + { + "epoch": 0.026119418060173646, + "grad_norm": 5269.734375, + "learning_rate": 2.5860000000000003e-06, + "loss": 14022.5078, + "step": 12930 + }, + { + "epoch": 0.02613961869285746, + "grad_norm": 3434.826904296875, + "learning_rate": 2.588e-06, + "loss": 10162.1125, + "step": 12940 + }, + { + "epoch": 0.026159819325541275, + "grad_norm": 18319.392578125, + "learning_rate": 2.59e-06, + "loss": 21306.7766, + "step": 12950 + }, + { + "epoch": 0.026180019958225092, + "grad_norm": 15308.357421875, + "learning_rate": 2.592e-06, + "loss": 11918.7437, + "step": 12960 + }, + { + "epoch": 0.02620022059090891, + "grad_norm": 42663.69921875, + "learning_rate": 2.5940000000000005e-06, + "loss": 17304.8781, + "step": 12970 + }, + { + "epoch": 0.02622042122359272, + "grad_norm": 56969.99609375, + "learning_rate": 2.5960000000000002e-06, + "loss": 14806.925, + "step": 12980 + }, + { + "epoch": 0.026240621856276538, + "grad_norm": 19199.64453125, + "learning_rate": 2.598e-06, + "loss": 13931.6453, + "step": 12990 + }, + { + "epoch": 0.026260822488960354, + "grad_norm": 1490.22265625, + "learning_rate": 2.6e-06, + "loss": 17063.4016, + "step": 13000 + }, + { + "epoch": 0.02628102312164417, + "grad_norm": 13992.634765625, + "learning_rate": 2.6020000000000002e-06, + "loss": 16874.9094, + "step": 13010 + }, + { + "epoch": 0.026301223754327984, + "grad_norm": 10102.5712890625, + "learning_rate": 2.6040000000000004e-06, + "loss": 16644.9625, + "step": 13020 + }, + { + "epoch": 0.0263214243870118, + "grad_norm": 25727.591796875, + "learning_rate": 2.606e-06, + "loss": 7799.9211, + "step": 13030 + }, + { + "epoch": 0.026341625019695617, + "grad_norm": 63253.69921875, + "learning_rate": 2.608e-06, + "loss": 14834.8594, + "step": 13040 + }, + { + "epoch": 0.026361825652379434, + "grad_norm": 108939.6171875, + "learning_rate": 2.6100000000000004e-06, + "loss": 10038.7781, + "step": 13050 + }, + { + "epoch": 0.026382026285063247, + "grad_norm": 210855.78125, + "learning_rate": 2.612e-06, + "loss": 14661.5688, + "step": 13060 + }, + { + "epoch": 0.026402226917747063, + "grad_norm": 19078.00390625, + "learning_rate": 2.6140000000000003e-06, + "loss": 15723.1844, + "step": 13070 + }, + { + "epoch": 0.02642242755043088, + "grad_norm": 30625.85546875, + "learning_rate": 2.616e-06, + "loss": 19210.6359, + "step": 13080 + }, + { + "epoch": 0.026442628183114696, + "grad_norm": 37906.73828125, + "learning_rate": 2.618e-06, + "loss": 3408.4062, + "step": 13090 + }, + { + "epoch": 0.02646282881579851, + "grad_norm": 204851.9375, + "learning_rate": 2.6200000000000003e-06, + "loss": 12439.8656, + "step": 13100 + }, + { + "epoch": 0.026483029448482326, + "grad_norm": 6872.52099609375, + "learning_rate": 2.622e-06, + "loss": 11342.6289, + "step": 13110 + }, + { + "epoch": 0.026503230081166142, + "grad_norm": 23850.44140625, + "learning_rate": 2.6240000000000006e-06, + "loss": 11027.8609, + "step": 13120 + }, + { + "epoch": 0.02652343071384996, + "grad_norm": 1969.2933349609375, + "learning_rate": 2.6260000000000004e-06, + "loss": 5015.2953, + "step": 13130 + }, + { + "epoch": 0.026543631346533772, + "grad_norm": 79839.9296875, + "learning_rate": 2.628e-06, + "loss": 13136.8438, + "step": 13140 + }, + { + "epoch": 0.02656383197921759, + "grad_norm": 51721.43359375, + "learning_rate": 2.6300000000000002e-06, + "loss": 11662.3188, + "step": 13150 + }, + { + "epoch": 0.026584032611901405, + "grad_norm": 21341.966796875, + "learning_rate": 2.632e-06, + "loss": 9740.7992, + "step": 13160 + }, + { + "epoch": 0.02660423324458522, + "grad_norm": 113993.4921875, + "learning_rate": 2.6340000000000005e-06, + "loss": 32151.8312, + "step": 13170 + }, + { + "epoch": 0.026624433877269035, + "grad_norm": 44694.703125, + "learning_rate": 2.6360000000000003e-06, + "loss": 10529.0859, + "step": 13180 + }, + { + "epoch": 0.02664463450995285, + "grad_norm": 14563.830078125, + "learning_rate": 2.638e-06, + "loss": 10124.95, + "step": 13190 + }, + { + "epoch": 0.026664835142636668, + "grad_norm": 166734.015625, + "learning_rate": 2.64e-06, + "loss": 26924.4219, + "step": 13200 + }, + { + "epoch": 0.026685035775320484, + "grad_norm": 97431.375, + "learning_rate": 2.6420000000000003e-06, + "loss": 16384.625, + "step": 13210 + }, + { + "epoch": 0.026705236408004297, + "grad_norm": 5147.94775390625, + "learning_rate": 2.6440000000000004e-06, + "loss": 21221.1672, + "step": 13220 + }, + { + "epoch": 0.026725437040688114, + "grad_norm": 64287.87109375, + "learning_rate": 2.646e-06, + "loss": 21858.1828, + "step": 13230 + }, + { + "epoch": 0.02674563767337193, + "grad_norm": 23355.234375, + "learning_rate": 2.648e-06, + "loss": 10317.543, + "step": 13240 + }, + { + "epoch": 0.026765838306055747, + "grad_norm": 180818.59375, + "learning_rate": 2.6500000000000005e-06, + "loss": 28091.1937, + "step": 13250 + }, + { + "epoch": 0.02678603893873956, + "grad_norm": 537.61181640625, + "learning_rate": 2.652e-06, + "loss": 10255.5164, + "step": 13260 + }, + { + "epoch": 0.026806239571423376, + "grad_norm": 13248.1767578125, + "learning_rate": 2.6540000000000003e-06, + "loss": 6697.0406, + "step": 13270 + }, + { + "epoch": 0.026826440204107193, + "grad_norm": 15697.3134765625, + "learning_rate": 2.656e-06, + "loss": 10928.5961, + "step": 13280 + }, + { + "epoch": 0.02684664083679101, + "grad_norm": 15383.7060546875, + "learning_rate": 2.6580000000000002e-06, + "loss": 10770.2156, + "step": 13290 + }, + { + "epoch": 0.026866841469474823, + "grad_norm": 5368.7626953125, + "learning_rate": 2.6600000000000004e-06, + "loss": 10564.4734, + "step": 13300 + }, + { + "epoch": 0.02688704210215864, + "grad_norm": 8100.47119140625, + "learning_rate": 2.662e-06, + "loss": 17515.6578, + "step": 13310 + }, + { + "epoch": 0.026907242734842456, + "grad_norm": 56264.59375, + "learning_rate": 2.6640000000000007e-06, + "loss": 20582.9625, + "step": 13320 + }, + { + "epoch": 0.026927443367526272, + "grad_norm": 21853.419921875, + "learning_rate": 2.6660000000000004e-06, + "loss": 5038.0016, + "step": 13330 + }, + { + "epoch": 0.026947644000210085, + "grad_norm": 11115.3857421875, + "learning_rate": 2.668e-06, + "loss": 22938.4969, + "step": 13340 + }, + { + "epoch": 0.0269678446328939, + "grad_norm": 136652.75, + "learning_rate": 2.6700000000000003e-06, + "loss": 10314.4562, + "step": 13350 + }, + { + "epoch": 0.026988045265577718, + "grad_norm": 1420.9080810546875, + "learning_rate": 2.672e-06, + "loss": 9690.0414, + "step": 13360 + }, + { + "epoch": 0.027008245898261535, + "grad_norm": 3799.61279296875, + "learning_rate": 2.6740000000000006e-06, + "loss": 22772.1719, + "step": 13370 + }, + { + "epoch": 0.027028446530945348, + "grad_norm": 15764.0947265625, + "learning_rate": 2.6760000000000003e-06, + "loss": 9172.0133, + "step": 13380 + }, + { + "epoch": 0.027048647163629164, + "grad_norm": 1075.5709228515625, + "learning_rate": 2.678e-06, + "loss": 21139.1984, + "step": 13390 + }, + { + "epoch": 0.02706884779631298, + "grad_norm": 72857.0625, + "learning_rate": 2.68e-06, + "loss": 13356.1562, + "step": 13400 + }, + { + "epoch": 0.027089048428996797, + "grad_norm": 4616.51220703125, + "learning_rate": 2.6820000000000003e-06, + "loss": 9925.0656, + "step": 13410 + }, + { + "epoch": 0.02710924906168061, + "grad_norm": 28139.423828125, + "learning_rate": 2.6840000000000005e-06, + "loss": 26632.6875, + "step": 13420 + }, + { + "epoch": 0.027129449694364427, + "grad_norm": 49998.90234375, + "learning_rate": 2.686e-06, + "loss": 6686.4258, + "step": 13430 + }, + { + "epoch": 0.027149650327048244, + "grad_norm": 13128.28515625, + "learning_rate": 2.688e-06, + "loss": 13650.1875, + "step": 13440 + }, + { + "epoch": 0.02716985095973206, + "grad_norm": 11746.6328125, + "learning_rate": 2.6900000000000005e-06, + "loss": 15277.8125, + "step": 13450 + }, + { + "epoch": 0.027190051592415873, + "grad_norm": 277402.40625, + "learning_rate": 2.6920000000000002e-06, + "loss": 20208.9406, + "step": 13460 + }, + { + "epoch": 0.02721025222509969, + "grad_norm": 21107.326171875, + "learning_rate": 2.694e-06, + "loss": 36523.9125, + "step": 13470 + }, + { + "epoch": 0.027230452857783506, + "grad_norm": 15579.6767578125, + "learning_rate": 2.696e-06, + "loss": 4704.1621, + "step": 13480 + }, + { + "epoch": 0.027250653490467323, + "grad_norm": 29024.029296875, + "learning_rate": 2.6980000000000003e-06, + "loss": 23443.1469, + "step": 13490 + }, + { + "epoch": 0.027270854123151136, + "grad_norm": 41212.8359375, + "learning_rate": 2.7000000000000004e-06, + "loss": 25965.6125, + "step": 13500 + }, + { + "epoch": 0.027291054755834952, + "grad_norm": 118112.7578125, + "learning_rate": 2.702e-06, + "loss": 9839.7969, + "step": 13510 + }, + { + "epoch": 0.02731125538851877, + "grad_norm": 72377.4296875, + "learning_rate": 2.704e-06, + "loss": 13038.1203, + "step": 13520 + }, + { + "epoch": 0.027331456021202585, + "grad_norm": 110076.703125, + "learning_rate": 2.7060000000000004e-06, + "loss": 7340.25, + "step": 13530 + }, + { + "epoch": 0.0273516566538864, + "grad_norm": 6503.0400390625, + "learning_rate": 2.708e-06, + "loss": 9888.0867, + "step": 13540 + }, + { + "epoch": 0.027371857286570215, + "grad_norm": 36694.1875, + "learning_rate": 2.7100000000000003e-06, + "loss": 16398.3094, + "step": 13550 + }, + { + "epoch": 0.02739205791925403, + "grad_norm": 19227.27734375, + "learning_rate": 2.712e-06, + "loss": 16395.4953, + "step": 13560 + }, + { + "epoch": 0.027412258551937848, + "grad_norm": 88472.734375, + "learning_rate": 2.7139999999999998e-06, + "loss": 15062.6781, + "step": 13570 + }, + { + "epoch": 0.02743245918462166, + "grad_norm": 41724.05078125, + "learning_rate": 2.7160000000000003e-06, + "loss": 5731.8766, + "step": 13580 + }, + { + "epoch": 0.027452659817305478, + "grad_norm": 91493.765625, + "learning_rate": 2.718e-06, + "loss": 12494.1953, + "step": 13590 + }, + { + "epoch": 0.027472860449989294, + "grad_norm": 144784.953125, + "learning_rate": 2.7200000000000002e-06, + "loss": 13137.8406, + "step": 13600 + }, + { + "epoch": 0.02749306108267311, + "grad_norm": 13636.83203125, + "learning_rate": 2.7220000000000004e-06, + "loss": 12699.2914, + "step": 13610 + }, + { + "epoch": 0.027513261715356924, + "grad_norm": 34689.9296875, + "learning_rate": 2.724e-06, + "loss": 8962.5438, + "step": 13620 + }, + { + "epoch": 0.02753346234804074, + "grad_norm": 1641.8592529296875, + "learning_rate": 2.7260000000000002e-06, + "loss": 10569.3586, + "step": 13630 + }, + { + "epoch": 0.027553662980724557, + "grad_norm": 19734.142578125, + "learning_rate": 2.728e-06, + "loss": 5159.5434, + "step": 13640 + }, + { + "epoch": 0.027573863613408373, + "grad_norm": 11878.4951171875, + "learning_rate": 2.7300000000000005e-06, + "loss": 14464.2375, + "step": 13650 + }, + { + "epoch": 0.027594064246092186, + "grad_norm": 372074.875, + "learning_rate": 2.7320000000000003e-06, + "loss": 27301.3187, + "step": 13660 + }, + { + "epoch": 0.027614264878776003, + "grad_norm": 78942.5390625, + "learning_rate": 2.734e-06, + "loss": 11413.3445, + "step": 13670 + }, + { + "epoch": 0.02763446551145982, + "grad_norm": 5311.1904296875, + "learning_rate": 2.736e-06, + "loss": 6026.0617, + "step": 13680 + }, + { + "epoch": 0.027654666144143636, + "grad_norm": 13011.14453125, + "learning_rate": 2.7380000000000003e-06, + "loss": 13492.8625, + "step": 13690 + }, + { + "epoch": 0.02767486677682745, + "grad_norm": 42491.65234375, + "learning_rate": 2.7400000000000004e-06, + "loss": 17553.8688, + "step": 13700 + }, + { + "epoch": 0.027695067409511265, + "grad_norm": 13514.7314453125, + "learning_rate": 2.742e-06, + "loss": 9556.7586, + "step": 13710 + }, + { + "epoch": 0.027715268042195082, + "grad_norm": 160922.90625, + "learning_rate": 2.744e-06, + "loss": 15303.3156, + "step": 13720 + }, + { + "epoch": 0.0277354686748789, + "grad_norm": 133770.671875, + "learning_rate": 2.7460000000000005e-06, + "loss": 17781.2125, + "step": 13730 + }, + { + "epoch": 0.02775566930756271, + "grad_norm": 105108.296875, + "learning_rate": 2.748e-06, + "loss": 12659.9258, + "step": 13740 + }, + { + "epoch": 0.027775869940246528, + "grad_norm": 114163.578125, + "learning_rate": 2.7500000000000004e-06, + "loss": 26672.2812, + "step": 13750 + }, + { + "epoch": 0.027796070572930345, + "grad_norm": 56328.95703125, + "learning_rate": 2.752e-06, + "loss": 28505.9531, + "step": 13760 + }, + { + "epoch": 0.02781627120561416, + "grad_norm": 11064.9169921875, + "learning_rate": 2.754e-06, + "loss": 28482.8312, + "step": 13770 + }, + { + "epoch": 0.027836471838297974, + "grad_norm": 151894.78125, + "learning_rate": 2.7560000000000004e-06, + "loss": 18174.6125, + "step": 13780 + }, + { + "epoch": 0.02785667247098179, + "grad_norm": 3756.63916015625, + "learning_rate": 2.758e-06, + "loss": 7673.8805, + "step": 13790 + }, + { + "epoch": 0.027876873103665607, + "grad_norm": 159950.671875, + "learning_rate": 2.7600000000000003e-06, + "loss": 10828.1344, + "step": 13800 + }, + { + "epoch": 0.027897073736349424, + "grad_norm": 122229.59375, + "learning_rate": 2.7620000000000004e-06, + "loss": 12041.6555, + "step": 13810 + }, + { + "epoch": 0.027917274369033237, + "grad_norm": 67404.1953125, + "learning_rate": 2.764e-06, + "loss": 7816.9969, + "step": 13820 + }, + { + "epoch": 0.027937475001717053, + "grad_norm": 13015.927734375, + "learning_rate": 2.7660000000000003e-06, + "loss": 14250.1391, + "step": 13830 + }, + { + "epoch": 0.02795767563440087, + "grad_norm": 1933.580078125, + "learning_rate": 2.768e-06, + "loss": 6212.6891, + "step": 13840 + }, + { + "epoch": 0.027977876267084686, + "grad_norm": 16223.65234375, + "learning_rate": 2.7700000000000006e-06, + "loss": 10642.0508, + "step": 13850 + }, + { + "epoch": 0.0279980768997685, + "grad_norm": 52570.71875, + "learning_rate": 2.7720000000000003e-06, + "loss": 8040.9297, + "step": 13860 + }, + { + "epoch": 0.028018277532452316, + "grad_norm": 72443.78125, + "learning_rate": 2.774e-06, + "loss": 8844.5109, + "step": 13870 + }, + { + "epoch": 0.028038478165136133, + "grad_norm": 63389.9921875, + "learning_rate": 2.776e-06, + "loss": 14330.9219, + "step": 13880 + }, + { + "epoch": 0.02805867879781995, + "grad_norm": 15659.283203125, + "learning_rate": 2.7780000000000003e-06, + "loss": 9930.6812, + "step": 13890 + }, + { + "epoch": 0.028078879430503762, + "grad_norm": 4276.548828125, + "learning_rate": 2.7800000000000005e-06, + "loss": 11997.2398, + "step": 13900 + }, + { + "epoch": 0.02809908006318758, + "grad_norm": 1202.822265625, + "learning_rate": 2.7820000000000002e-06, + "loss": 7624.8703, + "step": 13910 + }, + { + "epoch": 0.028119280695871395, + "grad_norm": 40000.28125, + "learning_rate": 2.784e-06, + "loss": 8663.6945, + "step": 13920 + }, + { + "epoch": 0.02813948132855521, + "grad_norm": 42517.19921875, + "learning_rate": 2.7860000000000005e-06, + "loss": 6620.0773, + "step": 13930 + }, + { + "epoch": 0.028159681961239025, + "grad_norm": 0.0, + "learning_rate": 2.7880000000000002e-06, + "loss": 16824.8609, + "step": 13940 + }, + { + "epoch": 0.02817988259392284, + "grad_norm": 17611.54296875, + "learning_rate": 2.7900000000000004e-06, + "loss": 12648.6688, + "step": 13950 + }, + { + "epoch": 0.028200083226606658, + "grad_norm": 217381.9375, + "learning_rate": 2.792e-06, + "loss": 11423.6648, + "step": 13960 + }, + { + "epoch": 0.028220283859290474, + "grad_norm": 87559.046875, + "learning_rate": 2.794e-06, + "loss": 9963.9867, + "step": 13970 + }, + { + "epoch": 0.028240484491974287, + "grad_norm": 22645.1875, + "learning_rate": 2.7960000000000004e-06, + "loss": 13536.5922, + "step": 13980 + }, + { + "epoch": 0.028260685124658104, + "grad_norm": 45039.91796875, + "learning_rate": 2.798e-06, + "loss": 12002.9188, + "step": 13990 + }, + { + "epoch": 0.02828088575734192, + "grad_norm": 30949.95703125, + "learning_rate": 2.8000000000000003e-06, + "loss": 11150.8969, + "step": 14000 + }, + { + "epoch": 0.028301086390025737, + "grad_norm": 4246.37109375, + "learning_rate": 2.8020000000000004e-06, + "loss": 13348.5391, + "step": 14010 + }, + { + "epoch": 0.02832128702270955, + "grad_norm": 8115.3896484375, + "learning_rate": 2.804e-06, + "loss": 8750.5828, + "step": 14020 + }, + { + "epoch": 0.028341487655393367, + "grad_norm": 3538.811279296875, + "learning_rate": 2.8060000000000003e-06, + "loss": 17661.9922, + "step": 14030 + }, + { + "epoch": 0.028361688288077183, + "grad_norm": 78515.71875, + "learning_rate": 2.808e-06, + "loss": 15248.9578, + "step": 14040 + }, + { + "epoch": 0.028381888920761, + "grad_norm": 7392.57421875, + "learning_rate": 2.8100000000000006e-06, + "loss": 13764.2453, + "step": 14050 + }, + { + "epoch": 0.028402089553444813, + "grad_norm": 5001.90283203125, + "learning_rate": 2.8120000000000004e-06, + "loss": 5169.2871, + "step": 14060 + }, + { + "epoch": 0.02842229018612863, + "grad_norm": 7613.146484375, + "learning_rate": 2.814e-06, + "loss": 15861.5141, + "step": 14070 + }, + { + "epoch": 0.028442490818812446, + "grad_norm": 118792.8515625, + "learning_rate": 2.8160000000000002e-06, + "loss": 14122.5344, + "step": 14080 + }, + { + "epoch": 0.028462691451496262, + "grad_norm": 138931.046875, + "learning_rate": 2.8180000000000004e-06, + "loss": 10800.5602, + "step": 14090 + }, + { + "epoch": 0.028482892084180075, + "grad_norm": 42788.02734375, + "learning_rate": 2.82e-06, + "loss": 17629.1016, + "step": 14100 + }, + { + "epoch": 0.028503092716863892, + "grad_norm": 10886.0712890625, + "learning_rate": 2.8220000000000003e-06, + "loss": 19253.3969, + "step": 14110 + }, + { + "epoch": 0.02852329334954771, + "grad_norm": 1061.8985595703125, + "learning_rate": 2.824e-06, + "loss": 20724.3672, + "step": 14120 + }, + { + "epoch": 0.028543493982231525, + "grad_norm": 5569.22265625, + "learning_rate": 2.8260000000000006e-06, + "loss": 4230.1633, + "step": 14130 + }, + { + "epoch": 0.028563694614915338, + "grad_norm": 29828.978515625, + "learning_rate": 2.8280000000000003e-06, + "loss": 9171.4234, + "step": 14140 + }, + { + "epoch": 0.028583895247599155, + "grad_norm": 8023.9306640625, + "learning_rate": 2.83e-06, + "loss": 22179.6188, + "step": 14150 + }, + { + "epoch": 0.02860409588028297, + "grad_norm": 120329.234375, + "learning_rate": 2.832e-06, + "loss": 7507.0289, + "step": 14160 + }, + { + "epoch": 0.028624296512966788, + "grad_norm": 4185.22119140625, + "learning_rate": 2.834e-06, + "loss": 14160.6734, + "step": 14170 + }, + { + "epoch": 0.0286444971456506, + "grad_norm": 158098.109375, + "learning_rate": 2.8360000000000005e-06, + "loss": 14956.9937, + "step": 14180 + }, + { + "epoch": 0.028664697778334417, + "grad_norm": 80058.2734375, + "learning_rate": 2.838e-06, + "loss": 26785.6719, + "step": 14190 + }, + { + "epoch": 0.028684898411018234, + "grad_norm": 3021.18310546875, + "learning_rate": 2.84e-06, + "loss": 22100.8937, + "step": 14200 + }, + { + "epoch": 0.02870509904370205, + "grad_norm": 46829.1484375, + "learning_rate": 2.8420000000000005e-06, + "loss": 23999.2734, + "step": 14210 + }, + { + "epoch": 0.028725299676385863, + "grad_norm": 13218.3154296875, + "learning_rate": 2.8440000000000002e-06, + "loss": 15940.2797, + "step": 14220 + }, + { + "epoch": 0.02874550030906968, + "grad_norm": 15421.77734375, + "learning_rate": 2.8460000000000004e-06, + "loss": 14686.8328, + "step": 14230 + }, + { + "epoch": 0.028765700941753496, + "grad_norm": 845.8707275390625, + "learning_rate": 2.848e-06, + "loss": 17599.9641, + "step": 14240 + }, + { + "epoch": 0.028785901574437313, + "grad_norm": 82589.40625, + "learning_rate": 2.85e-06, + "loss": 18072.7578, + "step": 14250 + }, + { + "epoch": 0.028806102207121126, + "grad_norm": 26061.923828125, + "learning_rate": 2.8520000000000004e-06, + "loss": 2322.3699, + "step": 14260 + }, + { + "epoch": 0.028826302839804942, + "grad_norm": 10072.474609375, + "learning_rate": 2.854e-06, + "loss": 8132.3055, + "step": 14270 + }, + { + "epoch": 0.02884650347248876, + "grad_norm": 5162.27978515625, + "learning_rate": 2.8560000000000003e-06, + "loss": 10321.0039, + "step": 14280 + }, + { + "epoch": 0.028866704105172575, + "grad_norm": 20901.451171875, + "learning_rate": 2.8580000000000004e-06, + "loss": 25616.4953, + "step": 14290 + }, + { + "epoch": 0.02888690473785639, + "grad_norm": 82600.4296875, + "learning_rate": 2.86e-06, + "loss": 24604.55, + "step": 14300 + }, + { + "epoch": 0.028907105370540205, + "grad_norm": 34753.6328125, + "learning_rate": 2.8620000000000003e-06, + "loss": 8326.6102, + "step": 14310 + }, + { + "epoch": 0.02892730600322402, + "grad_norm": 82101.4296875, + "learning_rate": 2.864e-06, + "loss": 20057.6703, + "step": 14320 + }, + { + "epoch": 0.028947506635907838, + "grad_norm": 69007.3671875, + "learning_rate": 2.8660000000000006e-06, + "loss": 7849.1477, + "step": 14330 + }, + { + "epoch": 0.02896770726859165, + "grad_norm": 132981.390625, + "learning_rate": 2.8680000000000003e-06, + "loss": 21027.2219, + "step": 14340 + }, + { + "epoch": 0.028987907901275468, + "grad_norm": 5131.77099609375, + "learning_rate": 2.87e-06, + "loss": 15819.7641, + "step": 14350 + }, + { + "epoch": 0.029008108533959284, + "grad_norm": 0.0, + "learning_rate": 2.872e-06, + "loss": 19211.8734, + "step": 14360 + }, + { + "epoch": 0.0290283091666431, + "grad_norm": 13160.7119140625, + "learning_rate": 2.874e-06, + "loss": 10589.6156, + "step": 14370 + }, + { + "epoch": 0.029048509799326914, + "grad_norm": 49369.6015625, + "learning_rate": 2.8760000000000005e-06, + "loss": 30471.2906, + "step": 14380 + }, + { + "epoch": 0.02906871043201073, + "grad_norm": 14924.681640625, + "learning_rate": 2.8780000000000002e-06, + "loss": 17458.6406, + "step": 14390 + }, + { + "epoch": 0.029088911064694547, + "grad_norm": 709347.25, + "learning_rate": 2.88e-06, + "loss": 26335.9, + "step": 14400 + }, + { + "epoch": 0.029109111697378363, + "grad_norm": 7809.1796875, + "learning_rate": 2.8820000000000005e-06, + "loss": 6343.6633, + "step": 14410 + }, + { + "epoch": 0.029129312330062176, + "grad_norm": 109330.046875, + "learning_rate": 2.8840000000000003e-06, + "loss": 10608.2781, + "step": 14420 + }, + { + "epoch": 0.029149512962745993, + "grad_norm": 2975.815185546875, + "learning_rate": 2.8860000000000004e-06, + "loss": 8103.1477, + "step": 14430 + }, + { + "epoch": 0.02916971359542981, + "grad_norm": 7998.8525390625, + "learning_rate": 2.888e-06, + "loss": 19149.7703, + "step": 14440 + }, + { + "epoch": 0.029189914228113626, + "grad_norm": 92207.5625, + "learning_rate": 2.89e-06, + "loss": 10075.6172, + "step": 14450 + }, + { + "epoch": 0.02921011486079744, + "grad_norm": 12219.14453125, + "learning_rate": 2.8920000000000004e-06, + "loss": 24813.2812, + "step": 14460 + }, + { + "epoch": 0.029230315493481256, + "grad_norm": 637.7465209960938, + "learning_rate": 2.894e-06, + "loss": 7852.1039, + "step": 14470 + }, + { + "epoch": 0.029250516126165072, + "grad_norm": 67523.6171875, + "learning_rate": 2.8960000000000003e-06, + "loss": 21976.9594, + "step": 14480 + }, + { + "epoch": 0.02927071675884889, + "grad_norm": 229787.890625, + "learning_rate": 2.8980000000000005e-06, + "loss": 22247.0469, + "step": 14490 + }, + { + "epoch": 0.029290917391532702, + "grad_norm": 3118.15087890625, + "learning_rate": 2.9e-06, + "loss": 14907.6922, + "step": 14500 + }, + { + "epoch": 0.02931111802421652, + "grad_norm": 13669.6162109375, + "learning_rate": 2.9020000000000003e-06, + "loss": 11665.3867, + "step": 14510 + }, + { + "epoch": 0.029331318656900335, + "grad_norm": 174370.09375, + "learning_rate": 2.904e-06, + "loss": 10456.4672, + "step": 14520 + }, + { + "epoch": 0.02935151928958415, + "grad_norm": 27970.708984375, + "learning_rate": 2.9060000000000006e-06, + "loss": 10762.1242, + "step": 14530 + }, + { + "epoch": 0.029371719922267964, + "grad_norm": 0.0, + "learning_rate": 2.9080000000000004e-06, + "loss": 10749.3422, + "step": 14540 + }, + { + "epoch": 0.02939192055495178, + "grad_norm": 60732.05078125, + "learning_rate": 2.91e-06, + "loss": 6879.8945, + "step": 14550 + }, + { + "epoch": 0.029412121187635597, + "grad_norm": 3276.12158203125, + "learning_rate": 2.9120000000000002e-06, + "loss": 17205.0859, + "step": 14560 + }, + { + "epoch": 0.029432321820319414, + "grad_norm": 82156.4921875, + "learning_rate": 2.914e-06, + "loss": 15563.825, + "step": 14570 + }, + { + "epoch": 0.029452522453003227, + "grad_norm": 7938.693359375, + "learning_rate": 2.9160000000000005e-06, + "loss": 16743.9062, + "step": 14580 + }, + { + "epoch": 0.029472723085687044, + "grad_norm": 24446.73828125, + "learning_rate": 2.9180000000000003e-06, + "loss": 5533.0367, + "step": 14590 + }, + { + "epoch": 0.02949292371837086, + "grad_norm": 6836.45654296875, + "learning_rate": 2.92e-06, + "loss": 9810.3922, + "step": 14600 + }, + { + "epoch": 0.029513124351054677, + "grad_norm": 11404.931640625, + "learning_rate": 2.9220000000000006e-06, + "loss": 20133.6437, + "step": 14610 + }, + { + "epoch": 0.02953332498373849, + "grad_norm": 56458.984375, + "learning_rate": 2.9240000000000003e-06, + "loss": 10325.9844, + "step": 14620 + }, + { + "epoch": 0.029553525616422306, + "grad_norm": 81666.2890625, + "learning_rate": 2.9260000000000004e-06, + "loss": 23634.6719, + "step": 14630 + }, + { + "epoch": 0.029573726249106123, + "grad_norm": 80660.3984375, + "learning_rate": 2.928e-06, + "loss": 12756.1422, + "step": 14640 + }, + { + "epoch": 0.02959392688178994, + "grad_norm": 173424.4375, + "learning_rate": 2.93e-06, + "loss": 11412.9141, + "step": 14650 + }, + { + "epoch": 0.029614127514473752, + "grad_norm": 1118.6212158203125, + "learning_rate": 2.9320000000000005e-06, + "loss": 19196.7766, + "step": 14660 + }, + { + "epoch": 0.02963432814715757, + "grad_norm": 77642.1875, + "learning_rate": 2.934e-06, + "loss": 6594.8188, + "step": 14670 + }, + { + "epoch": 0.029654528779841385, + "grad_norm": 95290.7578125, + "learning_rate": 2.9360000000000003e-06, + "loss": 12828.3086, + "step": 14680 + }, + { + "epoch": 0.029674729412525202, + "grad_norm": 81175.984375, + "learning_rate": 2.9380000000000005e-06, + "loss": 10109.4266, + "step": 14690 + }, + { + "epoch": 0.029694930045209015, + "grad_norm": 13109.5654296875, + "learning_rate": 2.9400000000000002e-06, + "loss": 2511.5266, + "step": 14700 + }, + { + "epoch": 0.02971513067789283, + "grad_norm": 0.0, + "learning_rate": 2.9420000000000004e-06, + "loss": 4538.1477, + "step": 14710 + }, + { + "epoch": 0.029735331310576648, + "grad_norm": 21178.171875, + "learning_rate": 2.944e-06, + "loss": 24643.825, + "step": 14720 + }, + { + "epoch": 0.029755531943260465, + "grad_norm": 361046.25, + "learning_rate": 2.946e-06, + "loss": 14344.3594, + "step": 14730 + }, + { + "epoch": 0.029775732575944278, + "grad_norm": 11598.2119140625, + "learning_rate": 2.9480000000000004e-06, + "loss": 7876.4781, + "step": 14740 + }, + { + "epoch": 0.029795933208628094, + "grad_norm": 58749.05078125, + "learning_rate": 2.95e-06, + "loss": 12705.0734, + "step": 14750 + }, + { + "epoch": 0.02981613384131191, + "grad_norm": 13300.9306640625, + "learning_rate": 2.9520000000000003e-06, + "loss": 8180.093, + "step": 14760 + }, + { + "epoch": 0.029836334473995727, + "grad_norm": 25972.13671875, + "learning_rate": 2.954e-06, + "loss": 7277.5336, + "step": 14770 + }, + { + "epoch": 0.02985653510667954, + "grad_norm": 171471.34375, + "learning_rate": 2.956e-06, + "loss": 21007.0234, + "step": 14780 + }, + { + "epoch": 0.029876735739363357, + "grad_norm": 3798.3505859375, + "learning_rate": 2.9580000000000003e-06, + "loss": 15173.8531, + "step": 14790 + }, + { + "epoch": 0.029896936372047173, + "grad_norm": 108940.8828125, + "learning_rate": 2.96e-06, + "loss": 25525.2891, + "step": 14800 + }, + { + "epoch": 0.02991713700473099, + "grad_norm": 92299.765625, + "learning_rate": 2.9620000000000006e-06, + "loss": 13279.7109, + "step": 14810 + }, + { + "epoch": 0.029937337637414803, + "grad_norm": 21940.810546875, + "learning_rate": 2.9640000000000003e-06, + "loss": 9009.2594, + "step": 14820 + }, + { + "epoch": 0.02995753827009862, + "grad_norm": 43630.6875, + "learning_rate": 2.966e-06, + "loss": 10081.8969, + "step": 14830 + }, + { + "epoch": 0.029977738902782436, + "grad_norm": 42623.16796875, + "learning_rate": 2.9680000000000002e-06, + "loss": 15223.2203, + "step": 14840 + }, + { + "epoch": 0.029997939535466252, + "grad_norm": 1663.044921875, + "learning_rate": 2.97e-06, + "loss": 4367.4348, + "step": 14850 + }, + { + "epoch": 0.030018140168150065, + "grad_norm": 62046.52734375, + "learning_rate": 2.9720000000000005e-06, + "loss": 8420.1422, + "step": 14860 + }, + { + "epoch": 0.030038340800833882, + "grad_norm": 12863.66796875, + "learning_rate": 2.9740000000000002e-06, + "loss": 6435.0605, + "step": 14870 + }, + { + "epoch": 0.0300585414335177, + "grad_norm": 84751.1640625, + "learning_rate": 2.976e-06, + "loss": 9220.4156, + "step": 14880 + }, + { + "epoch": 0.030078742066201515, + "grad_norm": 12990.5322265625, + "learning_rate": 2.9780000000000005e-06, + "loss": 38632.7219, + "step": 14890 + }, + { + "epoch": 0.030098942698885328, + "grad_norm": 31026.849609375, + "learning_rate": 2.9800000000000003e-06, + "loss": 7617.6289, + "step": 14900 + }, + { + "epoch": 0.030119143331569145, + "grad_norm": 13054.3046875, + "learning_rate": 2.9820000000000004e-06, + "loss": 19622.9313, + "step": 14910 + }, + { + "epoch": 0.03013934396425296, + "grad_norm": 75679.2578125, + "learning_rate": 2.984e-06, + "loss": 17510.1906, + "step": 14920 + }, + { + "epoch": 0.030159544596936778, + "grad_norm": 38597.51953125, + "learning_rate": 2.986e-06, + "loss": 5165.5199, + "step": 14930 + }, + { + "epoch": 0.03017974522962059, + "grad_norm": 66096.9765625, + "learning_rate": 2.9880000000000004e-06, + "loss": 18957.9594, + "step": 14940 + }, + { + "epoch": 0.030199945862304407, + "grad_norm": 6212.60498046875, + "learning_rate": 2.99e-06, + "loss": 24253.35, + "step": 14950 + }, + { + "epoch": 0.030220146494988224, + "grad_norm": 2696.39306640625, + "learning_rate": 2.9920000000000003e-06, + "loss": 11583.4406, + "step": 14960 + }, + { + "epoch": 0.03024034712767204, + "grad_norm": 357256.46875, + "learning_rate": 2.994e-06, + "loss": 20950.5156, + "step": 14970 + }, + { + "epoch": 0.030260547760355853, + "grad_norm": 43397.08984375, + "learning_rate": 2.996e-06, + "loss": 40105.8875, + "step": 14980 + }, + { + "epoch": 0.03028074839303967, + "grad_norm": 1545.4395751953125, + "learning_rate": 2.9980000000000003e-06, + "loss": 15180.0, + "step": 14990 + }, + { + "epoch": 0.030300949025723486, + "grad_norm": 192773.796875, + "learning_rate": 3e-06, + "loss": 12873.125, + "step": 15000 + }, + { + "epoch": 0.030321149658407303, + "grad_norm": 41382.10546875, + "learning_rate": 3.0020000000000006e-06, + "loss": 13543.1547, + "step": 15010 + }, + { + "epoch": 0.030341350291091116, + "grad_norm": 3580.05322265625, + "learning_rate": 3.0040000000000004e-06, + "loss": 19569.7734, + "step": 15020 + }, + { + "epoch": 0.030361550923774933, + "grad_norm": 136265.375, + "learning_rate": 3.006e-06, + "loss": 13435.8844, + "step": 15030 + }, + { + "epoch": 0.03038175155645875, + "grad_norm": 104977.921875, + "learning_rate": 3.0080000000000003e-06, + "loss": 15861.25, + "step": 15040 + }, + { + "epoch": 0.030401952189142566, + "grad_norm": 266489.4375, + "learning_rate": 3.01e-06, + "loss": 13424.1422, + "step": 15050 + }, + { + "epoch": 0.03042215282182638, + "grad_norm": 5646.693359375, + "learning_rate": 3.0120000000000006e-06, + "loss": 7548.8625, + "step": 15060 + }, + { + "epoch": 0.030442353454510195, + "grad_norm": 6056.9892578125, + "learning_rate": 3.0140000000000003e-06, + "loss": 9783.4562, + "step": 15070 + }, + { + "epoch": 0.030462554087194012, + "grad_norm": 21704.591796875, + "learning_rate": 3.016e-06, + "loss": 24293.975, + "step": 15080 + }, + { + "epoch": 0.03048275471987783, + "grad_norm": 11672.8779296875, + "learning_rate": 3.0180000000000006e-06, + "loss": 17450.8281, + "step": 15090 + }, + { + "epoch": 0.03050295535256164, + "grad_norm": 424020.78125, + "learning_rate": 3.0200000000000003e-06, + "loss": 15003.1969, + "step": 15100 + }, + { + "epoch": 0.030523155985245458, + "grad_norm": 39879.27734375, + "learning_rate": 3.0220000000000005e-06, + "loss": 16772.0172, + "step": 15110 + }, + { + "epoch": 0.030543356617929274, + "grad_norm": 18552.01953125, + "learning_rate": 3.024e-06, + "loss": 8084.1992, + "step": 15120 + }, + { + "epoch": 0.03056355725061309, + "grad_norm": 53511.890625, + "learning_rate": 3.026e-06, + "loss": 4430.2199, + "step": 15130 + }, + { + "epoch": 0.030583757883296904, + "grad_norm": 36348.69140625, + "learning_rate": 3.0280000000000005e-06, + "loss": 6054.7445, + "step": 15140 + }, + { + "epoch": 0.03060395851598072, + "grad_norm": 79262.65625, + "learning_rate": 3.0300000000000002e-06, + "loss": 27908.0563, + "step": 15150 + }, + { + "epoch": 0.030624159148664537, + "grad_norm": 204977.640625, + "learning_rate": 3.0320000000000004e-06, + "loss": 20344.7719, + "step": 15160 + }, + { + "epoch": 0.030644359781348354, + "grad_norm": 74723.1015625, + "learning_rate": 3.034e-06, + "loss": 15124.3422, + "step": 15170 + }, + { + "epoch": 0.030664560414032167, + "grad_norm": 6469.0439453125, + "learning_rate": 3.0360000000000002e-06, + "loss": 12160.1422, + "step": 15180 + }, + { + "epoch": 0.030684761046715983, + "grad_norm": 54025.90625, + "learning_rate": 3.0380000000000004e-06, + "loss": 14692.5297, + "step": 15190 + }, + { + "epoch": 0.0307049616793998, + "grad_norm": 80788.65625, + "learning_rate": 3.04e-06, + "loss": 24216.1109, + "step": 15200 + }, + { + "epoch": 0.030725162312083613, + "grad_norm": 296906.75, + "learning_rate": 3.0420000000000007e-06, + "loss": 33442.2937, + "step": 15210 + }, + { + "epoch": 0.03074536294476743, + "grad_norm": 17547.361328125, + "learning_rate": 3.0440000000000004e-06, + "loss": 13166.6516, + "step": 15220 + }, + { + "epoch": 0.030765563577451246, + "grad_norm": 16411.708984375, + "learning_rate": 3.046e-06, + "loss": 14248.8094, + "step": 15230 + }, + { + "epoch": 0.030785764210135062, + "grad_norm": 51761.48828125, + "learning_rate": 3.0480000000000003e-06, + "loss": 8209.3141, + "step": 15240 + }, + { + "epoch": 0.030805964842818875, + "grad_norm": 5586.64892578125, + "learning_rate": 3.05e-06, + "loss": 32540.1437, + "step": 15250 + }, + { + "epoch": 0.030826165475502692, + "grad_norm": 357.0714111328125, + "learning_rate": 3.0520000000000006e-06, + "loss": 6399.1527, + "step": 15260 + }, + { + "epoch": 0.03084636610818651, + "grad_norm": 1665.0633544921875, + "learning_rate": 3.0540000000000003e-06, + "loss": 8389.3984, + "step": 15270 + }, + { + "epoch": 0.030866566740870325, + "grad_norm": 10101.0517578125, + "learning_rate": 3.056e-06, + "loss": 6395.6438, + "step": 15280 + }, + { + "epoch": 0.030886767373554138, + "grad_norm": 266672.5, + "learning_rate": 3.0580000000000006e-06, + "loss": 19059.1937, + "step": 15290 + }, + { + "epoch": 0.030906968006237955, + "grad_norm": 40406.16015625, + "learning_rate": 3.0600000000000003e-06, + "loss": 12898.2172, + "step": 15300 + }, + { + "epoch": 0.03092716863892177, + "grad_norm": 14205.7119140625, + "learning_rate": 3.0620000000000005e-06, + "loss": 5148.3695, + "step": 15310 + }, + { + "epoch": 0.030947369271605588, + "grad_norm": 72735.3046875, + "learning_rate": 3.0640000000000002e-06, + "loss": 6666.3938, + "step": 15320 + }, + { + "epoch": 0.0309675699042894, + "grad_norm": 12438.6572265625, + "learning_rate": 3.066e-06, + "loss": 16091.0703, + "step": 15330 + }, + { + "epoch": 0.030987770536973217, + "grad_norm": 30813.275390625, + "learning_rate": 3.0680000000000005e-06, + "loss": 19830.4094, + "step": 15340 + }, + { + "epoch": 0.031007971169657034, + "grad_norm": 149524.484375, + "learning_rate": 3.0700000000000003e-06, + "loss": 20583.8422, + "step": 15350 + }, + { + "epoch": 0.03102817180234085, + "grad_norm": 36877.1953125, + "learning_rate": 3.072e-06, + "loss": 21377.8625, + "step": 15360 + }, + { + "epoch": 0.031048372435024663, + "grad_norm": 39404.140625, + "learning_rate": 3.074e-06, + "loss": 15372.9172, + "step": 15370 + }, + { + "epoch": 0.03106857306770848, + "grad_norm": 16292.638671875, + "learning_rate": 3.0760000000000003e-06, + "loss": 9898.1891, + "step": 15380 + }, + { + "epoch": 0.031088773700392296, + "grad_norm": 27407.748046875, + "learning_rate": 3.0780000000000004e-06, + "loss": 29504.1469, + "step": 15390 + }, + { + "epoch": 0.031108974333076113, + "grad_norm": 26019.146484375, + "learning_rate": 3.08e-06, + "loss": 8706.0492, + "step": 15400 + }, + { + "epoch": 0.031129174965759926, + "grad_norm": 509.1998291015625, + "learning_rate": 3.082e-06, + "loss": 8775.9445, + "step": 15410 + }, + { + "epoch": 0.031149375598443742, + "grad_norm": 38399.55859375, + "learning_rate": 3.0840000000000005e-06, + "loss": 12517.4648, + "step": 15420 + }, + { + "epoch": 0.03116957623112756, + "grad_norm": 6094.88818359375, + "learning_rate": 3.086e-06, + "loss": 16520.2313, + "step": 15430 + }, + { + "epoch": 0.031189776863811376, + "grad_norm": 19074.59375, + "learning_rate": 3.0880000000000003e-06, + "loss": 10167.1359, + "step": 15440 + }, + { + "epoch": 0.03120997749649519, + "grad_norm": 26654.6640625, + "learning_rate": 3.09e-06, + "loss": 15815.6969, + "step": 15450 + }, + { + "epoch": 0.031230178129179005, + "grad_norm": 5842.7353515625, + "learning_rate": 3.092e-06, + "loss": 13671.5547, + "step": 15460 + }, + { + "epoch": 0.03125037876186282, + "grad_norm": 35120.55859375, + "learning_rate": 3.0940000000000004e-06, + "loss": 6530.0074, + "step": 15470 + }, + { + "epoch": 0.03127057939454664, + "grad_norm": 8211.90625, + "learning_rate": 3.096e-06, + "loss": 6967.0891, + "step": 15480 + }, + { + "epoch": 0.03129078002723045, + "grad_norm": 18445.724609375, + "learning_rate": 3.0980000000000007e-06, + "loss": 11791.943, + "step": 15490 + }, + { + "epoch": 0.03131098065991427, + "grad_norm": 35376.41796875, + "learning_rate": 3.1000000000000004e-06, + "loss": 20873.5062, + "step": 15500 + }, + { + "epoch": 0.031331181292598084, + "grad_norm": 10044.6826171875, + "learning_rate": 3.102e-06, + "loss": 24148.5156, + "step": 15510 + }, + { + "epoch": 0.0313513819252819, + "grad_norm": 71262.3515625, + "learning_rate": 3.1040000000000003e-06, + "loss": 12301.8664, + "step": 15520 + }, + { + "epoch": 0.03137158255796572, + "grad_norm": 11119.357421875, + "learning_rate": 3.106e-06, + "loss": 7664.6125, + "step": 15530 + }, + { + "epoch": 0.03139178319064953, + "grad_norm": 12951.9013671875, + "learning_rate": 3.1080000000000006e-06, + "loss": 10962.4328, + "step": 15540 + }, + { + "epoch": 0.03141198382333334, + "grad_norm": 65203.3046875, + "learning_rate": 3.1100000000000003e-06, + "loss": 17719.5078, + "step": 15550 + }, + { + "epoch": 0.03143218445601716, + "grad_norm": 12068.0673828125, + "learning_rate": 3.112e-06, + "loss": 9302.6578, + "step": 15560 + }, + { + "epoch": 0.031452385088700976, + "grad_norm": 28812.5546875, + "learning_rate": 3.114e-06, + "loss": 9652.6906, + "step": 15570 + }, + { + "epoch": 0.031472585721384796, + "grad_norm": 52319.03515625, + "learning_rate": 3.1160000000000003e-06, + "loss": 11449.0727, + "step": 15580 + }, + { + "epoch": 0.03149278635406861, + "grad_norm": 4770.78759765625, + "learning_rate": 3.1180000000000005e-06, + "loss": 14749.9703, + "step": 15590 + }, + { + "epoch": 0.03151298698675242, + "grad_norm": 5529.22802734375, + "learning_rate": 3.12e-06, + "loss": 16857.3516, + "step": 15600 + }, + { + "epoch": 0.03153318761943624, + "grad_norm": 25150.79296875, + "learning_rate": 3.122e-06, + "loss": 22488.7266, + "step": 15610 + }, + { + "epoch": 0.031553388252120056, + "grad_norm": 40191.8984375, + "learning_rate": 3.1240000000000005e-06, + "loss": 4790.6102, + "step": 15620 + }, + { + "epoch": 0.03157358888480387, + "grad_norm": 315476.875, + "learning_rate": 3.1260000000000002e-06, + "loss": 18140.175, + "step": 15630 + }, + { + "epoch": 0.03159378951748769, + "grad_norm": 100106.2265625, + "learning_rate": 3.1280000000000004e-06, + "loss": 13861.7875, + "step": 15640 + }, + { + "epoch": 0.0316139901501715, + "grad_norm": 74829.1484375, + "learning_rate": 3.13e-06, + "loss": 10590.0852, + "step": 15650 + }, + { + "epoch": 0.03163419078285532, + "grad_norm": 4740.21728515625, + "learning_rate": 3.132e-06, + "loss": 9846.0602, + "step": 15660 + }, + { + "epoch": 0.031654391415539135, + "grad_norm": 51321.76953125, + "learning_rate": 3.1340000000000004e-06, + "loss": 12541.8703, + "step": 15670 + }, + { + "epoch": 0.03167459204822295, + "grad_norm": 119332.703125, + "learning_rate": 3.136e-06, + "loss": 14249.2531, + "step": 15680 + }, + { + "epoch": 0.03169479268090677, + "grad_norm": 135517.625, + "learning_rate": 3.1380000000000003e-06, + "loss": 14406.8641, + "step": 15690 + }, + { + "epoch": 0.03171499331359058, + "grad_norm": 353974.0625, + "learning_rate": 3.1400000000000004e-06, + "loss": 19093.0938, + "step": 15700 + }, + { + "epoch": 0.031735193946274394, + "grad_norm": 34639.87109375, + "learning_rate": 3.142e-06, + "loss": 5145.4957, + "step": 15710 + }, + { + "epoch": 0.031755394578958214, + "grad_norm": 107715.5, + "learning_rate": 3.1440000000000003e-06, + "loss": 27366.4781, + "step": 15720 + }, + { + "epoch": 0.03177559521164203, + "grad_norm": 17573.82421875, + "learning_rate": 3.146e-06, + "loss": 10100.9148, + "step": 15730 + }, + { + "epoch": 0.03179579584432585, + "grad_norm": 72320.3671875, + "learning_rate": 3.1480000000000006e-06, + "loss": 4397.9563, + "step": 15740 + }, + { + "epoch": 0.03181599647700966, + "grad_norm": 14350.7314453125, + "learning_rate": 3.1500000000000003e-06, + "loss": 29038.3156, + "step": 15750 + }, + { + "epoch": 0.03183619710969347, + "grad_norm": 260974.390625, + "learning_rate": 3.152e-06, + "loss": 31778.6063, + "step": 15760 + }, + { + "epoch": 0.03185639774237729, + "grad_norm": 32370.0390625, + "learning_rate": 3.154e-06, + "loss": 6897.1687, + "step": 15770 + }, + { + "epoch": 0.031876598375061106, + "grad_norm": 64236.16015625, + "learning_rate": 3.1560000000000004e-06, + "loss": 10085.9875, + "step": 15780 + }, + { + "epoch": 0.03189679900774492, + "grad_norm": 29210.666015625, + "learning_rate": 3.1580000000000005e-06, + "loss": 7747.3531, + "step": 15790 + }, + { + "epoch": 0.03191699964042874, + "grad_norm": 26555.900390625, + "learning_rate": 3.1600000000000002e-06, + "loss": 8613.7375, + "step": 15800 + }, + { + "epoch": 0.03193720027311255, + "grad_norm": 8346.439453125, + "learning_rate": 3.162e-06, + "loss": 18301.4, + "step": 15810 + }, + { + "epoch": 0.03195740090579637, + "grad_norm": 20107.88671875, + "learning_rate": 3.1640000000000005e-06, + "loss": 11527.2398, + "step": 15820 + }, + { + "epoch": 0.031977601538480185, + "grad_norm": 21246.677734375, + "learning_rate": 3.1660000000000003e-06, + "loss": 7324.2883, + "step": 15830 + }, + { + "epoch": 0.031997802171164, + "grad_norm": 2181.66845703125, + "learning_rate": 3.1680000000000004e-06, + "loss": 20248.5078, + "step": 15840 + }, + { + "epoch": 0.03201800280384782, + "grad_norm": 120911.1953125, + "learning_rate": 3.17e-06, + "loss": 24139.9391, + "step": 15850 + }, + { + "epoch": 0.03203820343653163, + "grad_norm": 17454.255859375, + "learning_rate": 3.172e-06, + "loss": 12023.1125, + "step": 15860 + }, + { + "epoch": 0.032058404069215445, + "grad_norm": 1430.8677978515625, + "learning_rate": 3.1740000000000004e-06, + "loss": 12516.3523, + "step": 15870 + }, + { + "epoch": 0.032078604701899265, + "grad_norm": 611188.3125, + "learning_rate": 3.176e-06, + "loss": 24674.2984, + "step": 15880 + }, + { + "epoch": 0.03209880533458308, + "grad_norm": 273221.9375, + "learning_rate": 3.1780000000000003e-06, + "loss": 21131.6125, + "step": 15890 + }, + { + "epoch": 0.0321190059672669, + "grad_norm": 6008.62646484375, + "learning_rate": 3.1800000000000005e-06, + "loss": 25352.7188, + "step": 15900 + }, + { + "epoch": 0.03213920659995071, + "grad_norm": 35419.86328125, + "learning_rate": 3.182e-06, + "loss": 20032.8672, + "step": 15910 + }, + { + "epoch": 0.032159407232634524, + "grad_norm": 50854.4296875, + "learning_rate": 3.1840000000000003e-06, + "loss": 18728.5656, + "step": 15920 + }, + { + "epoch": 0.032179607865318344, + "grad_norm": 16192.96875, + "learning_rate": 3.186e-06, + "loss": 9744.9609, + "step": 15930 + }, + { + "epoch": 0.03219980849800216, + "grad_norm": 146753.109375, + "learning_rate": 3.188e-06, + "loss": 27664.425, + "step": 15940 + }, + { + "epoch": 0.03222000913068597, + "grad_norm": 26188.703125, + "learning_rate": 3.1900000000000004e-06, + "loss": 7899.8578, + "step": 15950 + }, + { + "epoch": 0.03224020976336979, + "grad_norm": 25683.427734375, + "learning_rate": 3.192e-06, + "loss": 11779.0148, + "step": 15960 + }, + { + "epoch": 0.0322604103960536, + "grad_norm": 67310.40625, + "learning_rate": 3.1940000000000003e-06, + "loss": 15998.6906, + "step": 15970 + }, + { + "epoch": 0.03228061102873742, + "grad_norm": 79459.1640625, + "learning_rate": 3.1960000000000004e-06, + "loss": 11697.5375, + "step": 15980 + }, + { + "epoch": 0.032300811661421236, + "grad_norm": 8730.689453125, + "learning_rate": 3.198e-06, + "loss": 25090.3906, + "step": 15990 + }, + { + "epoch": 0.03232101229410505, + "grad_norm": 10186.158203125, + "learning_rate": 3.2000000000000003e-06, + "loss": 11825.9078, + "step": 16000 + }, + { + "epoch": 0.03234121292678887, + "grad_norm": 4239.09619140625, + "learning_rate": 3.202e-06, + "loss": 13255.1406, + "step": 16010 + }, + { + "epoch": 0.03236141355947268, + "grad_norm": 56691.203125, + "learning_rate": 3.2040000000000006e-06, + "loss": 17544.3688, + "step": 16020 + }, + { + "epoch": 0.032381614192156495, + "grad_norm": 0.0, + "learning_rate": 3.2060000000000003e-06, + "loss": 19608.1469, + "step": 16030 + }, + { + "epoch": 0.032401814824840315, + "grad_norm": 193368.640625, + "learning_rate": 3.208e-06, + "loss": 23354.425, + "step": 16040 + }, + { + "epoch": 0.03242201545752413, + "grad_norm": 96181.1796875, + "learning_rate": 3.21e-06, + "loss": 18984.8609, + "step": 16050 + }, + { + "epoch": 0.03244221609020795, + "grad_norm": 70872.578125, + "learning_rate": 3.212e-06, + "loss": 19067.2797, + "step": 16060 + }, + { + "epoch": 0.03246241672289176, + "grad_norm": 140736.796875, + "learning_rate": 3.2140000000000005e-06, + "loss": 17485.775, + "step": 16070 + }, + { + "epoch": 0.032482617355575574, + "grad_norm": 2831.947265625, + "learning_rate": 3.216e-06, + "loss": 10644.1734, + "step": 16080 + }, + { + "epoch": 0.032502817988259394, + "grad_norm": 13163.2802734375, + "learning_rate": 3.218e-06, + "loss": 17551.2359, + "step": 16090 + }, + { + "epoch": 0.03252301862094321, + "grad_norm": 3544.086669921875, + "learning_rate": 3.2200000000000005e-06, + "loss": 8149.5133, + "step": 16100 + }, + { + "epoch": 0.03254321925362702, + "grad_norm": 21898.005859375, + "learning_rate": 3.2220000000000002e-06, + "loss": 28029.2594, + "step": 16110 + }, + { + "epoch": 0.03256341988631084, + "grad_norm": 75442.59375, + "learning_rate": 3.2240000000000004e-06, + "loss": 19827.6031, + "step": 16120 + }, + { + "epoch": 0.03258362051899465, + "grad_norm": 9873.7470703125, + "learning_rate": 3.226e-06, + "loss": 19043.3469, + "step": 16130 + }, + { + "epoch": 0.03260382115167847, + "grad_norm": 405085.71875, + "learning_rate": 3.228e-06, + "loss": 18137.8766, + "step": 16140 + }, + { + "epoch": 0.032624021784362287, + "grad_norm": 17343.326171875, + "learning_rate": 3.2300000000000004e-06, + "loss": 9458.65, + "step": 16150 + }, + { + "epoch": 0.0326442224170461, + "grad_norm": 115963.109375, + "learning_rate": 3.232e-06, + "loss": 12964.6039, + "step": 16160 + }, + { + "epoch": 0.03266442304972992, + "grad_norm": 0.0, + "learning_rate": 3.2340000000000003e-06, + "loss": 22704.2469, + "step": 16170 + }, + { + "epoch": 0.03268462368241373, + "grad_norm": 6045.40625, + "learning_rate": 3.2360000000000004e-06, + "loss": 15113.4297, + "step": 16180 + }, + { + "epoch": 0.032704824315097546, + "grad_norm": 14623.3330078125, + "learning_rate": 3.238e-06, + "loss": 5202.8188, + "step": 16190 + }, + { + "epoch": 0.032725024947781366, + "grad_norm": 209670.34375, + "learning_rate": 3.2400000000000003e-06, + "loss": 17867.325, + "step": 16200 + }, + { + "epoch": 0.03274522558046518, + "grad_norm": 31780.798828125, + "learning_rate": 3.242e-06, + "loss": 16723.2203, + "step": 16210 + }, + { + "epoch": 0.032765426213149, + "grad_norm": 129659.2734375, + "learning_rate": 3.2440000000000006e-06, + "loss": 17908.8094, + "step": 16220 + }, + { + "epoch": 0.03278562684583281, + "grad_norm": 27350.861328125, + "learning_rate": 3.2460000000000003e-06, + "loss": 24675.3266, + "step": 16230 + }, + { + "epoch": 0.032805827478516625, + "grad_norm": 8674.0625, + "learning_rate": 3.248e-06, + "loss": 12123.0219, + "step": 16240 + }, + { + "epoch": 0.032826028111200445, + "grad_norm": 85603.34375, + "learning_rate": 3.2500000000000002e-06, + "loss": 18968.9891, + "step": 16250 + }, + { + "epoch": 0.03284622874388426, + "grad_norm": 10447.5244140625, + "learning_rate": 3.252e-06, + "loss": 14503.9312, + "step": 16260 + }, + { + "epoch": 0.03286642937656807, + "grad_norm": 96753.03125, + "learning_rate": 3.2540000000000005e-06, + "loss": 11984.5688, + "step": 16270 + }, + { + "epoch": 0.03288663000925189, + "grad_norm": 3593.543212890625, + "learning_rate": 3.2560000000000003e-06, + "loss": 13990.0375, + "step": 16280 + }, + { + "epoch": 0.032906830641935704, + "grad_norm": 6617.42919921875, + "learning_rate": 3.258e-06, + "loss": 5273.2437, + "step": 16290 + }, + { + "epoch": 0.032927031274619524, + "grad_norm": 9372.26953125, + "learning_rate": 3.2600000000000006e-06, + "loss": 10414.2289, + "step": 16300 + }, + { + "epoch": 0.03294723190730334, + "grad_norm": 1195.3289794921875, + "learning_rate": 3.2620000000000003e-06, + "loss": 22901.6469, + "step": 16310 + }, + { + "epoch": 0.03296743253998715, + "grad_norm": 113486.953125, + "learning_rate": 3.2640000000000004e-06, + "loss": 9534.9234, + "step": 16320 + }, + { + "epoch": 0.03298763317267097, + "grad_norm": 104877.6875, + "learning_rate": 3.266e-06, + "loss": 12985.2086, + "step": 16330 + }, + { + "epoch": 0.03300783380535478, + "grad_norm": 10386.7802734375, + "learning_rate": 3.268e-06, + "loss": 3508.9137, + "step": 16340 + }, + { + "epoch": 0.033028034438038596, + "grad_norm": 99817.0859375, + "learning_rate": 3.2700000000000005e-06, + "loss": 21511.7188, + "step": 16350 + }, + { + "epoch": 0.033048235070722416, + "grad_norm": 5003.7060546875, + "learning_rate": 3.272e-06, + "loss": 10792.7891, + "step": 16360 + }, + { + "epoch": 0.03306843570340623, + "grad_norm": 2739.457763671875, + "learning_rate": 3.2740000000000003e-06, + "loss": 18253.6937, + "step": 16370 + }, + { + "epoch": 0.03308863633609005, + "grad_norm": 36231.48046875, + "learning_rate": 3.2760000000000005e-06, + "loss": 12563.2203, + "step": 16380 + }, + { + "epoch": 0.03310883696877386, + "grad_norm": 3848.15380859375, + "learning_rate": 3.278e-06, + "loss": 7853.4023, + "step": 16390 + }, + { + "epoch": 0.033129037601457675, + "grad_norm": 123650.4609375, + "learning_rate": 3.2800000000000004e-06, + "loss": 23933.4156, + "step": 16400 + }, + { + "epoch": 0.033149238234141495, + "grad_norm": 146966.421875, + "learning_rate": 3.282e-06, + "loss": 15252.2375, + "step": 16410 + }, + { + "epoch": 0.03316943886682531, + "grad_norm": 6271.6650390625, + "learning_rate": 3.2840000000000007e-06, + "loss": 8323.6594, + "step": 16420 + }, + { + "epoch": 0.03318963949950912, + "grad_norm": 4434.69287109375, + "learning_rate": 3.2860000000000004e-06, + "loss": 5542.1906, + "step": 16430 + }, + { + "epoch": 0.03320984013219294, + "grad_norm": 1219.5732421875, + "learning_rate": 3.288e-06, + "loss": 6033.3012, + "step": 16440 + }, + { + "epoch": 0.033230040764876755, + "grad_norm": 35225.734375, + "learning_rate": 3.2900000000000003e-06, + "loss": 19565.7281, + "step": 16450 + }, + { + "epoch": 0.033250241397560575, + "grad_norm": 13368.47265625, + "learning_rate": 3.292e-06, + "loss": 11121.7891, + "step": 16460 + }, + { + "epoch": 0.03327044203024439, + "grad_norm": 7032.96826171875, + "learning_rate": 3.2940000000000006e-06, + "loss": 6630.0242, + "step": 16470 + }, + { + "epoch": 0.0332906426629282, + "grad_norm": 1647.4195556640625, + "learning_rate": 3.2960000000000003e-06, + "loss": 8845.1344, + "step": 16480 + }, + { + "epoch": 0.03331084329561202, + "grad_norm": 8173.96240234375, + "learning_rate": 3.298e-06, + "loss": 10817.6844, + "step": 16490 + }, + { + "epoch": 0.033331043928295834, + "grad_norm": 11659.3662109375, + "learning_rate": 3.3000000000000006e-06, + "loss": 22584.9219, + "step": 16500 + }, + { + "epoch": 0.03335124456097965, + "grad_norm": 14257.68359375, + "learning_rate": 3.3020000000000003e-06, + "loss": 16697.7172, + "step": 16510 + }, + { + "epoch": 0.03337144519366347, + "grad_norm": 12166.982421875, + "learning_rate": 3.3040000000000005e-06, + "loss": 4789.6156, + "step": 16520 + }, + { + "epoch": 0.03339164582634728, + "grad_norm": 15526.052734375, + "learning_rate": 3.306e-06, + "loss": 8868.7437, + "step": 16530 + }, + { + "epoch": 0.0334118464590311, + "grad_norm": 106974.3984375, + "learning_rate": 3.308e-06, + "loss": 15233.6187, + "step": 16540 + }, + { + "epoch": 0.03343204709171491, + "grad_norm": 38532.36328125, + "learning_rate": 3.3100000000000005e-06, + "loss": 18870.075, + "step": 16550 + }, + { + "epoch": 0.033452247724398726, + "grad_norm": 35744.94921875, + "learning_rate": 3.3120000000000002e-06, + "loss": 22547.0203, + "step": 16560 + }, + { + "epoch": 0.033472448357082546, + "grad_norm": 5545.744140625, + "learning_rate": 3.314e-06, + "loss": 24805.8297, + "step": 16570 + }, + { + "epoch": 0.03349264898976636, + "grad_norm": 41578.41015625, + "learning_rate": 3.3160000000000005e-06, + "loss": 7763.25, + "step": 16580 + }, + { + "epoch": 0.03351284962245017, + "grad_norm": 2598.746826171875, + "learning_rate": 3.3180000000000003e-06, + "loss": 9626.0461, + "step": 16590 + }, + { + "epoch": 0.03353305025513399, + "grad_norm": 3137.630126953125, + "learning_rate": 3.3200000000000004e-06, + "loss": 8524.2023, + "step": 16600 + }, + { + "epoch": 0.033553250887817805, + "grad_norm": 18509.330078125, + "learning_rate": 3.322e-06, + "loss": 6090.8762, + "step": 16610 + }, + { + "epoch": 0.033573451520501625, + "grad_norm": 14516.33203125, + "learning_rate": 3.324e-06, + "loss": 14900.9062, + "step": 16620 + }, + { + "epoch": 0.03359365215318544, + "grad_norm": 33471.0078125, + "learning_rate": 3.3260000000000004e-06, + "loss": 10097.2289, + "step": 16630 + }, + { + "epoch": 0.03361385278586925, + "grad_norm": 52490.3203125, + "learning_rate": 3.328e-06, + "loss": 18656.725, + "step": 16640 + }, + { + "epoch": 0.03363405341855307, + "grad_norm": 23202.955078125, + "learning_rate": 3.3300000000000003e-06, + "loss": 10153.7687, + "step": 16650 + }, + { + "epoch": 0.033654254051236884, + "grad_norm": 6593.42138671875, + "learning_rate": 3.332e-06, + "loss": 14101.5469, + "step": 16660 + }, + { + "epoch": 0.0336744546839207, + "grad_norm": 81152.1875, + "learning_rate": 3.334e-06, + "loss": 7317.3469, + "step": 16670 + }, + { + "epoch": 0.03369465531660452, + "grad_norm": 4026.0634765625, + "learning_rate": 3.3360000000000003e-06, + "loss": 11080.6313, + "step": 16680 + }, + { + "epoch": 0.03371485594928833, + "grad_norm": 19021.298828125, + "learning_rate": 3.338e-06, + "loss": 11685.9937, + "step": 16690 + }, + { + "epoch": 0.03373505658197215, + "grad_norm": 308407.59375, + "learning_rate": 3.3400000000000006e-06, + "loss": 27133.3781, + "step": 16700 + }, + { + "epoch": 0.03375525721465596, + "grad_norm": 3376.509033203125, + "learning_rate": 3.3420000000000004e-06, + "loss": 25557.5781, + "step": 16710 + }, + { + "epoch": 0.033775457847339777, + "grad_norm": 11413.9013671875, + "learning_rate": 3.344e-06, + "loss": 18580.7172, + "step": 16720 + }, + { + "epoch": 0.033795658480023597, + "grad_norm": 3440.18115234375, + "learning_rate": 3.3460000000000002e-06, + "loss": 7087.5891, + "step": 16730 + }, + { + "epoch": 0.03381585911270741, + "grad_norm": 148136.015625, + "learning_rate": 3.348e-06, + "loss": 13512.5219, + "step": 16740 + }, + { + "epoch": 0.03383605974539122, + "grad_norm": 18137.13671875, + "learning_rate": 3.3500000000000005e-06, + "loss": 24024.9609, + "step": 16750 + }, + { + "epoch": 0.03385626037807504, + "grad_norm": 6912.65380859375, + "learning_rate": 3.3520000000000003e-06, + "loss": 3203.6346, + "step": 16760 + }, + { + "epoch": 0.033876461010758856, + "grad_norm": 7800.7412109375, + "learning_rate": 3.354e-06, + "loss": 23860.1063, + "step": 16770 + }, + { + "epoch": 0.033896661643442676, + "grad_norm": 373431.78125, + "learning_rate": 3.3560000000000006e-06, + "loss": 25209.5062, + "step": 16780 + }, + { + "epoch": 0.03391686227612649, + "grad_norm": 42367.60546875, + "learning_rate": 3.3580000000000003e-06, + "loss": 15318.0531, + "step": 16790 + }, + { + "epoch": 0.0339370629088103, + "grad_norm": 151208.921875, + "learning_rate": 3.3600000000000004e-06, + "loss": 6420.8949, + "step": 16800 + }, + { + "epoch": 0.03395726354149412, + "grad_norm": 26386.439453125, + "learning_rate": 3.362e-06, + "loss": 16404.7469, + "step": 16810 + }, + { + "epoch": 0.033977464174177935, + "grad_norm": 6438.2490234375, + "learning_rate": 3.364e-06, + "loss": 11794.875, + "step": 16820 + }, + { + "epoch": 0.03399766480686175, + "grad_norm": 99969.984375, + "learning_rate": 3.3660000000000005e-06, + "loss": 18131.1516, + "step": 16830 + }, + { + "epoch": 0.03401786543954557, + "grad_norm": 11545.2216796875, + "learning_rate": 3.368e-06, + "loss": 4151.2848, + "step": 16840 + }, + { + "epoch": 0.03403806607222938, + "grad_norm": 3393.8828125, + "learning_rate": 3.3700000000000003e-06, + "loss": 19711.9516, + "step": 16850 + }, + { + "epoch": 0.0340582667049132, + "grad_norm": 3949.08642578125, + "learning_rate": 3.372e-06, + "loss": 12221.4922, + "step": 16860 + }, + { + "epoch": 0.034078467337597014, + "grad_norm": 43901.1328125, + "learning_rate": 3.3740000000000002e-06, + "loss": 15828.0844, + "step": 16870 + }, + { + "epoch": 0.03409866797028083, + "grad_norm": 32736.6796875, + "learning_rate": 3.3760000000000004e-06, + "loss": 7265.1945, + "step": 16880 + }, + { + "epoch": 0.03411886860296465, + "grad_norm": 2712.299072265625, + "learning_rate": 3.378e-06, + "loss": 3297.143, + "step": 16890 + }, + { + "epoch": 0.03413906923564846, + "grad_norm": 55579.6953125, + "learning_rate": 3.3800000000000007e-06, + "loss": 11433.4461, + "step": 16900 + }, + { + "epoch": 0.03415926986833227, + "grad_norm": 12309.1767578125, + "learning_rate": 3.3820000000000004e-06, + "loss": 22062.2672, + "step": 16910 + }, + { + "epoch": 0.03417947050101609, + "grad_norm": 48222.87890625, + "learning_rate": 3.384e-06, + "loss": 5506.5148, + "step": 16920 + }, + { + "epoch": 0.034199671133699906, + "grad_norm": 129952.203125, + "learning_rate": 3.3860000000000003e-06, + "loss": 22663.3297, + "step": 16930 + }, + { + "epoch": 0.034219871766383726, + "grad_norm": 4283.2470703125, + "learning_rate": 3.388e-06, + "loss": 17597.3703, + "step": 16940 + }, + { + "epoch": 0.03424007239906754, + "grad_norm": 840.819091796875, + "learning_rate": 3.3900000000000006e-06, + "loss": 18916.4719, + "step": 16950 + }, + { + "epoch": 0.03426027303175135, + "grad_norm": 152302.25, + "learning_rate": 3.3920000000000003e-06, + "loss": 23552.5109, + "step": 16960 + }, + { + "epoch": 0.03428047366443517, + "grad_norm": 18784.564453125, + "learning_rate": 3.394e-06, + "loss": 26163.1172, + "step": 16970 + }, + { + "epoch": 0.034300674297118985, + "grad_norm": 7425.25732421875, + "learning_rate": 3.3960000000000006e-06, + "loss": 7574.3438, + "step": 16980 + }, + { + "epoch": 0.0343208749298028, + "grad_norm": 1015.4595336914062, + "learning_rate": 3.3980000000000003e-06, + "loss": 10558.3477, + "step": 16990 + }, + { + "epoch": 0.03434107556248662, + "grad_norm": 2871.915771484375, + "learning_rate": 3.4000000000000005e-06, + "loss": 4501.498, + "step": 17000 + }, + { + "epoch": 0.03436127619517043, + "grad_norm": 83271.8828125, + "learning_rate": 3.402e-06, + "loss": 25218.7719, + "step": 17010 + }, + { + "epoch": 0.03438147682785425, + "grad_norm": 43160.890625, + "learning_rate": 3.404e-06, + "loss": 8817.7922, + "step": 17020 + }, + { + "epoch": 0.034401677460538065, + "grad_norm": 23231.48828125, + "learning_rate": 3.4060000000000005e-06, + "loss": 3885.7984, + "step": 17030 + }, + { + "epoch": 0.03442187809322188, + "grad_norm": 3491.207763671875, + "learning_rate": 3.4080000000000002e-06, + "loss": 12160.4602, + "step": 17040 + }, + { + "epoch": 0.0344420787259057, + "grad_norm": 124416.53125, + "learning_rate": 3.4100000000000004e-06, + "loss": 11942.0352, + "step": 17050 + }, + { + "epoch": 0.03446227935858951, + "grad_norm": 4697.53857421875, + "learning_rate": 3.412e-06, + "loss": 12483.75, + "step": 17060 + }, + { + "epoch": 0.034482479991273324, + "grad_norm": 3951.79296875, + "learning_rate": 3.4140000000000003e-06, + "loss": 3913.6238, + "step": 17070 + }, + { + "epoch": 0.034502680623957144, + "grad_norm": 19934.150390625, + "learning_rate": 3.4160000000000004e-06, + "loss": 23656.3031, + "step": 17080 + }, + { + "epoch": 0.03452288125664096, + "grad_norm": 232613.875, + "learning_rate": 3.418e-06, + "loss": 20597.6359, + "step": 17090 + }, + { + "epoch": 0.03454308188932478, + "grad_norm": 161514.625, + "learning_rate": 3.4200000000000007e-06, + "loss": 21270.2797, + "step": 17100 + }, + { + "epoch": 0.03456328252200859, + "grad_norm": 22348.708984375, + "learning_rate": 3.4220000000000004e-06, + "loss": 35115.75, + "step": 17110 + }, + { + "epoch": 0.0345834831546924, + "grad_norm": 24508.236328125, + "learning_rate": 3.424e-06, + "loss": 7457.2039, + "step": 17120 + }, + { + "epoch": 0.03460368378737622, + "grad_norm": 25016.81640625, + "learning_rate": 3.4260000000000003e-06, + "loss": 4125.8871, + "step": 17130 + }, + { + "epoch": 0.034623884420060036, + "grad_norm": 29589.486328125, + "learning_rate": 3.428e-06, + "loss": 11598.0742, + "step": 17140 + }, + { + "epoch": 0.03464408505274385, + "grad_norm": 71626.5625, + "learning_rate": 3.4300000000000006e-06, + "loss": 11001.0125, + "step": 17150 + }, + { + "epoch": 0.03466428568542767, + "grad_norm": 78233.046875, + "learning_rate": 3.4320000000000003e-06, + "loss": 15454.125, + "step": 17160 + }, + { + "epoch": 0.03468448631811148, + "grad_norm": 436899.8125, + "learning_rate": 3.434e-06, + "loss": 28637.7375, + "step": 17170 + }, + { + "epoch": 0.0347046869507953, + "grad_norm": 7588.88330078125, + "learning_rate": 3.4360000000000006e-06, + "loss": 8261.7422, + "step": 17180 + }, + { + "epoch": 0.034724887583479115, + "grad_norm": 89926.03125, + "learning_rate": 3.4380000000000004e-06, + "loss": 18920.05, + "step": 17190 + }, + { + "epoch": 0.03474508821616293, + "grad_norm": 14734.162109375, + "learning_rate": 3.44e-06, + "loss": 34433.5094, + "step": 17200 + }, + { + "epoch": 0.03476528884884675, + "grad_norm": 207300.796875, + "learning_rate": 3.4420000000000002e-06, + "loss": 29748.7375, + "step": 17210 + }, + { + "epoch": 0.03478548948153056, + "grad_norm": 9209.4921875, + "learning_rate": 3.444e-06, + "loss": 21196.4578, + "step": 17220 + }, + { + "epoch": 0.034805690114214374, + "grad_norm": 70629.4609375, + "learning_rate": 3.4460000000000005e-06, + "loss": 8902.3672, + "step": 17230 + }, + { + "epoch": 0.034825890746898194, + "grad_norm": 3636.414306640625, + "learning_rate": 3.4480000000000003e-06, + "loss": 9006.3641, + "step": 17240 + }, + { + "epoch": 0.03484609137958201, + "grad_norm": 41923.15234375, + "learning_rate": 3.45e-06, + "loss": 10109.8602, + "step": 17250 + }, + { + "epoch": 0.03486629201226583, + "grad_norm": 256120.21875, + "learning_rate": 3.452e-06, + "loss": 15941.0453, + "step": 17260 + }, + { + "epoch": 0.03488649264494964, + "grad_norm": 27188.37890625, + "learning_rate": 3.4540000000000003e-06, + "loss": 12528.8445, + "step": 17270 + }, + { + "epoch": 0.034906693277633453, + "grad_norm": 61239.328125, + "learning_rate": 3.4560000000000005e-06, + "loss": 20430.8172, + "step": 17280 + }, + { + "epoch": 0.034926893910317273, + "grad_norm": 90972.625, + "learning_rate": 3.458e-06, + "loss": 9656.5656, + "step": 17290 + }, + { + "epoch": 0.03494709454300109, + "grad_norm": 24776.916015625, + "learning_rate": 3.46e-06, + "loss": 15002.5453, + "step": 17300 + }, + { + "epoch": 0.0349672951756849, + "grad_norm": 45235.30078125, + "learning_rate": 3.4620000000000005e-06, + "loss": 12877.8609, + "step": 17310 + }, + { + "epoch": 0.03498749580836872, + "grad_norm": 60230.98046875, + "learning_rate": 3.464e-06, + "loss": 12341.7883, + "step": 17320 + }, + { + "epoch": 0.03500769644105253, + "grad_norm": 30370.654296875, + "learning_rate": 3.4660000000000004e-06, + "loss": 17607.1922, + "step": 17330 + }, + { + "epoch": 0.03502789707373635, + "grad_norm": 11881.818359375, + "learning_rate": 3.468e-06, + "loss": 10358.9742, + "step": 17340 + }, + { + "epoch": 0.035048097706420166, + "grad_norm": 76757.6796875, + "learning_rate": 3.4700000000000002e-06, + "loss": 5226.5898, + "step": 17350 + }, + { + "epoch": 0.03506829833910398, + "grad_norm": 38634.328125, + "learning_rate": 3.4720000000000004e-06, + "loss": 28303.2062, + "step": 17360 + }, + { + "epoch": 0.0350884989717878, + "grad_norm": 341619.53125, + "learning_rate": 3.474e-06, + "loss": 18857.8422, + "step": 17370 + }, + { + "epoch": 0.03510869960447161, + "grad_norm": 1278.9058837890625, + "learning_rate": 3.4760000000000007e-06, + "loss": 5105.0574, + "step": 17380 + }, + { + "epoch": 0.035128900237155425, + "grad_norm": 39909.15625, + "learning_rate": 3.4780000000000004e-06, + "loss": 13325.8891, + "step": 17390 + }, + { + "epoch": 0.035149100869839245, + "grad_norm": 13091.666015625, + "learning_rate": 3.48e-06, + "loss": 25753.8516, + "step": 17400 + }, + { + "epoch": 0.03516930150252306, + "grad_norm": 24150.470703125, + "learning_rate": 3.4820000000000003e-06, + "loss": 8903.1281, + "step": 17410 + }, + { + "epoch": 0.03518950213520688, + "grad_norm": 260324.546875, + "learning_rate": 3.484e-06, + "loss": 21545.3453, + "step": 17420 + }, + { + "epoch": 0.03520970276789069, + "grad_norm": 44490.66015625, + "learning_rate": 3.4860000000000006e-06, + "loss": 43570.2406, + "step": 17430 + }, + { + "epoch": 0.035229903400574504, + "grad_norm": 46780.3984375, + "learning_rate": 3.4880000000000003e-06, + "loss": 10118.2875, + "step": 17440 + }, + { + "epoch": 0.035250104033258324, + "grad_norm": 28603.048828125, + "learning_rate": 3.49e-06, + "loss": 12383.0406, + "step": 17450 + }, + { + "epoch": 0.03527030466594214, + "grad_norm": 4830.92529296875, + "learning_rate": 3.492e-06, + "loss": 18394.5672, + "step": 17460 + }, + { + "epoch": 0.03529050529862595, + "grad_norm": 17417.24609375, + "learning_rate": 3.4940000000000003e-06, + "loss": 22264.9562, + "step": 17470 + }, + { + "epoch": 0.03531070593130977, + "grad_norm": 83910.6484375, + "learning_rate": 3.4960000000000005e-06, + "loss": 22635.7609, + "step": 17480 + }, + { + "epoch": 0.03533090656399358, + "grad_norm": 10870.6416015625, + "learning_rate": 3.4980000000000002e-06, + "loss": 23802.9953, + "step": 17490 + }, + { + "epoch": 0.0353511071966774, + "grad_norm": 21106.234375, + "learning_rate": 3.5e-06, + "loss": 5619.5813, + "step": 17500 + }, + { + "epoch": 0.035371307829361216, + "grad_norm": 38399.34375, + "learning_rate": 3.5020000000000005e-06, + "loss": 6996.9406, + "step": 17510 + }, + { + "epoch": 0.03539150846204503, + "grad_norm": 4990.86376953125, + "learning_rate": 3.5040000000000002e-06, + "loss": 14035.5828, + "step": 17520 + }, + { + "epoch": 0.03541170909472885, + "grad_norm": 21362.7265625, + "learning_rate": 3.5060000000000004e-06, + "loss": 12797.4266, + "step": 17530 + }, + { + "epoch": 0.03543190972741266, + "grad_norm": 4546.0869140625, + "learning_rate": 3.508e-06, + "loss": 12075.5609, + "step": 17540 + }, + { + "epoch": 0.035452110360096475, + "grad_norm": 1637.2991943359375, + "learning_rate": 3.5100000000000003e-06, + "loss": 20041.6016, + "step": 17550 + }, + { + "epoch": 0.035472310992780295, + "grad_norm": 38660.24609375, + "learning_rate": 3.5120000000000004e-06, + "loss": 16210.8047, + "step": 17560 + }, + { + "epoch": 0.03549251162546411, + "grad_norm": 15388.294921875, + "learning_rate": 3.514e-06, + "loss": 10838.1922, + "step": 17570 + }, + { + "epoch": 0.03551271225814793, + "grad_norm": 64607.078125, + "learning_rate": 3.5160000000000007e-06, + "loss": 10983.9211, + "step": 17580 + }, + { + "epoch": 0.03553291289083174, + "grad_norm": 28870.697265625, + "learning_rate": 3.5180000000000005e-06, + "loss": 10005.0156, + "step": 17590 + }, + { + "epoch": 0.035553113523515555, + "grad_norm": 64357.2109375, + "learning_rate": 3.52e-06, + "loss": 11288.1156, + "step": 17600 + }, + { + "epoch": 0.035573314156199375, + "grad_norm": 23149.013671875, + "learning_rate": 3.5220000000000003e-06, + "loss": 9972.0547, + "step": 17610 + }, + { + "epoch": 0.03559351478888319, + "grad_norm": 162811.8125, + "learning_rate": 3.524e-06, + "loss": 13973.075, + "step": 17620 + }, + { + "epoch": 0.035613715421567, + "grad_norm": 11130.142578125, + "learning_rate": 3.5260000000000006e-06, + "loss": 7857.3836, + "step": 17630 + }, + { + "epoch": 0.03563391605425082, + "grad_norm": 5469.89599609375, + "learning_rate": 3.5280000000000004e-06, + "loss": 6017.0988, + "step": 17640 + }, + { + "epoch": 0.035654116686934634, + "grad_norm": 10494.9013671875, + "learning_rate": 3.53e-06, + "loss": 17160.425, + "step": 17650 + }, + { + "epoch": 0.035674317319618454, + "grad_norm": 70731.109375, + "learning_rate": 3.5320000000000002e-06, + "loss": 4595.6258, + "step": 17660 + }, + { + "epoch": 0.03569451795230227, + "grad_norm": 10750.1044921875, + "learning_rate": 3.5340000000000004e-06, + "loss": 4257.0887, + "step": 17670 + }, + { + "epoch": 0.03571471858498608, + "grad_norm": 12976.7568359375, + "learning_rate": 3.5360000000000005e-06, + "loss": 11587.957, + "step": 17680 + }, + { + "epoch": 0.0357349192176699, + "grad_norm": 2401.432861328125, + "learning_rate": 3.5380000000000003e-06, + "loss": 14208.0156, + "step": 17690 + }, + { + "epoch": 0.03575511985035371, + "grad_norm": 51910.734375, + "learning_rate": 3.54e-06, + "loss": 14836.8969, + "step": 17700 + }, + { + "epoch": 0.035775320483037526, + "grad_norm": 290.2644958496094, + "learning_rate": 3.5420000000000006e-06, + "loss": 9808.3828, + "step": 17710 + }, + { + "epoch": 0.035795521115721346, + "grad_norm": 2831.1328125, + "learning_rate": 3.5440000000000003e-06, + "loss": 20257.4, + "step": 17720 + }, + { + "epoch": 0.03581572174840516, + "grad_norm": 149639.140625, + "learning_rate": 3.5460000000000004e-06, + "loss": 11171.5172, + "step": 17730 + }, + { + "epoch": 0.03583592238108898, + "grad_norm": 92128.2421875, + "learning_rate": 3.548e-06, + "loss": 7184.0969, + "step": 17740 + }, + { + "epoch": 0.03585612301377279, + "grad_norm": 5121.955078125, + "learning_rate": 3.5500000000000003e-06, + "loss": 27569.4375, + "step": 17750 + }, + { + "epoch": 0.035876323646456605, + "grad_norm": 7062.86376953125, + "learning_rate": 3.5520000000000005e-06, + "loss": 9260.7789, + "step": 17760 + }, + { + "epoch": 0.035896524279140425, + "grad_norm": 7794.8310546875, + "learning_rate": 3.554e-06, + "loss": 31852.9844, + "step": 17770 + }, + { + "epoch": 0.03591672491182424, + "grad_norm": 19726.341796875, + "learning_rate": 3.5560000000000008e-06, + "loss": 6184.8035, + "step": 17780 + }, + { + "epoch": 0.03593692554450805, + "grad_norm": 656.2503051757812, + "learning_rate": 3.5580000000000005e-06, + "loss": 6626.6125, + "step": 17790 + }, + { + "epoch": 0.03595712617719187, + "grad_norm": 53710.484375, + "learning_rate": 3.5600000000000002e-06, + "loss": 18580.2578, + "step": 17800 + }, + { + "epoch": 0.035977326809875684, + "grad_norm": 48620.89453125, + "learning_rate": 3.5620000000000004e-06, + "loss": 10915.85, + "step": 17810 + }, + { + "epoch": 0.035997527442559504, + "grad_norm": 115573.703125, + "learning_rate": 3.564e-06, + "loss": 11144.6016, + "step": 17820 + }, + { + "epoch": 0.03601772807524332, + "grad_norm": 49642.60546875, + "learning_rate": 3.566e-06, + "loss": 10085.2406, + "step": 17830 + }, + { + "epoch": 0.03603792870792713, + "grad_norm": 74848.1953125, + "learning_rate": 3.5680000000000004e-06, + "loss": 8539.1313, + "step": 17840 + }, + { + "epoch": 0.03605812934061095, + "grad_norm": 415497.8125, + "learning_rate": 3.57e-06, + "loss": 21792.8063, + "step": 17850 + }, + { + "epoch": 0.036078329973294763, + "grad_norm": 7001.62890625, + "learning_rate": 3.5720000000000003e-06, + "loss": 7649.5055, + "step": 17860 + }, + { + "epoch": 0.03609853060597858, + "grad_norm": 39210.265625, + "learning_rate": 3.5740000000000004e-06, + "loss": 14714.8094, + "step": 17870 + }, + { + "epoch": 0.0361187312386624, + "grad_norm": 5538.02587890625, + "learning_rate": 3.576e-06, + "loss": 12075.1852, + "step": 17880 + }, + { + "epoch": 0.03613893187134621, + "grad_norm": 7320.732421875, + "learning_rate": 3.5780000000000003e-06, + "loss": 6740.8539, + "step": 17890 + }, + { + "epoch": 0.03615913250403003, + "grad_norm": 106438.7890625, + "learning_rate": 3.58e-06, + "loss": 18595.8094, + "step": 17900 + }, + { + "epoch": 0.03617933313671384, + "grad_norm": 5093.02783203125, + "learning_rate": 3.5820000000000006e-06, + "loss": 13573.7547, + "step": 17910 + }, + { + "epoch": 0.036199533769397656, + "grad_norm": 47366.01953125, + "learning_rate": 3.5840000000000003e-06, + "loss": 32076.0062, + "step": 17920 + }, + { + "epoch": 0.036219734402081476, + "grad_norm": 3262.907958984375, + "learning_rate": 3.586e-06, + "loss": 14656.1078, + "step": 17930 + }, + { + "epoch": 0.03623993503476529, + "grad_norm": 18081.849609375, + "learning_rate": 3.588e-06, + "loss": 5790.1582, + "step": 17940 + }, + { + "epoch": 0.0362601356674491, + "grad_norm": 4011.283935546875, + "learning_rate": 3.5900000000000004e-06, + "loss": 2758.1791, + "step": 17950 + }, + { + "epoch": 0.03628033630013292, + "grad_norm": 8850.5712890625, + "learning_rate": 3.5920000000000005e-06, + "loss": 28747.4562, + "step": 17960 + }, + { + "epoch": 0.036300536932816735, + "grad_norm": 146707.1875, + "learning_rate": 3.5940000000000002e-06, + "loss": 12225.1648, + "step": 17970 + }, + { + "epoch": 0.036320737565500555, + "grad_norm": 13754.3583984375, + "learning_rate": 3.596e-06, + "loss": 20298.7469, + "step": 17980 + }, + { + "epoch": 0.03634093819818437, + "grad_norm": 565.1630859375, + "learning_rate": 3.5980000000000005e-06, + "loss": 8996.3773, + "step": 17990 + }, + { + "epoch": 0.03636113883086818, + "grad_norm": 252688.15625, + "learning_rate": 3.6000000000000003e-06, + "loss": 16525.2578, + "step": 18000 + }, + { + "epoch": 0.036381339463552, + "grad_norm": 76930.0703125, + "learning_rate": 3.6020000000000004e-06, + "loss": 31109.95, + "step": 18010 + }, + { + "epoch": 0.036401540096235814, + "grad_norm": 4840.16552734375, + "learning_rate": 3.604e-06, + "loss": 10014.0164, + "step": 18020 + }, + { + "epoch": 0.03642174072891963, + "grad_norm": 2818.22021484375, + "learning_rate": 3.606e-06, + "loss": 8438.3266, + "step": 18030 + }, + { + "epoch": 0.03644194136160345, + "grad_norm": 29042.8203125, + "learning_rate": 3.6080000000000004e-06, + "loss": 12076.8523, + "step": 18040 + }, + { + "epoch": 0.03646214199428726, + "grad_norm": 46622.9765625, + "learning_rate": 3.61e-06, + "loss": 5137.0125, + "step": 18050 + }, + { + "epoch": 0.03648234262697108, + "grad_norm": 43567.1953125, + "learning_rate": 3.6120000000000003e-06, + "loss": 6865.7867, + "step": 18060 + }, + { + "epoch": 0.03650254325965489, + "grad_norm": 13312.646484375, + "learning_rate": 3.6140000000000005e-06, + "loss": 10357.6445, + "step": 18070 + }, + { + "epoch": 0.036522743892338706, + "grad_norm": 44589.16796875, + "learning_rate": 3.616e-06, + "loss": 11118.5461, + "step": 18080 + }, + { + "epoch": 0.036542944525022526, + "grad_norm": 8535.9697265625, + "learning_rate": 3.6180000000000003e-06, + "loss": 6849.0672, + "step": 18090 + }, + { + "epoch": 0.03656314515770634, + "grad_norm": 17454.6875, + "learning_rate": 3.62e-06, + "loss": 20859.2734, + "step": 18100 + }, + { + "epoch": 0.03658334579039015, + "grad_norm": 174297.703125, + "learning_rate": 3.6220000000000006e-06, + "loss": 28005.3187, + "step": 18110 + }, + { + "epoch": 0.03660354642307397, + "grad_norm": 182458.390625, + "learning_rate": 3.6240000000000004e-06, + "loss": 11480.0172, + "step": 18120 + }, + { + "epoch": 0.036623747055757785, + "grad_norm": 3663.33544921875, + "learning_rate": 3.626e-06, + "loss": 16733.5516, + "step": 18130 + }, + { + "epoch": 0.036643947688441605, + "grad_norm": 109074.5546875, + "learning_rate": 3.6280000000000002e-06, + "loss": 7046.3594, + "step": 18140 + }, + { + "epoch": 0.03666414832112542, + "grad_norm": 9047.421875, + "learning_rate": 3.6300000000000004e-06, + "loss": 16343.2109, + "step": 18150 + }, + { + "epoch": 0.03668434895380923, + "grad_norm": 38229.8984375, + "learning_rate": 3.6320000000000005e-06, + "loss": 5669.3328, + "step": 18160 + }, + { + "epoch": 0.03670454958649305, + "grad_norm": 76971.875, + "learning_rate": 3.6340000000000003e-06, + "loss": 5838.984, + "step": 18170 + }, + { + "epoch": 0.036724750219176865, + "grad_norm": 25233.509765625, + "learning_rate": 3.636e-06, + "loss": 13984.9562, + "step": 18180 + }, + { + "epoch": 0.03674495085186068, + "grad_norm": 8538.9091796875, + "learning_rate": 3.6380000000000006e-06, + "loss": 13182.0844, + "step": 18190 + }, + { + "epoch": 0.0367651514845445, + "grad_norm": 220233.875, + "learning_rate": 3.6400000000000003e-06, + "loss": 31728.5156, + "step": 18200 + }, + { + "epoch": 0.03678535211722831, + "grad_norm": 47413.10546875, + "learning_rate": 3.6420000000000005e-06, + "loss": 18160.6547, + "step": 18210 + }, + { + "epoch": 0.03680555274991213, + "grad_norm": 8412.3173828125, + "learning_rate": 3.644e-06, + "loss": 2620.3158, + "step": 18220 + }, + { + "epoch": 0.036825753382595944, + "grad_norm": 110638.2890625, + "learning_rate": 3.646e-06, + "loss": 7332.2398, + "step": 18230 + }, + { + "epoch": 0.03684595401527976, + "grad_norm": 93816.515625, + "learning_rate": 3.6480000000000005e-06, + "loss": 7704.675, + "step": 18240 + }, + { + "epoch": 0.03686615464796358, + "grad_norm": 16953.9609375, + "learning_rate": 3.65e-06, + "loss": 9727.6578, + "step": 18250 + }, + { + "epoch": 0.03688635528064739, + "grad_norm": 1179.1768798828125, + "learning_rate": 3.6520000000000004e-06, + "loss": 5820.541, + "step": 18260 + }, + { + "epoch": 0.0369065559133312, + "grad_norm": 37224.88671875, + "learning_rate": 3.6540000000000005e-06, + "loss": 18983.3281, + "step": 18270 + }, + { + "epoch": 0.03692675654601502, + "grad_norm": 52014.44921875, + "learning_rate": 3.6560000000000002e-06, + "loss": 5734.1313, + "step": 18280 + }, + { + "epoch": 0.036946957178698836, + "grad_norm": 23458.369140625, + "learning_rate": 3.6580000000000004e-06, + "loss": 12525.4156, + "step": 18290 + }, + { + "epoch": 0.03696715781138265, + "grad_norm": 6279.208984375, + "learning_rate": 3.66e-06, + "loss": 8377.5125, + "step": 18300 + }, + { + "epoch": 0.03698735844406647, + "grad_norm": 40800.0, + "learning_rate": 3.6620000000000007e-06, + "loss": 25188.0281, + "step": 18310 + }, + { + "epoch": 0.03700755907675028, + "grad_norm": 8989.099609375, + "learning_rate": 3.6640000000000004e-06, + "loss": 10361.675, + "step": 18320 + }, + { + "epoch": 0.0370277597094341, + "grad_norm": 16129.947265625, + "learning_rate": 3.666e-06, + "loss": 16121.2141, + "step": 18330 + }, + { + "epoch": 0.037047960342117915, + "grad_norm": 71407.6171875, + "learning_rate": 3.6680000000000003e-06, + "loss": 13762.1469, + "step": 18340 + }, + { + "epoch": 0.03706816097480173, + "grad_norm": 956.4016723632812, + "learning_rate": 3.6700000000000004e-06, + "loss": 6586.3578, + "step": 18350 + }, + { + "epoch": 0.03708836160748555, + "grad_norm": 40807.4609375, + "learning_rate": 3.6720000000000006e-06, + "loss": 7304.707, + "step": 18360 + }, + { + "epoch": 0.03710856224016936, + "grad_norm": 29203.7890625, + "learning_rate": 3.6740000000000003e-06, + "loss": 9177.1484, + "step": 18370 + }, + { + "epoch": 0.037128762872853174, + "grad_norm": 52033.1171875, + "learning_rate": 3.676e-06, + "loss": 23089.8875, + "step": 18380 + }, + { + "epoch": 0.037148963505536994, + "grad_norm": 1171.755615234375, + "learning_rate": 3.6780000000000006e-06, + "loss": 7560.7547, + "step": 18390 + }, + { + "epoch": 0.03716916413822081, + "grad_norm": 72953.453125, + "learning_rate": 3.6800000000000003e-06, + "loss": 8742.6039, + "step": 18400 + }, + { + "epoch": 0.03718936477090463, + "grad_norm": 51018.296875, + "learning_rate": 3.6820000000000005e-06, + "loss": 6255.6844, + "step": 18410 + }, + { + "epoch": 0.03720956540358844, + "grad_norm": 132652.0, + "learning_rate": 3.6840000000000002e-06, + "loss": 12292.8148, + "step": 18420 + }, + { + "epoch": 0.037229766036272254, + "grad_norm": 2471.40087890625, + "learning_rate": 3.686e-06, + "loss": 13232.4219, + "step": 18430 + }, + { + "epoch": 0.037249966668956074, + "grad_norm": 11225.302734375, + "learning_rate": 3.6880000000000005e-06, + "loss": 6769.1961, + "step": 18440 + }, + { + "epoch": 0.03727016730163989, + "grad_norm": 36483.32421875, + "learning_rate": 3.6900000000000002e-06, + "loss": 10644.7297, + "step": 18450 + }, + { + "epoch": 0.0372903679343237, + "grad_norm": 3811.140625, + "learning_rate": 3.692e-06, + "loss": 8666.2797, + "step": 18460 + }, + { + "epoch": 0.03731056856700752, + "grad_norm": 9950.71875, + "learning_rate": 3.6940000000000005e-06, + "loss": 12365.5328, + "step": 18470 + }, + { + "epoch": 0.03733076919969133, + "grad_norm": 6506.4951171875, + "learning_rate": 3.6960000000000003e-06, + "loss": 24491.1953, + "step": 18480 + }, + { + "epoch": 0.03735096983237515, + "grad_norm": 10206.599609375, + "learning_rate": 3.6980000000000004e-06, + "loss": 8828.7437, + "step": 18490 + }, + { + "epoch": 0.037371170465058966, + "grad_norm": 26155.27734375, + "learning_rate": 3.7e-06, + "loss": 15434.8406, + "step": 18500 + }, + { + "epoch": 0.03739137109774278, + "grad_norm": 17480.060546875, + "learning_rate": 3.702e-06, + "loss": 13136.0156, + "step": 18510 + }, + { + "epoch": 0.0374115717304266, + "grad_norm": 20925.740234375, + "learning_rate": 3.7040000000000005e-06, + "loss": 9061.1, + "step": 18520 + }, + { + "epoch": 0.03743177236311041, + "grad_norm": 66862.6328125, + "learning_rate": 3.706e-06, + "loss": 15322.1688, + "step": 18530 + }, + { + "epoch": 0.037451972995794225, + "grad_norm": 1475.4560546875, + "learning_rate": 3.7080000000000003e-06, + "loss": 10611.7547, + "step": 18540 + }, + { + "epoch": 0.037472173628478045, + "grad_norm": 3046.6943359375, + "learning_rate": 3.7100000000000005e-06, + "loss": 10734.0406, + "step": 18550 + }, + { + "epoch": 0.03749237426116186, + "grad_norm": 20673.505859375, + "learning_rate": 3.712e-06, + "loss": 15429.9297, + "step": 18560 + }, + { + "epoch": 0.03751257489384568, + "grad_norm": 7178.5751953125, + "learning_rate": 3.7140000000000004e-06, + "loss": 12812.2984, + "step": 18570 + }, + { + "epoch": 0.03753277552652949, + "grad_norm": 6221.658203125, + "learning_rate": 3.716e-06, + "loss": 11967.1227, + "step": 18580 + }, + { + "epoch": 0.037552976159213304, + "grad_norm": 28461.07421875, + "learning_rate": 3.7180000000000007e-06, + "loss": 11248.9336, + "step": 18590 + }, + { + "epoch": 0.037573176791897124, + "grad_norm": 15188.3125, + "learning_rate": 3.7200000000000004e-06, + "loss": 20849.9391, + "step": 18600 + }, + { + "epoch": 0.03759337742458094, + "grad_norm": 91898.6015625, + "learning_rate": 3.722e-06, + "loss": 6660.2352, + "step": 18610 + }, + { + "epoch": 0.03761357805726475, + "grad_norm": 29357.7109375, + "learning_rate": 3.7240000000000003e-06, + "loss": 19245.0062, + "step": 18620 + }, + { + "epoch": 0.03763377868994857, + "grad_norm": 89862.859375, + "learning_rate": 3.726e-06, + "loss": 19802.9922, + "step": 18630 + }, + { + "epoch": 0.03765397932263238, + "grad_norm": 2792.38037109375, + "learning_rate": 3.7280000000000006e-06, + "loss": 26023.45, + "step": 18640 + }, + { + "epoch": 0.0376741799553162, + "grad_norm": 296159.5, + "learning_rate": 3.7300000000000003e-06, + "loss": 17789.0125, + "step": 18650 + }, + { + "epoch": 0.037694380588000016, + "grad_norm": 14617.0087890625, + "learning_rate": 3.732e-06, + "loss": 17595.7922, + "step": 18660 + }, + { + "epoch": 0.03771458122068383, + "grad_norm": 38809.76171875, + "learning_rate": 3.7340000000000006e-06, + "loss": 14074.3813, + "step": 18670 + }, + { + "epoch": 0.03773478185336765, + "grad_norm": 48258.78125, + "learning_rate": 3.7360000000000003e-06, + "loss": 6602.6422, + "step": 18680 + }, + { + "epoch": 0.03775498248605146, + "grad_norm": 12384.3837890625, + "learning_rate": 3.7380000000000005e-06, + "loss": 7082.7672, + "step": 18690 + }, + { + "epoch": 0.037775183118735275, + "grad_norm": 24258.859375, + "learning_rate": 3.74e-06, + "loss": 4788.1187, + "step": 18700 + }, + { + "epoch": 0.037795383751419095, + "grad_norm": 622.4581298828125, + "learning_rate": 3.742e-06, + "loss": 8643.3687, + "step": 18710 + }, + { + "epoch": 0.03781558438410291, + "grad_norm": 61545.09375, + "learning_rate": 3.7440000000000005e-06, + "loss": 24553.1437, + "step": 18720 + }, + { + "epoch": 0.03783578501678673, + "grad_norm": 119518.953125, + "learning_rate": 3.7460000000000002e-06, + "loss": 11716.7141, + "step": 18730 + }, + { + "epoch": 0.03785598564947054, + "grad_norm": 29753.20703125, + "learning_rate": 3.7480000000000004e-06, + "loss": 9987.1758, + "step": 18740 + }, + { + "epoch": 0.037876186282154355, + "grad_norm": 6608.53564453125, + "learning_rate": 3.7500000000000005e-06, + "loss": 20151.1, + "step": 18750 + }, + { + "epoch": 0.037896386914838175, + "grad_norm": 22476.19140625, + "learning_rate": 3.7520000000000002e-06, + "loss": 11413.8133, + "step": 18760 + }, + { + "epoch": 0.03791658754752199, + "grad_norm": 22604.654296875, + "learning_rate": 3.7540000000000004e-06, + "loss": 4143.648, + "step": 18770 + }, + { + "epoch": 0.0379367881802058, + "grad_norm": 16475.169921875, + "learning_rate": 3.756e-06, + "loss": 13147.2766, + "step": 18780 + }, + { + "epoch": 0.03795698881288962, + "grad_norm": 228872.0, + "learning_rate": 3.7580000000000007e-06, + "loss": 12242.0555, + "step": 18790 + }, + { + "epoch": 0.037977189445573434, + "grad_norm": 27332.892578125, + "learning_rate": 3.7600000000000004e-06, + "loss": 5230.3492, + "step": 18800 + }, + { + "epoch": 0.037997390078257254, + "grad_norm": 16476.978515625, + "learning_rate": 3.762e-06, + "loss": 11785.4883, + "step": 18810 + }, + { + "epoch": 0.03801759071094107, + "grad_norm": 8877.462890625, + "learning_rate": 3.7640000000000003e-06, + "loss": 16149.7984, + "step": 18820 + }, + { + "epoch": 0.03803779134362488, + "grad_norm": 3145.625, + "learning_rate": 3.766e-06, + "loss": 6857.5164, + "step": 18830 + }, + { + "epoch": 0.0380579919763087, + "grad_norm": 48797.6796875, + "learning_rate": 3.7680000000000006e-06, + "loss": 16853.2188, + "step": 18840 + }, + { + "epoch": 0.03807819260899251, + "grad_norm": 168626.90625, + "learning_rate": 3.7700000000000003e-06, + "loss": 10535.2391, + "step": 18850 + }, + { + "epoch": 0.038098393241676326, + "grad_norm": 25806.015625, + "learning_rate": 3.772e-06, + "loss": 6326.0754, + "step": 18860 + }, + { + "epoch": 0.038118593874360146, + "grad_norm": 8304.234375, + "learning_rate": 3.7740000000000006e-06, + "loss": 21886.0344, + "step": 18870 + }, + { + "epoch": 0.03813879450704396, + "grad_norm": 16034.05859375, + "learning_rate": 3.7760000000000004e-06, + "loss": 6679.5031, + "step": 18880 + }, + { + "epoch": 0.03815899513972778, + "grad_norm": 10029.650390625, + "learning_rate": 3.7780000000000005e-06, + "loss": 14846.4969, + "step": 18890 + }, + { + "epoch": 0.03817919577241159, + "grad_norm": 200343.609375, + "learning_rate": 3.7800000000000002e-06, + "loss": 13216.7437, + "step": 18900 + }, + { + "epoch": 0.038199396405095405, + "grad_norm": 282126.875, + "learning_rate": 3.782e-06, + "loss": 24993.5906, + "step": 18910 + }, + { + "epoch": 0.038219597037779225, + "grad_norm": 26833.888671875, + "learning_rate": 3.7840000000000005e-06, + "loss": 18478.1219, + "step": 18920 + }, + { + "epoch": 0.03823979767046304, + "grad_norm": 142096.5, + "learning_rate": 3.7860000000000003e-06, + "loss": 12279.1063, + "step": 18930 + }, + { + "epoch": 0.03825999830314685, + "grad_norm": 53160.11328125, + "learning_rate": 3.7880000000000004e-06, + "loss": 10863.2023, + "step": 18940 + }, + { + "epoch": 0.03828019893583067, + "grad_norm": 17252.162109375, + "learning_rate": 3.79e-06, + "loss": 8756.1375, + "step": 18950 + }, + { + "epoch": 0.038300399568514484, + "grad_norm": 280133.375, + "learning_rate": 3.7920000000000003e-06, + "loss": 19940.1797, + "step": 18960 + }, + { + "epoch": 0.038320600201198304, + "grad_norm": 24026.98046875, + "learning_rate": 3.7940000000000004e-06, + "loss": 12046.7164, + "step": 18970 + }, + { + "epoch": 0.03834080083388212, + "grad_norm": 12333.4970703125, + "learning_rate": 3.796e-06, + "loss": 13698.1609, + "step": 18980 + }, + { + "epoch": 0.03836100146656593, + "grad_norm": 34509.37109375, + "learning_rate": 3.7980000000000007e-06, + "loss": 16757.1312, + "step": 18990 + }, + { + "epoch": 0.03838120209924975, + "grad_norm": 12526.1181640625, + "learning_rate": 3.8000000000000005e-06, + "loss": 15912.7984, + "step": 19000 + }, + { + "epoch": 0.038401402731933564, + "grad_norm": 1869.41455078125, + "learning_rate": 3.802e-06, + "loss": 4866.734, + "step": 19010 + }, + { + "epoch": 0.03842160336461738, + "grad_norm": 32723.7734375, + "learning_rate": 3.8040000000000003e-06, + "loss": 13541.575, + "step": 19020 + }, + { + "epoch": 0.0384418039973012, + "grad_norm": 370197.4375, + "learning_rate": 3.806e-06, + "loss": 21044.7, + "step": 19030 + }, + { + "epoch": 0.03846200462998501, + "grad_norm": 49600.48828125, + "learning_rate": 3.8080000000000006e-06, + "loss": 9330.143, + "step": 19040 + }, + { + "epoch": 0.03848220526266883, + "grad_norm": 73124.7421875, + "learning_rate": 3.8100000000000004e-06, + "loss": 18616.3187, + "step": 19050 + }, + { + "epoch": 0.03850240589535264, + "grad_norm": 378420.40625, + "learning_rate": 3.812e-06, + "loss": 18539.7453, + "step": 19060 + }, + { + "epoch": 0.038522606528036456, + "grad_norm": 157510.71875, + "learning_rate": 3.8140000000000007e-06, + "loss": 14230.8156, + "step": 19070 + }, + { + "epoch": 0.038542807160720276, + "grad_norm": 2333.369384765625, + "learning_rate": 3.816e-06, + "loss": 5559.0039, + "step": 19080 + }, + { + "epoch": 0.03856300779340409, + "grad_norm": 38869.2734375, + "learning_rate": 3.818e-06, + "loss": 17234.0953, + "step": 19090 + }, + { + "epoch": 0.0385832084260879, + "grad_norm": 4858.8447265625, + "learning_rate": 3.820000000000001e-06, + "loss": 14546.7516, + "step": 19100 + }, + { + "epoch": 0.03860340905877172, + "grad_norm": 374399.15625, + "learning_rate": 3.822e-06, + "loss": 32097.375, + "step": 19110 + }, + { + "epoch": 0.038623609691455535, + "grad_norm": 43732.8203125, + "learning_rate": 3.824e-06, + "loss": 12242.9023, + "step": 19120 + }, + { + "epoch": 0.038643810324139355, + "grad_norm": 61637.35546875, + "learning_rate": 3.826e-06, + "loss": 10633.4891, + "step": 19130 + }, + { + "epoch": 0.03866401095682317, + "grad_norm": 2589.6484375, + "learning_rate": 3.8280000000000004e-06, + "loss": 6345.6223, + "step": 19140 + }, + { + "epoch": 0.03868421158950698, + "grad_norm": 18671.640625, + "learning_rate": 3.830000000000001e-06, + "loss": 16210.7812, + "step": 19150 + }, + { + "epoch": 0.0387044122221908, + "grad_norm": 8696.7431640625, + "learning_rate": 3.832e-06, + "loss": 14050.5781, + "step": 19160 + }, + { + "epoch": 0.038724612854874614, + "grad_norm": 57809.59375, + "learning_rate": 3.834000000000001e-06, + "loss": 7498.9117, + "step": 19170 + }, + { + "epoch": 0.03874481348755843, + "grad_norm": 21359.751953125, + "learning_rate": 3.836e-06, + "loss": 14632.7156, + "step": 19180 + }, + { + "epoch": 0.03876501412024225, + "grad_norm": 68046.234375, + "learning_rate": 3.838e-06, + "loss": 7022.2109, + "step": 19190 + }, + { + "epoch": 0.03878521475292606, + "grad_norm": 25198.63671875, + "learning_rate": 3.8400000000000005e-06, + "loss": 13879.4844, + "step": 19200 + }, + { + "epoch": 0.03880541538560988, + "grad_norm": 180105.4375, + "learning_rate": 3.842e-06, + "loss": 11255.6734, + "step": 19210 + }, + { + "epoch": 0.03882561601829369, + "grad_norm": 15222.6962890625, + "learning_rate": 3.844000000000001e-06, + "loss": 6811.2242, + "step": 19220 + }, + { + "epoch": 0.038845816650977506, + "grad_norm": 20212.240234375, + "learning_rate": 3.846e-06, + "loss": 11284.5094, + "step": 19230 + }, + { + "epoch": 0.038866017283661326, + "grad_norm": 70617.546875, + "learning_rate": 3.848e-06, + "loss": 12398.6617, + "step": 19240 + }, + { + "epoch": 0.03888621791634514, + "grad_norm": 97230.40625, + "learning_rate": 3.85e-06, + "loss": 16121.4813, + "step": 19250 + }, + { + "epoch": 0.03890641854902895, + "grad_norm": 0.0, + "learning_rate": 3.8520000000000006e-06, + "loss": 6351.8875, + "step": 19260 + }, + { + "epoch": 0.03892661918171277, + "grad_norm": 2159.069091796875, + "learning_rate": 3.854000000000001e-06, + "loss": 6938.9445, + "step": 19270 + }, + { + "epoch": 0.038946819814396585, + "grad_norm": 59112.51953125, + "learning_rate": 3.856e-06, + "loss": 10423.6602, + "step": 19280 + }, + { + "epoch": 0.038967020447080405, + "grad_norm": 2271.201904296875, + "learning_rate": 3.858e-06, + "loss": 4408.616, + "step": 19290 + }, + { + "epoch": 0.03898722107976422, + "grad_norm": 149454.265625, + "learning_rate": 3.86e-06, + "loss": 19615.3828, + "step": 19300 + }, + { + "epoch": 0.03900742171244803, + "grad_norm": 2879.17529296875, + "learning_rate": 3.8620000000000005e-06, + "loss": 10446.368, + "step": 19310 + }, + { + "epoch": 0.03902762234513185, + "grad_norm": 5566.5830078125, + "learning_rate": 3.864000000000001e-06, + "loss": 11429.1086, + "step": 19320 + }, + { + "epoch": 0.039047822977815665, + "grad_norm": 23290.99609375, + "learning_rate": 3.866e-06, + "loss": 13705.6078, + "step": 19330 + }, + { + "epoch": 0.03906802361049948, + "grad_norm": 8318.490234375, + "learning_rate": 3.868e-06, + "loss": 13809.2188, + "step": 19340 + }, + { + "epoch": 0.0390882242431833, + "grad_norm": 883.906494140625, + "learning_rate": 3.87e-06, + "loss": 16917.1766, + "step": 19350 + }, + { + "epoch": 0.03910842487586711, + "grad_norm": 20733.658203125, + "learning_rate": 3.872e-06, + "loss": 5677.6809, + "step": 19360 + }, + { + "epoch": 0.03912862550855093, + "grad_norm": 29309.845703125, + "learning_rate": 3.8740000000000005e-06, + "loss": 18377.0656, + "step": 19370 + }, + { + "epoch": 0.039148826141234744, + "grad_norm": 36606.59765625, + "learning_rate": 3.876000000000001e-06, + "loss": 6151.1367, + "step": 19380 + }, + { + "epoch": 0.03916902677391856, + "grad_norm": 27136.0625, + "learning_rate": 3.878e-06, + "loss": 9178.7711, + "step": 19390 + }, + { + "epoch": 0.03918922740660238, + "grad_norm": 4800.24462890625, + "learning_rate": 3.88e-06, + "loss": 13871.5625, + "step": 19400 + }, + { + "epoch": 0.03920942803928619, + "grad_norm": 395291.46875, + "learning_rate": 3.882e-06, + "loss": 20877.1781, + "step": 19410 + }, + { + "epoch": 0.03922962867197, + "grad_norm": 11841.423828125, + "learning_rate": 3.884e-06, + "loss": 5089.6121, + "step": 19420 + }, + { + "epoch": 0.03924982930465382, + "grad_norm": 4864.7841796875, + "learning_rate": 3.8860000000000006e-06, + "loss": 7845.8977, + "step": 19430 + }, + { + "epoch": 0.039270029937337636, + "grad_norm": 184354.390625, + "learning_rate": 3.888e-06, + "loss": 18131.2672, + "step": 19440 + }, + { + "epoch": 0.039290230570021456, + "grad_norm": 12674.912109375, + "learning_rate": 3.89e-06, + "loss": 3064.2414, + "step": 19450 + }, + { + "epoch": 0.03931043120270527, + "grad_norm": 20357.416015625, + "learning_rate": 3.892e-06, + "loss": 9751.2656, + "step": 19460 + }, + { + "epoch": 0.03933063183538908, + "grad_norm": 11267.412109375, + "learning_rate": 3.894e-06, + "loss": 14429.7094, + "step": 19470 + }, + { + "epoch": 0.0393508324680729, + "grad_norm": 2400.853271484375, + "learning_rate": 3.8960000000000005e-06, + "loss": 9654.7609, + "step": 19480 + }, + { + "epoch": 0.039371033100756715, + "grad_norm": 43792.59375, + "learning_rate": 3.898e-06, + "loss": 6583.2906, + "step": 19490 + }, + { + "epoch": 0.03939123373344053, + "grad_norm": 228446.46875, + "learning_rate": 3.900000000000001e-06, + "loss": 26303.0438, + "step": 19500 + }, + { + "epoch": 0.03941143436612435, + "grad_norm": 6595.0146484375, + "learning_rate": 3.902e-06, + "loss": 13009.2734, + "step": 19510 + }, + { + "epoch": 0.03943163499880816, + "grad_norm": 101425.7890625, + "learning_rate": 3.904e-06, + "loss": 9669.8656, + "step": 19520 + }, + { + "epoch": 0.03945183563149198, + "grad_norm": 188515.828125, + "learning_rate": 3.906e-06, + "loss": 12911.5641, + "step": 19530 + }, + { + "epoch": 0.039472036264175794, + "grad_norm": 27023.373046875, + "learning_rate": 3.9080000000000005e-06, + "loss": 19256.3656, + "step": 19540 + }, + { + "epoch": 0.03949223689685961, + "grad_norm": 71211.078125, + "learning_rate": 3.910000000000001e-06, + "loss": 9943.4523, + "step": 19550 + }, + { + "epoch": 0.03951243752954343, + "grad_norm": 13512.775390625, + "learning_rate": 3.912e-06, + "loss": 3651.7895, + "step": 19560 + }, + { + "epoch": 0.03953263816222724, + "grad_norm": 142433.109375, + "learning_rate": 3.914000000000001e-06, + "loss": 7065.2219, + "step": 19570 + }, + { + "epoch": 0.039552838794911054, + "grad_norm": 4739.43896484375, + "learning_rate": 3.916e-06, + "loss": 24514.2328, + "step": 19580 + }, + { + "epoch": 0.039573039427594874, + "grad_norm": 23313.763671875, + "learning_rate": 3.9180000000000004e-06, + "loss": 6755.3687, + "step": 19590 + }, + { + "epoch": 0.03959324006027869, + "grad_norm": 36637.32421875, + "learning_rate": 3.920000000000001e-06, + "loss": 9541.1227, + "step": 19600 + }, + { + "epoch": 0.03961344069296251, + "grad_norm": 42019.18359375, + "learning_rate": 3.922e-06, + "loss": 30166.5687, + "step": 19610 + }, + { + "epoch": 0.03963364132564632, + "grad_norm": 64568.06640625, + "learning_rate": 3.924000000000001e-06, + "loss": 15263.7406, + "step": 19620 + }, + { + "epoch": 0.03965384195833013, + "grad_norm": 40484.796875, + "learning_rate": 3.926e-06, + "loss": 12199.4359, + "step": 19630 + }, + { + "epoch": 0.03967404259101395, + "grad_norm": 1217.168701171875, + "learning_rate": 3.928e-06, + "loss": 17628.5438, + "step": 19640 + }, + { + "epoch": 0.039694243223697766, + "grad_norm": 78925.6015625, + "learning_rate": 3.9300000000000005e-06, + "loss": 13901.925, + "step": 19650 + }, + { + "epoch": 0.03971444385638158, + "grad_norm": 1505.5421142578125, + "learning_rate": 3.932000000000001e-06, + "loss": 6057.5602, + "step": 19660 + }, + { + "epoch": 0.0397346444890654, + "grad_norm": 83544.3125, + "learning_rate": 3.934000000000001e-06, + "loss": 19350.7531, + "step": 19670 + }, + { + "epoch": 0.03975484512174921, + "grad_norm": 490007.28125, + "learning_rate": 3.936e-06, + "loss": 17145.8937, + "step": 19680 + }, + { + "epoch": 0.03977504575443303, + "grad_norm": 18837.369140625, + "learning_rate": 3.938e-06, + "loss": 10782.2375, + "step": 19690 + }, + { + "epoch": 0.039795246387116845, + "grad_norm": 19478.833984375, + "learning_rate": 3.94e-06, + "loss": 9695.4312, + "step": 19700 + }, + { + "epoch": 0.03981544701980066, + "grad_norm": 12431.0556640625, + "learning_rate": 3.9420000000000005e-06, + "loss": 15933.8047, + "step": 19710 + }, + { + "epoch": 0.03983564765248448, + "grad_norm": 39473.29296875, + "learning_rate": 3.944e-06, + "loss": 13922.6031, + "step": 19720 + }, + { + "epoch": 0.03985584828516829, + "grad_norm": 37167.91796875, + "learning_rate": 3.946e-06, + "loss": 16770.0875, + "step": 19730 + }, + { + "epoch": 0.039876048917852104, + "grad_norm": 22429.83203125, + "learning_rate": 3.948e-06, + "loss": 14607.6766, + "step": 19740 + }, + { + "epoch": 0.039896249550535924, + "grad_norm": 163037.796875, + "learning_rate": 3.95e-06, + "loss": 8699.3844, + "step": 19750 + }, + { + "epoch": 0.03991645018321974, + "grad_norm": 11087.583984375, + "learning_rate": 3.9520000000000004e-06, + "loss": 10472.5477, + "step": 19760 + }, + { + "epoch": 0.03993665081590356, + "grad_norm": 29226.154296875, + "learning_rate": 3.954e-06, + "loss": 5340.1855, + "step": 19770 + }, + { + "epoch": 0.03995685144858737, + "grad_norm": 4486.671875, + "learning_rate": 3.956000000000001e-06, + "loss": 16672.525, + "step": 19780 + }, + { + "epoch": 0.03997705208127118, + "grad_norm": 23203.337890625, + "learning_rate": 3.958e-06, + "loss": 8434.9656, + "step": 19790 + }, + { + "epoch": 0.039997252713955, + "grad_norm": 861.356689453125, + "learning_rate": 3.96e-06, + "loss": 11687.4266, + "step": 19800 + }, + { + "epoch": 0.040017453346638816, + "grad_norm": 10705.0556640625, + "learning_rate": 3.962e-06, + "loss": 17939.3578, + "step": 19810 + }, + { + "epoch": 0.04003765397932263, + "grad_norm": 4102.68798828125, + "learning_rate": 3.964e-06, + "loss": 20227.3984, + "step": 19820 + }, + { + "epoch": 0.04005785461200645, + "grad_norm": 24104.177734375, + "learning_rate": 3.966000000000001e-06, + "loss": 20682.3422, + "step": 19830 + }, + { + "epoch": 0.04007805524469026, + "grad_norm": 215405.546875, + "learning_rate": 3.968e-06, + "loss": 16380.7188, + "step": 19840 + }, + { + "epoch": 0.04009825587737408, + "grad_norm": 6026.5, + "learning_rate": 3.97e-06, + "loss": 11538.5367, + "step": 19850 + }, + { + "epoch": 0.040118456510057895, + "grad_norm": 87194.8984375, + "learning_rate": 3.972e-06, + "loss": 21936.7844, + "step": 19860 + }, + { + "epoch": 0.04013865714274171, + "grad_norm": 1434.62890625, + "learning_rate": 3.974e-06, + "loss": 3927.2664, + "step": 19870 + }, + { + "epoch": 0.04015885777542553, + "grad_norm": 2043.8070068359375, + "learning_rate": 3.9760000000000006e-06, + "loss": 5434.3996, + "step": 19880 + }, + { + "epoch": 0.04017905840810934, + "grad_norm": 124651.3671875, + "learning_rate": 3.978e-06, + "loss": 6669.5758, + "step": 19890 + }, + { + "epoch": 0.040199259040793155, + "grad_norm": 46682.00390625, + "learning_rate": 3.980000000000001e-06, + "loss": 26592.6375, + "step": 19900 + }, + { + "epoch": 0.040219459673476975, + "grad_norm": 75412.171875, + "learning_rate": 3.982e-06, + "loss": 21367.7484, + "step": 19910 + }, + { + "epoch": 0.04023966030616079, + "grad_norm": 27703.607421875, + "learning_rate": 3.984e-06, + "loss": 4927.2043, + "step": 19920 + }, + { + "epoch": 0.04025986093884461, + "grad_norm": 17571.92578125, + "learning_rate": 3.9860000000000005e-06, + "loss": 22451.1297, + "step": 19930 + }, + { + "epoch": 0.04028006157152842, + "grad_norm": 8593.9384765625, + "learning_rate": 3.988000000000001e-06, + "loss": 7165.6492, + "step": 19940 + }, + { + "epoch": 0.040300262204212234, + "grad_norm": 21293.169921875, + "learning_rate": 3.990000000000001e-06, + "loss": 9282.1625, + "step": 19950 + }, + { + "epoch": 0.040320462836896054, + "grad_norm": 14623.568359375, + "learning_rate": 3.992e-06, + "loss": 15874.9172, + "step": 19960 + }, + { + "epoch": 0.04034066346957987, + "grad_norm": 21166.54296875, + "learning_rate": 3.994e-06, + "loss": 11546.6617, + "step": 19970 + }, + { + "epoch": 0.04036086410226368, + "grad_norm": 8017.93212890625, + "learning_rate": 3.996e-06, + "loss": 16563.7219, + "step": 19980 + }, + { + "epoch": 0.0403810647349475, + "grad_norm": 15459.2353515625, + "learning_rate": 3.9980000000000005e-06, + "loss": 5830.0113, + "step": 19990 + }, + { + "epoch": 0.04040126536763131, + "grad_norm": 829.9239501953125, + "learning_rate": 4.000000000000001e-06, + "loss": 7941.8914, + "step": 20000 + }, + { + "epoch": 0.04042146600031513, + "grad_norm": 30664.212890625, + "learning_rate": 4.002e-06, + "loss": 13358.4844, + "step": 20010 + }, + { + "epoch": 0.040441666632998946, + "grad_norm": 3769.098876953125, + "learning_rate": 4.004e-06, + "loss": 4238.5816, + "step": 20020 + }, + { + "epoch": 0.04046186726568276, + "grad_norm": 35927.26171875, + "learning_rate": 4.006e-06, + "loss": 14376.1391, + "step": 20030 + }, + { + "epoch": 0.04048206789836658, + "grad_norm": 72370.6171875, + "learning_rate": 4.008e-06, + "loss": 17591.1437, + "step": 20040 + }, + { + "epoch": 0.04050226853105039, + "grad_norm": 21044.755859375, + "learning_rate": 4.0100000000000006e-06, + "loss": 11678.6297, + "step": 20050 + }, + { + "epoch": 0.040522469163734205, + "grad_norm": 2694.89697265625, + "learning_rate": 4.012000000000001e-06, + "loss": 19917.2234, + "step": 20060 + }, + { + "epoch": 0.040542669796418025, + "grad_norm": 8534.0810546875, + "learning_rate": 4.014e-06, + "loss": 15669.6812, + "step": 20070 + }, + { + "epoch": 0.04056287042910184, + "grad_norm": 40554.4609375, + "learning_rate": 4.016e-06, + "loss": 9859.8328, + "step": 20080 + }, + { + "epoch": 0.04058307106178566, + "grad_norm": 100620.0234375, + "learning_rate": 4.018e-06, + "loss": 9330.0289, + "step": 20090 + }, + { + "epoch": 0.04060327169446947, + "grad_norm": 292.8013610839844, + "learning_rate": 4.0200000000000005e-06, + "loss": 5164.302, + "step": 20100 + }, + { + "epoch": 0.040623472327153284, + "grad_norm": 19894.732421875, + "learning_rate": 4.022000000000001e-06, + "loss": 11659.2891, + "step": 20110 + }, + { + "epoch": 0.040643672959837104, + "grad_norm": 673614.875, + "learning_rate": 4.024e-06, + "loss": 30072.4906, + "step": 20120 + }, + { + "epoch": 0.04066387359252092, + "grad_norm": 2166.123046875, + "learning_rate": 4.026e-06, + "loss": 10695.0922, + "step": 20130 + }, + { + "epoch": 0.04068407422520473, + "grad_norm": 146851.734375, + "learning_rate": 4.028e-06, + "loss": 13049.3406, + "step": 20140 + }, + { + "epoch": 0.04070427485788855, + "grad_norm": 39538.69921875, + "learning_rate": 4.03e-06, + "loss": 13386.4516, + "step": 20150 + }, + { + "epoch": 0.040724475490572364, + "grad_norm": 256753.90625, + "learning_rate": 4.0320000000000005e-06, + "loss": 16107.2125, + "step": 20160 + }, + { + "epoch": 0.040744676123256184, + "grad_norm": 11618.8154296875, + "learning_rate": 4.034e-06, + "loss": 5431.9738, + "step": 20170 + }, + { + "epoch": 0.04076487675594, + "grad_norm": 1949.908447265625, + "learning_rate": 4.036000000000001e-06, + "loss": 12859.0984, + "step": 20180 + }, + { + "epoch": 0.04078507738862381, + "grad_norm": 37490.5234375, + "learning_rate": 4.038e-06, + "loss": 12364.7273, + "step": 20190 + }, + { + "epoch": 0.04080527802130763, + "grad_norm": 1909.9342041015625, + "learning_rate": 4.04e-06, + "loss": 15808.4844, + "step": 20200 + }, + { + "epoch": 0.04082547865399144, + "grad_norm": 110939.6015625, + "learning_rate": 4.0420000000000004e-06, + "loss": 10539.9898, + "step": 20210 + }, + { + "epoch": 0.040845679286675256, + "grad_norm": 18233.8671875, + "learning_rate": 4.044e-06, + "loss": 11950.5594, + "step": 20220 + }, + { + "epoch": 0.040865879919359076, + "grad_norm": 1902.1495361328125, + "learning_rate": 4.046000000000001e-06, + "loss": 4194.7457, + "step": 20230 + }, + { + "epoch": 0.04088608055204289, + "grad_norm": 2279.4375, + "learning_rate": 4.048e-06, + "loss": 22817.8859, + "step": 20240 + }, + { + "epoch": 0.04090628118472671, + "grad_norm": 220886.6875, + "learning_rate": 4.05e-06, + "loss": 11702.2109, + "step": 20250 + }, + { + "epoch": 0.04092648181741052, + "grad_norm": 34932.34765625, + "learning_rate": 4.052e-06, + "loss": 11375.3352, + "step": 20260 + }, + { + "epoch": 0.040946682450094335, + "grad_norm": 18751.203125, + "learning_rate": 4.0540000000000005e-06, + "loss": 13402.9906, + "step": 20270 + }, + { + "epoch": 0.040966883082778155, + "grad_norm": 159262.546875, + "learning_rate": 4.056000000000001e-06, + "loss": 28019.0438, + "step": 20280 + }, + { + "epoch": 0.04098708371546197, + "grad_norm": 39303.5078125, + "learning_rate": 4.058e-06, + "loss": 21292.5672, + "step": 20290 + }, + { + "epoch": 0.04100728434814578, + "grad_norm": 22992.169921875, + "learning_rate": 4.060000000000001e-06, + "loss": 7274.3352, + "step": 20300 + }, + { + "epoch": 0.0410274849808296, + "grad_norm": 47013.765625, + "learning_rate": 4.062e-06, + "loss": 11589.7281, + "step": 20310 + }, + { + "epoch": 0.041047685613513414, + "grad_norm": 110987.03125, + "learning_rate": 4.064e-06, + "loss": 12293.5117, + "step": 20320 + }, + { + "epoch": 0.041067886246197234, + "grad_norm": 25178.734375, + "learning_rate": 4.0660000000000005e-06, + "loss": 24002.6766, + "step": 20330 + }, + { + "epoch": 0.04108808687888105, + "grad_norm": 98149.078125, + "learning_rate": 4.068000000000001e-06, + "loss": 10805.5859, + "step": 20340 + }, + { + "epoch": 0.04110828751156486, + "grad_norm": 8479.25390625, + "learning_rate": 4.07e-06, + "loss": 7065.9508, + "step": 20350 + }, + { + "epoch": 0.04112848814424868, + "grad_norm": 7644.482421875, + "learning_rate": 4.072e-06, + "loss": 5766.9004, + "step": 20360 + }, + { + "epoch": 0.04114868877693249, + "grad_norm": 1959.3966064453125, + "learning_rate": 4.074e-06, + "loss": 27079.2531, + "step": 20370 + }, + { + "epoch": 0.041168889409616306, + "grad_norm": 83776.4921875, + "learning_rate": 4.0760000000000004e-06, + "loss": 19679.0687, + "step": 20380 + }, + { + "epoch": 0.041189090042300126, + "grad_norm": 18246.0859375, + "learning_rate": 4.078000000000001e-06, + "loss": 6469.5367, + "step": 20390 + }, + { + "epoch": 0.04120929067498394, + "grad_norm": 14713.056640625, + "learning_rate": 4.08e-06, + "loss": 15868.7906, + "step": 20400 + }, + { + "epoch": 0.04122949130766776, + "grad_norm": 95092.2265625, + "learning_rate": 4.082e-06, + "loss": 11238.5187, + "step": 20410 + }, + { + "epoch": 0.04124969194035157, + "grad_norm": 20432.869140625, + "learning_rate": 4.084e-06, + "loss": 4496.8926, + "step": 20420 + }, + { + "epoch": 0.041269892573035385, + "grad_norm": 16963.76953125, + "learning_rate": 4.086e-06, + "loss": 19106.8625, + "step": 20430 + }, + { + "epoch": 0.041290093205719205, + "grad_norm": 180953.3125, + "learning_rate": 4.0880000000000005e-06, + "loss": 14445.2641, + "step": 20440 + }, + { + "epoch": 0.04131029383840302, + "grad_norm": 5687.24658203125, + "learning_rate": 4.09e-06, + "loss": 10718.793, + "step": 20450 + }, + { + "epoch": 0.04133049447108683, + "grad_norm": 34702.88671875, + "learning_rate": 4.092000000000001e-06, + "loss": 6377.4199, + "step": 20460 + }, + { + "epoch": 0.04135069510377065, + "grad_norm": 1743.2581787109375, + "learning_rate": 4.094e-06, + "loss": 9755.7891, + "step": 20470 + }, + { + "epoch": 0.041370895736454465, + "grad_norm": 115684.5703125, + "learning_rate": 4.096e-06, + "loss": 11649.5672, + "step": 20480 + }, + { + "epoch": 0.041391096369138285, + "grad_norm": 50389.140625, + "learning_rate": 4.098e-06, + "loss": 17224.8734, + "step": 20490 + }, + { + "epoch": 0.0414112970018221, + "grad_norm": 22753.21875, + "learning_rate": 4.1e-06, + "loss": 24462.5328, + "step": 20500 + }, + { + "epoch": 0.04143149763450591, + "grad_norm": 1389.9984130859375, + "learning_rate": 4.102000000000001e-06, + "loss": 3027.5178, + "step": 20510 + }, + { + "epoch": 0.04145169826718973, + "grad_norm": 33939.375, + "learning_rate": 4.104e-06, + "loss": 7992.3102, + "step": 20520 + }, + { + "epoch": 0.041471898899873544, + "grad_norm": 22884.4453125, + "learning_rate": 4.106e-06, + "loss": 9749.3945, + "step": 20530 + }, + { + "epoch": 0.04149209953255736, + "grad_norm": 12149.7294921875, + "learning_rate": 4.108e-06, + "loss": 10920.6102, + "step": 20540 + }, + { + "epoch": 0.04151230016524118, + "grad_norm": 216002.671875, + "learning_rate": 4.1100000000000005e-06, + "loss": 13830.5844, + "step": 20550 + }, + { + "epoch": 0.04153250079792499, + "grad_norm": 39344.99609375, + "learning_rate": 4.112000000000001e-06, + "loss": 9902.2203, + "step": 20560 + }, + { + "epoch": 0.04155270143060881, + "grad_norm": 43975.76171875, + "learning_rate": 4.114e-06, + "loss": 5834.3555, + "step": 20570 + }, + { + "epoch": 0.04157290206329262, + "grad_norm": 3054.0224609375, + "learning_rate": 4.116000000000001e-06, + "loss": 6427.3898, + "step": 20580 + }, + { + "epoch": 0.041593102695976436, + "grad_norm": 133737.4375, + "learning_rate": 4.118e-06, + "loss": 13569.1281, + "step": 20590 + }, + { + "epoch": 0.041613303328660256, + "grad_norm": 58898.6953125, + "learning_rate": 4.12e-06, + "loss": 19920.8203, + "step": 20600 + }, + { + "epoch": 0.04163350396134407, + "grad_norm": 22634.44140625, + "learning_rate": 4.1220000000000005e-06, + "loss": 7575.3797, + "step": 20610 + }, + { + "epoch": 0.04165370459402788, + "grad_norm": 115233.5390625, + "learning_rate": 4.124e-06, + "loss": 15254.1016, + "step": 20620 + }, + { + "epoch": 0.0416739052267117, + "grad_norm": 22121.78125, + "learning_rate": 4.126000000000001e-06, + "loss": 3660.4527, + "step": 20630 + }, + { + "epoch": 0.041694105859395515, + "grad_norm": 1254.0491943359375, + "learning_rate": 4.128e-06, + "loss": 15536.8469, + "step": 20640 + }, + { + "epoch": 0.041714306492079335, + "grad_norm": 15370.6337890625, + "learning_rate": 4.13e-06, + "loss": 8125.8859, + "step": 20650 + }, + { + "epoch": 0.04173450712476315, + "grad_norm": 14792.462890625, + "learning_rate": 4.132e-06, + "loss": 5611.5402, + "step": 20660 + }, + { + "epoch": 0.04175470775744696, + "grad_norm": 82840.6953125, + "learning_rate": 4.1340000000000006e-06, + "loss": 7855.2922, + "step": 20670 + }, + { + "epoch": 0.04177490839013078, + "grad_norm": 28686.6484375, + "learning_rate": 4.136000000000001e-06, + "loss": 12049.9813, + "step": 20680 + }, + { + "epoch": 0.041795109022814594, + "grad_norm": 34991.625, + "learning_rate": 4.138e-06, + "loss": 12022.3391, + "step": 20690 + }, + { + "epoch": 0.04181530965549841, + "grad_norm": 43741.11328125, + "learning_rate": 4.14e-06, + "loss": 5964.8035, + "step": 20700 + }, + { + "epoch": 0.04183551028818223, + "grad_norm": 66386.2734375, + "learning_rate": 4.142e-06, + "loss": 7516.1594, + "step": 20710 + }, + { + "epoch": 0.04185571092086604, + "grad_norm": 18655.724609375, + "learning_rate": 4.1440000000000005e-06, + "loss": 16339.2125, + "step": 20720 + }, + { + "epoch": 0.04187591155354986, + "grad_norm": 23545.857421875, + "learning_rate": 4.146000000000001e-06, + "loss": 25116.3812, + "step": 20730 + }, + { + "epoch": 0.041896112186233674, + "grad_norm": 73599.375, + "learning_rate": 4.148000000000001e-06, + "loss": 13143.9641, + "step": 20740 + }, + { + "epoch": 0.04191631281891749, + "grad_norm": 112719.6953125, + "learning_rate": 4.15e-06, + "loss": 15219.8563, + "step": 20750 + }, + { + "epoch": 0.04193651345160131, + "grad_norm": 6048.029296875, + "learning_rate": 4.152e-06, + "loss": 15226.1297, + "step": 20760 + }, + { + "epoch": 0.04195671408428512, + "grad_norm": 27796.548828125, + "learning_rate": 4.154e-06, + "loss": 18155.8187, + "step": 20770 + }, + { + "epoch": 0.04197691471696893, + "grad_norm": 2799.111083984375, + "learning_rate": 4.1560000000000005e-06, + "loss": 2258.3342, + "step": 20780 + }, + { + "epoch": 0.04199711534965275, + "grad_norm": 32946.33984375, + "learning_rate": 4.158000000000001e-06, + "loss": 18698.0391, + "step": 20790 + }, + { + "epoch": 0.042017315982336566, + "grad_norm": 2718.86328125, + "learning_rate": 4.16e-06, + "loss": 13044.0063, + "step": 20800 + }, + { + "epoch": 0.042037516615020386, + "grad_norm": 80312.9140625, + "learning_rate": 4.162e-06, + "loss": 17490.7562, + "step": 20810 + }, + { + "epoch": 0.0420577172477042, + "grad_norm": 5824.21728515625, + "learning_rate": 4.164e-06, + "loss": 3901.3684, + "step": 20820 + }, + { + "epoch": 0.04207791788038801, + "grad_norm": 2027.311767578125, + "learning_rate": 4.1660000000000004e-06, + "loss": 14344.6031, + "step": 20830 + }, + { + "epoch": 0.04209811851307183, + "grad_norm": 2944.544921875, + "learning_rate": 4.168000000000001e-06, + "loss": 9711.9469, + "step": 20840 + }, + { + "epoch": 0.042118319145755645, + "grad_norm": 116369.734375, + "learning_rate": 4.17e-06, + "loss": 14112.7156, + "step": 20850 + }, + { + "epoch": 0.04213851977843946, + "grad_norm": 14804.2822265625, + "learning_rate": 4.172000000000001e-06, + "loss": 11922.993, + "step": 20860 + }, + { + "epoch": 0.04215872041112328, + "grad_norm": 7048.44970703125, + "learning_rate": 4.174e-06, + "loss": 7998.5188, + "step": 20870 + }, + { + "epoch": 0.04217892104380709, + "grad_norm": 47892.5, + "learning_rate": 4.176e-06, + "loss": 23666.3703, + "step": 20880 + }, + { + "epoch": 0.04219912167649091, + "grad_norm": 130807.9296875, + "learning_rate": 4.1780000000000005e-06, + "loss": 13156.6578, + "step": 20890 + }, + { + "epoch": 0.042219322309174724, + "grad_norm": 8433.388671875, + "learning_rate": 4.18e-06, + "loss": 6292.5273, + "step": 20900 + }, + { + "epoch": 0.04223952294185854, + "grad_norm": 11974.6181640625, + "learning_rate": 4.182000000000001e-06, + "loss": 8817.1938, + "step": 20910 + }, + { + "epoch": 0.04225972357454236, + "grad_norm": 30001.994140625, + "learning_rate": 4.184e-06, + "loss": 12887.6242, + "step": 20920 + }, + { + "epoch": 0.04227992420722617, + "grad_norm": 3090.501953125, + "learning_rate": 4.186e-06, + "loss": 13270.6812, + "step": 20930 + }, + { + "epoch": 0.04230012483990998, + "grad_norm": 269.6260986328125, + "learning_rate": 4.188e-06, + "loss": 2923.4391, + "step": 20940 + }, + { + "epoch": 0.0423203254725938, + "grad_norm": 8306.419921875, + "learning_rate": 4.1900000000000005e-06, + "loss": 12051.9023, + "step": 20950 + }, + { + "epoch": 0.042340526105277616, + "grad_norm": 30135.591796875, + "learning_rate": 4.192000000000001e-06, + "loss": 9140.5953, + "step": 20960 + }, + { + "epoch": 0.042360726737961436, + "grad_norm": 156607.953125, + "learning_rate": 4.194e-06, + "loss": 7086.3617, + "step": 20970 + }, + { + "epoch": 0.04238092737064525, + "grad_norm": 7695.5576171875, + "learning_rate": 4.196e-06, + "loss": 12938.8234, + "step": 20980 + }, + { + "epoch": 0.04240112800332906, + "grad_norm": 31040.923828125, + "learning_rate": 4.198e-06, + "loss": 14847.6641, + "step": 20990 + }, + { + "epoch": 0.04242132863601288, + "grad_norm": 139109.234375, + "learning_rate": 4.2000000000000004e-06, + "loss": 13542.6703, + "step": 21000 + }, + { + "epoch": 0.042441529268696696, + "grad_norm": 30914.525390625, + "learning_rate": 4.202000000000001e-06, + "loss": 6439.7383, + "step": 21010 + }, + { + "epoch": 0.04246172990138051, + "grad_norm": 1714.904541015625, + "learning_rate": 4.204e-06, + "loss": 12743.4711, + "step": 21020 + }, + { + "epoch": 0.04248193053406433, + "grad_norm": 41995.66015625, + "learning_rate": 4.206e-06, + "loss": 6314.2531, + "step": 21030 + }, + { + "epoch": 0.04250213116674814, + "grad_norm": 36586.640625, + "learning_rate": 4.208e-06, + "loss": 10529.4109, + "step": 21040 + }, + { + "epoch": 0.04252233179943196, + "grad_norm": 34526.8046875, + "learning_rate": 4.21e-06, + "loss": 13935.5875, + "step": 21050 + }, + { + "epoch": 0.042542532432115775, + "grad_norm": 6119.7587890625, + "learning_rate": 4.2120000000000005e-06, + "loss": 9054.8391, + "step": 21060 + }, + { + "epoch": 0.04256273306479959, + "grad_norm": 40456.3046875, + "learning_rate": 4.214000000000001e-06, + "loss": 6948.5422, + "step": 21070 + }, + { + "epoch": 0.04258293369748341, + "grad_norm": 7634.2080078125, + "learning_rate": 4.216e-06, + "loss": 8836.1703, + "step": 21080 + }, + { + "epoch": 0.04260313433016722, + "grad_norm": 3140.942626953125, + "learning_rate": 4.218e-06, + "loss": 7654.1922, + "step": 21090 + }, + { + "epoch": 0.042623334962851034, + "grad_norm": 6410.732421875, + "learning_rate": 4.22e-06, + "loss": 8469.3078, + "step": 21100 + }, + { + "epoch": 0.042643535595534854, + "grad_norm": 256694.109375, + "learning_rate": 4.222e-06, + "loss": 10554.8531, + "step": 21110 + }, + { + "epoch": 0.04266373622821867, + "grad_norm": 52062.56640625, + "learning_rate": 4.2240000000000006e-06, + "loss": 9150.7969, + "step": 21120 + }, + { + "epoch": 0.04268393686090249, + "grad_norm": 23197.201171875, + "learning_rate": 4.226e-06, + "loss": 6208.334, + "step": 21130 + }, + { + "epoch": 0.0427041374935863, + "grad_norm": 44334.18359375, + "learning_rate": 4.228000000000001e-06, + "loss": 7877.2109, + "step": 21140 + }, + { + "epoch": 0.04272433812627011, + "grad_norm": 33509.66015625, + "learning_rate": 4.23e-06, + "loss": 12645.1609, + "step": 21150 + }, + { + "epoch": 0.04274453875895393, + "grad_norm": 69084.359375, + "learning_rate": 4.232e-06, + "loss": 16003.4688, + "step": 21160 + }, + { + "epoch": 0.042764739391637746, + "grad_norm": 91257.0546875, + "learning_rate": 4.2340000000000005e-06, + "loss": 10619.8172, + "step": 21170 + }, + { + "epoch": 0.04278494002432156, + "grad_norm": 16756.958984375, + "learning_rate": 4.236e-06, + "loss": 9752.6953, + "step": 21180 + }, + { + "epoch": 0.04280514065700538, + "grad_norm": 9474.5595703125, + "learning_rate": 4.238000000000001e-06, + "loss": 4324.1895, + "step": 21190 + }, + { + "epoch": 0.04282534128968919, + "grad_norm": 23461.73828125, + "learning_rate": 4.24e-06, + "loss": 19610.5016, + "step": 21200 + }, + { + "epoch": 0.04284554192237301, + "grad_norm": 85599.921875, + "learning_rate": 4.242e-06, + "loss": 17368.2906, + "step": 21210 + }, + { + "epoch": 0.042865742555056825, + "grad_norm": 21658.755859375, + "learning_rate": 4.244e-06, + "loss": 19999.6828, + "step": 21220 + }, + { + "epoch": 0.04288594318774064, + "grad_norm": 118096.28125, + "learning_rate": 4.2460000000000005e-06, + "loss": 22762.6813, + "step": 21230 + }, + { + "epoch": 0.04290614382042446, + "grad_norm": 31201.52734375, + "learning_rate": 4.248000000000001e-06, + "loss": 13444.7344, + "step": 21240 + }, + { + "epoch": 0.04292634445310827, + "grad_norm": 8187.58056640625, + "learning_rate": 4.25e-06, + "loss": 10936.7641, + "step": 21250 + }, + { + "epoch": 0.042946545085792084, + "grad_norm": 63697.5078125, + "learning_rate": 4.252000000000001e-06, + "loss": 6295.0422, + "step": 21260 + }, + { + "epoch": 0.042966745718475904, + "grad_norm": 1474.8662109375, + "learning_rate": 4.254e-06, + "loss": 14998.4516, + "step": 21270 + }, + { + "epoch": 0.04298694635115972, + "grad_norm": 99035.8515625, + "learning_rate": 4.256e-06, + "loss": 7381.1781, + "step": 21280 + }, + { + "epoch": 0.04300714698384353, + "grad_norm": 29660.5, + "learning_rate": 4.2580000000000006e-06, + "loss": 20989.8906, + "step": 21290 + }, + { + "epoch": 0.04302734761652735, + "grad_norm": 41784.48046875, + "learning_rate": 4.26e-06, + "loss": 11433.1961, + "step": 21300 + }, + { + "epoch": 0.043047548249211164, + "grad_norm": 31726.56640625, + "learning_rate": 4.262000000000001e-06, + "loss": 15486.1734, + "step": 21310 + }, + { + "epoch": 0.043067748881894984, + "grad_norm": 1717.00390625, + "learning_rate": 4.264e-06, + "loss": 11146.4664, + "step": 21320 + }, + { + "epoch": 0.0430879495145788, + "grad_norm": 43853.265625, + "learning_rate": 4.266e-06, + "loss": 9636.8516, + "step": 21330 + }, + { + "epoch": 0.04310815014726261, + "grad_norm": 1612.3543701171875, + "learning_rate": 4.2680000000000005e-06, + "loss": 13886.1531, + "step": 21340 + }, + { + "epoch": 0.04312835077994643, + "grad_norm": 2741.07275390625, + "learning_rate": 4.270000000000001e-06, + "loss": 6378.1805, + "step": 21350 + }, + { + "epoch": 0.04314855141263024, + "grad_norm": 20510.453125, + "learning_rate": 4.272000000000001e-06, + "loss": 21804.1984, + "step": 21360 + }, + { + "epoch": 0.043168752045314056, + "grad_norm": 11803.6865234375, + "learning_rate": 4.274e-06, + "loss": 10562.6859, + "step": 21370 + }, + { + "epoch": 0.043188952677997876, + "grad_norm": 2499.533447265625, + "learning_rate": 4.276e-06, + "loss": 19543.9906, + "step": 21380 + }, + { + "epoch": 0.04320915331068169, + "grad_norm": 117143.3125, + "learning_rate": 4.278e-06, + "loss": 21986.6031, + "step": 21390 + }, + { + "epoch": 0.04322935394336551, + "grad_norm": 31093.31640625, + "learning_rate": 4.2800000000000005e-06, + "loss": 23096.2344, + "step": 21400 + }, + { + "epoch": 0.04324955457604932, + "grad_norm": 85155.3203125, + "learning_rate": 4.282000000000001e-06, + "loss": 9922.85, + "step": 21410 + }, + { + "epoch": 0.043269755208733135, + "grad_norm": 1749.982177734375, + "learning_rate": 4.284e-06, + "loss": 14299.0172, + "step": 21420 + }, + { + "epoch": 0.043289955841416955, + "grad_norm": 3096.843017578125, + "learning_rate": 4.286e-06, + "loss": 31658.4719, + "step": 21430 + }, + { + "epoch": 0.04331015647410077, + "grad_norm": 41550.62890625, + "learning_rate": 4.288e-06, + "loss": 10407.0125, + "step": 21440 + }, + { + "epoch": 0.04333035710678458, + "grad_norm": 15614.3828125, + "learning_rate": 4.2900000000000004e-06, + "loss": 6186.8297, + "step": 21450 + }, + { + "epoch": 0.0433505577394684, + "grad_norm": 85688.8984375, + "learning_rate": 4.292000000000001e-06, + "loss": 22421.1312, + "step": 21460 + }, + { + "epoch": 0.043370758372152214, + "grad_norm": 73303.28125, + "learning_rate": 4.294000000000001e-06, + "loss": 8494.3734, + "step": 21470 + }, + { + "epoch": 0.043390959004836034, + "grad_norm": 16907.33984375, + "learning_rate": 4.296e-06, + "loss": 12043.9852, + "step": 21480 + }, + { + "epoch": 0.04341115963751985, + "grad_norm": 20779.703125, + "learning_rate": 4.298e-06, + "loss": 19333.0047, + "step": 21490 + }, + { + "epoch": 0.04343136027020366, + "grad_norm": 22458.55859375, + "learning_rate": 4.3e-06, + "loss": 19712.4719, + "step": 21500 + }, + { + "epoch": 0.04345156090288748, + "grad_norm": 32785.5546875, + "learning_rate": 4.3020000000000005e-06, + "loss": 6423.275, + "step": 21510 + }, + { + "epoch": 0.04347176153557129, + "grad_norm": 355232.34375, + "learning_rate": 4.304000000000001e-06, + "loss": 39198.6062, + "step": 21520 + }, + { + "epoch": 0.043491962168255106, + "grad_norm": 18588.626953125, + "learning_rate": 4.306e-06, + "loss": 8591.7648, + "step": 21530 + }, + { + "epoch": 0.043512162800938926, + "grad_norm": 15749.2998046875, + "learning_rate": 4.308000000000001e-06, + "loss": 16972.1328, + "step": 21540 + }, + { + "epoch": 0.04353236343362274, + "grad_norm": 6551.7314453125, + "learning_rate": 4.31e-06, + "loss": 9718.9797, + "step": 21550 + }, + { + "epoch": 0.04355256406630656, + "grad_norm": 9424.5263671875, + "learning_rate": 4.312e-06, + "loss": 4919.3328, + "step": 21560 + }, + { + "epoch": 0.04357276469899037, + "grad_norm": 22464.9140625, + "learning_rate": 4.3140000000000005e-06, + "loss": 11921.3633, + "step": 21570 + }, + { + "epoch": 0.043592965331674186, + "grad_norm": 49063.19921875, + "learning_rate": 4.316e-06, + "loss": 12917.9055, + "step": 21580 + }, + { + "epoch": 0.043613165964358006, + "grad_norm": 167337.9375, + "learning_rate": 4.318000000000001e-06, + "loss": 14237.2188, + "step": 21590 + }, + { + "epoch": 0.04363336659704182, + "grad_norm": 100200.5234375, + "learning_rate": 4.32e-06, + "loss": 12499.0258, + "step": 21600 + }, + { + "epoch": 0.04365356722972563, + "grad_norm": 66631.046875, + "learning_rate": 4.322e-06, + "loss": 17944.1766, + "step": 21610 + }, + { + "epoch": 0.04367376786240945, + "grad_norm": 10976.515625, + "learning_rate": 4.3240000000000004e-06, + "loss": 17598.2031, + "step": 21620 + }, + { + "epoch": 0.043693968495093265, + "grad_norm": 21437.3515625, + "learning_rate": 4.326000000000001e-06, + "loss": 23867.6109, + "step": 21630 + }, + { + "epoch": 0.043714169127777085, + "grad_norm": 64572.828125, + "learning_rate": 4.328000000000001e-06, + "loss": 10591.2, + "step": 21640 + }, + { + "epoch": 0.0437343697604609, + "grad_norm": 3180.4130859375, + "learning_rate": 4.33e-06, + "loss": 14655.6891, + "step": 21650 + }, + { + "epoch": 0.04375457039314471, + "grad_norm": 1905.07275390625, + "learning_rate": 4.332e-06, + "loss": 15500.425, + "step": 21660 + }, + { + "epoch": 0.04377477102582853, + "grad_norm": 0.0, + "learning_rate": 4.334e-06, + "loss": 8904.1523, + "step": 21670 + }, + { + "epoch": 0.043794971658512344, + "grad_norm": 318263.875, + "learning_rate": 4.3360000000000005e-06, + "loss": 13068.8578, + "step": 21680 + }, + { + "epoch": 0.04381517229119616, + "grad_norm": 24284.251953125, + "learning_rate": 4.338000000000001e-06, + "loss": 22771.5281, + "step": 21690 + }, + { + "epoch": 0.04383537292387998, + "grad_norm": 19764.05859375, + "learning_rate": 4.34e-06, + "loss": 19807.2766, + "step": 21700 + }, + { + "epoch": 0.04385557355656379, + "grad_norm": 26999.052734375, + "learning_rate": 4.342e-06, + "loss": 11951.9992, + "step": 21710 + }, + { + "epoch": 0.04387577418924761, + "grad_norm": 22866.236328125, + "learning_rate": 4.344e-06, + "loss": 8139.7625, + "step": 21720 + }, + { + "epoch": 0.04389597482193142, + "grad_norm": 170535.59375, + "learning_rate": 4.346e-06, + "loss": 19918.5141, + "step": 21730 + }, + { + "epoch": 0.043916175454615236, + "grad_norm": 20284.21875, + "learning_rate": 4.3480000000000006e-06, + "loss": 17538.5109, + "step": 21740 + }, + { + "epoch": 0.043936376087299056, + "grad_norm": 148842.21875, + "learning_rate": 4.350000000000001e-06, + "loss": 22193.3812, + "step": 21750 + }, + { + "epoch": 0.04395657671998287, + "grad_norm": 183993.734375, + "learning_rate": 4.352e-06, + "loss": 12120.5016, + "step": 21760 + }, + { + "epoch": 0.04397677735266668, + "grad_norm": 8657.44140625, + "learning_rate": 4.354e-06, + "loss": 13770.1422, + "step": 21770 + }, + { + "epoch": 0.0439969779853505, + "grad_norm": 2019.0943603515625, + "learning_rate": 4.356e-06, + "loss": 7950.6875, + "step": 21780 + }, + { + "epoch": 0.044017178618034315, + "grad_norm": 23906.865234375, + "learning_rate": 4.3580000000000005e-06, + "loss": 11283.1977, + "step": 21790 + }, + { + "epoch": 0.044037379250718135, + "grad_norm": 23132.623046875, + "learning_rate": 4.360000000000001e-06, + "loss": 13281.6406, + "step": 21800 + }, + { + "epoch": 0.04405757988340195, + "grad_norm": 621.9696044921875, + "learning_rate": 4.362e-06, + "loss": 4077.6203, + "step": 21810 + }, + { + "epoch": 0.04407778051608576, + "grad_norm": 58676.375, + "learning_rate": 4.364e-06, + "loss": 7872.6359, + "step": 21820 + }, + { + "epoch": 0.04409798114876958, + "grad_norm": 281243.3125, + "learning_rate": 4.366e-06, + "loss": 19035.9469, + "step": 21830 + }, + { + "epoch": 0.044118181781453394, + "grad_norm": 18871.080078125, + "learning_rate": 4.368e-06, + "loss": 6085.5828, + "step": 21840 + }, + { + "epoch": 0.04413838241413721, + "grad_norm": 98236.703125, + "learning_rate": 4.3700000000000005e-06, + "loss": 13880.4125, + "step": 21850 + }, + { + "epoch": 0.04415858304682103, + "grad_norm": 22750.599609375, + "learning_rate": 4.372e-06, + "loss": 5749.2172, + "step": 21860 + }, + { + "epoch": 0.04417878367950484, + "grad_norm": 10277.16015625, + "learning_rate": 4.374000000000001e-06, + "loss": 11020.4305, + "step": 21870 + }, + { + "epoch": 0.04419898431218866, + "grad_norm": 59725.453125, + "learning_rate": 4.376e-06, + "loss": 26248.4844, + "step": 21880 + }, + { + "epoch": 0.044219184944872474, + "grad_norm": 125661.484375, + "learning_rate": 4.378e-06, + "loss": 8892.6797, + "step": 21890 + }, + { + "epoch": 0.04423938557755629, + "grad_norm": 41888.84375, + "learning_rate": 4.38e-06, + "loss": 4305.673, + "step": 21900 + }, + { + "epoch": 0.04425958621024011, + "grad_norm": 35684.9375, + "learning_rate": 4.382e-06, + "loss": 3610.3301, + "step": 21910 + }, + { + "epoch": 0.04427978684292392, + "grad_norm": 8210.5576171875, + "learning_rate": 4.384000000000001e-06, + "loss": 9119.2836, + "step": 21920 + }, + { + "epoch": 0.04429998747560773, + "grad_norm": 20171.61328125, + "learning_rate": 4.386e-06, + "loss": 8857.5922, + "step": 21930 + }, + { + "epoch": 0.04432018810829155, + "grad_norm": 2586.271484375, + "learning_rate": 4.388e-06, + "loss": 6533.2273, + "step": 21940 + }, + { + "epoch": 0.044340388740975366, + "grad_norm": 222419.703125, + "learning_rate": 4.39e-06, + "loss": 13825.2188, + "step": 21950 + }, + { + "epoch": 0.044360589373659186, + "grad_norm": 1487.197265625, + "learning_rate": 4.3920000000000005e-06, + "loss": 20913.9422, + "step": 21960 + }, + { + "epoch": 0.044380790006343, + "grad_norm": 142774.546875, + "learning_rate": 4.394000000000001e-06, + "loss": 21317.4938, + "step": 21970 + }, + { + "epoch": 0.04440099063902681, + "grad_norm": 2521.980224609375, + "learning_rate": 4.396e-06, + "loss": 19447.7562, + "step": 21980 + }, + { + "epoch": 0.04442119127171063, + "grad_norm": 39835.7109375, + "learning_rate": 4.398000000000001e-06, + "loss": 24528.5703, + "step": 21990 + }, + { + "epoch": 0.044441391904394445, + "grad_norm": 2506.085205078125, + "learning_rate": 4.4e-06, + "loss": 4247.1438, + "step": 22000 + }, + { + "epoch": 0.04446159253707826, + "grad_norm": 137625.046875, + "learning_rate": 4.402e-06, + "loss": 16272.7797, + "step": 22010 + }, + { + "epoch": 0.04448179316976208, + "grad_norm": 370462.28125, + "learning_rate": 4.4040000000000005e-06, + "loss": 25849.4062, + "step": 22020 + }, + { + "epoch": 0.04450199380244589, + "grad_norm": 2894.3984375, + "learning_rate": 4.406000000000001e-06, + "loss": 15562.6516, + "step": 22030 + }, + { + "epoch": 0.04452219443512971, + "grad_norm": 89519.046875, + "learning_rate": 4.408000000000001e-06, + "loss": 8052.7766, + "step": 22040 + }, + { + "epoch": 0.044542395067813524, + "grad_norm": 343601.8125, + "learning_rate": 4.41e-06, + "loss": 22358.3203, + "step": 22050 + }, + { + "epoch": 0.04456259570049734, + "grad_norm": 15848.3486328125, + "learning_rate": 4.412e-06, + "loss": 14482.5172, + "step": 22060 + }, + { + "epoch": 0.04458279633318116, + "grad_norm": 35708.921875, + "learning_rate": 4.4140000000000004e-06, + "loss": 14014.6516, + "step": 22070 + }, + { + "epoch": 0.04460299696586497, + "grad_norm": 56153.34765625, + "learning_rate": 4.416000000000001e-06, + "loss": 9818.1492, + "step": 22080 + }, + { + "epoch": 0.04462319759854878, + "grad_norm": 9963.20703125, + "learning_rate": 4.418000000000001e-06, + "loss": 13191.1063, + "step": 22090 + }, + { + "epoch": 0.0446433982312326, + "grad_norm": 86954.3984375, + "learning_rate": 4.42e-06, + "loss": 10410.3062, + "step": 22100 + }, + { + "epoch": 0.044663598863916416, + "grad_norm": 22645.3984375, + "learning_rate": 4.422e-06, + "loss": 8568.1883, + "step": 22110 + }, + { + "epoch": 0.044683799496600236, + "grad_norm": 2992.58349609375, + "learning_rate": 4.424e-06, + "loss": 4958.7031, + "step": 22120 + }, + { + "epoch": 0.04470400012928405, + "grad_norm": 23164.978515625, + "learning_rate": 4.4260000000000005e-06, + "loss": 6911.3813, + "step": 22130 + }, + { + "epoch": 0.04472420076196786, + "grad_norm": 509.5948791503906, + "learning_rate": 4.428000000000001e-06, + "loss": 9239.3203, + "step": 22140 + }, + { + "epoch": 0.04474440139465168, + "grad_norm": 128388.640625, + "learning_rate": 4.430000000000001e-06, + "loss": 12262.6703, + "step": 22150 + }, + { + "epoch": 0.044764602027335496, + "grad_norm": 30027.6875, + "learning_rate": 4.432e-06, + "loss": 9843.7359, + "step": 22160 + }, + { + "epoch": 0.04478480266001931, + "grad_norm": 7572.00244140625, + "learning_rate": 4.434e-06, + "loss": 10178.6969, + "step": 22170 + }, + { + "epoch": 0.04480500329270313, + "grad_norm": 172932.4375, + "learning_rate": 4.436e-06, + "loss": 11016.9062, + "step": 22180 + }, + { + "epoch": 0.04482520392538694, + "grad_norm": 16828.158203125, + "learning_rate": 4.438e-06, + "loss": 7405.7188, + "step": 22190 + }, + { + "epoch": 0.04484540455807076, + "grad_norm": 88544.5625, + "learning_rate": 4.440000000000001e-06, + "loss": 22122.2562, + "step": 22200 + }, + { + "epoch": 0.044865605190754575, + "grad_norm": 0.0, + "learning_rate": 4.442e-06, + "loss": 16921.8078, + "step": 22210 + }, + { + "epoch": 0.04488580582343839, + "grad_norm": 114444.2265625, + "learning_rate": 4.444e-06, + "loss": 13026.8211, + "step": 22220 + }, + { + "epoch": 0.04490600645612221, + "grad_norm": 410151.21875, + "learning_rate": 4.446e-06, + "loss": 18073.4188, + "step": 22230 + }, + { + "epoch": 0.04492620708880602, + "grad_norm": 25376.658203125, + "learning_rate": 4.4480000000000004e-06, + "loss": 8200.7547, + "step": 22240 + }, + { + "epoch": 0.044946407721489834, + "grad_norm": 2573.40771484375, + "learning_rate": 4.450000000000001e-06, + "loss": 2582.7209, + "step": 22250 + }, + { + "epoch": 0.044966608354173654, + "grad_norm": 25378.966796875, + "learning_rate": 4.452e-06, + "loss": 10779.5156, + "step": 22260 + }, + { + "epoch": 0.04498680898685747, + "grad_norm": 35221.1796875, + "learning_rate": 4.454000000000001e-06, + "loss": 8401.7344, + "step": 22270 + }, + { + "epoch": 0.04500700961954129, + "grad_norm": 10618.6435546875, + "learning_rate": 4.456e-06, + "loss": 17495.2391, + "step": 22280 + }, + { + "epoch": 0.0450272102522251, + "grad_norm": 470344.40625, + "learning_rate": 4.458e-06, + "loss": 36151.7438, + "step": 22290 + }, + { + "epoch": 0.04504741088490891, + "grad_norm": 8741.7177734375, + "learning_rate": 4.4600000000000005e-06, + "loss": 10645.7602, + "step": 22300 + }, + { + "epoch": 0.04506761151759273, + "grad_norm": 2574.076416015625, + "learning_rate": 4.462e-06, + "loss": 17353.8812, + "step": 22310 + }, + { + "epoch": 0.045087812150276546, + "grad_norm": 57246.84765625, + "learning_rate": 4.464000000000001e-06, + "loss": 17351.9375, + "step": 22320 + }, + { + "epoch": 0.04510801278296036, + "grad_norm": 44556.13671875, + "learning_rate": 4.466e-06, + "loss": 13917.8891, + "step": 22330 + }, + { + "epoch": 0.04512821341564418, + "grad_norm": 27071.501953125, + "learning_rate": 4.468e-06, + "loss": 8869.032, + "step": 22340 + }, + { + "epoch": 0.04514841404832799, + "grad_norm": 23963.7421875, + "learning_rate": 4.47e-06, + "loss": 21010.4109, + "step": 22350 + }, + { + "epoch": 0.04516861468101181, + "grad_norm": 76169.421875, + "learning_rate": 4.4720000000000006e-06, + "loss": 7687.3008, + "step": 22360 + }, + { + "epoch": 0.045188815313695625, + "grad_norm": 9651.1611328125, + "learning_rate": 4.474000000000001e-06, + "loss": 8302.1203, + "step": 22370 + }, + { + "epoch": 0.04520901594637944, + "grad_norm": 28242.08984375, + "learning_rate": 4.476e-06, + "loss": 17652.9594, + "step": 22380 + }, + { + "epoch": 0.04522921657906326, + "grad_norm": 384.629638671875, + "learning_rate": 4.478e-06, + "loss": 19880.2078, + "step": 22390 + }, + { + "epoch": 0.04524941721174707, + "grad_norm": 13215.3564453125, + "learning_rate": 4.48e-06, + "loss": 22434.4234, + "step": 22400 + }, + { + "epoch": 0.045269617844430884, + "grad_norm": 39647.8203125, + "learning_rate": 4.4820000000000005e-06, + "loss": 15670.4359, + "step": 22410 + }, + { + "epoch": 0.045289818477114704, + "grad_norm": 53902.640625, + "learning_rate": 4.484000000000001e-06, + "loss": 8310.7734, + "step": 22420 + }, + { + "epoch": 0.04531001910979852, + "grad_norm": 3304.2978515625, + "learning_rate": 4.486000000000001e-06, + "loss": 13313.9719, + "step": 22430 + }, + { + "epoch": 0.04533021974248234, + "grad_norm": 28249.509765625, + "learning_rate": 4.488e-06, + "loss": 12831.893, + "step": 22440 + }, + { + "epoch": 0.04535042037516615, + "grad_norm": 14902.943359375, + "learning_rate": 4.49e-06, + "loss": 12316.8711, + "step": 22450 + }, + { + "epoch": 0.045370621007849964, + "grad_norm": 26437.6328125, + "learning_rate": 4.492e-06, + "loss": 10896.0797, + "step": 22460 + }, + { + "epoch": 0.045390821640533784, + "grad_norm": 306896.75, + "learning_rate": 4.4940000000000005e-06, + "loss": 14650.9531, + "step": 22470 + }, + { + "epoch": 0.0454110222732176, + "grad_norm": 186634.46875, + "learning_rate": 4.496000000000001e-06, + "loss": 28835.8281, + "step": 22480 + }, + { + "epoch": 0.04543122290590141, + "grad_norm": 30199.931640625, + "learning_rate": 4.498e-06, + "loss": 8533.6633, + "step": 22490 + }, + { + "epoch": 0.04545142353858523, + "grad_norm": 11017.78125, + "learning_rate": 4.5e-06, + "loss": 10962.5344, + "step": 22500 + }, + { + "epoch": 0.04547162417126904, + "grad_norm": 161771.65625, + "learning_rate": 4.502e-06, + "loss": 10332.8281, + "step": 22510 + }, + { + "epoch": 0.04549182480395286, + "grad_norm": 46667.03515625, + "learning_rate": 4.504e-06, + "loss": 18335.9406, + "step": 22520 + }, + { + "epoch": 0.045512025436636676, + "grad_norm": 1677.186767578125, + "learning_rate": 4.5060000000000006e-06, + "loss": 15088.0203, + "step": 22530 + }, + { + "epoch": 0.04553222606932049, + "grad_norm": 206390.09375, + "learning_rate": 4.508e-06, + "loss": 13442.9266, + "step": 22540 + }, + { + "epoch": 0.04555242670200431, + "grad_norm": 4472.89306640625, + "learning_rate": 4.510000000000001e-06, + "loss": 7626.568, + "step": 22550 + }, + { + "epoch": 0.04557262733468812, + "grad_norm": 1434.3111572265625, + "learning_rate": 4.512e-06, + "loss": 30650.4094, + "step": 22560 + }, + { + "epoch": 0.045592827967371935, + "grad_norm": 3322.546875, + "learning_rate": 4.514e-06, + "loss": 17247.3609, + "step": 22570 + }, + { + "epoch": 0.045613028600055755, + "grad_norm": 40714.9609375, + "learning_rate": 4.5160000000000005e-06, + "loss": 7190.3266, + "step": 22580 + }, + { + "epoch": 0.04563322923273957, + "grad_norm": 64316.7734375, + "learning_rate": 4.518e-06, + "loss": 10807.5266, + "step": 22590 + }, + { + "epoch": 0.04565342986542339, + "grad_norm": 4700.7880859375, + "learning_rate": 4.520000000000001e-06, + "loss": 3208.1447, + "step": 22600 + }, + { + "epoch": 0.0456736304981072, + "grad_norm": 5608.775390625, + "learning_rate": 4.522e-06, + "loss": 9423.9117, + "step": 22610 + }, + { + "epoch": 0.045693831130791014, + "grad_norm": 8235.2041015625, + "learning_rate": 4.524e-06, + "loss": 10215.4422, + "step": 22620 + }, + { + "epoch": 0.045714031763474834, + "grad_norm": 46429.953125, + "learning_rate": 4.526e-06, + "loss": 10002.0, + "step": 22630 + }, + { + "epoch": 0.04573423239615865, + "grad_norm": 12061.98828125, + "learning_rate": 4.5280000000000005e-06, + "loss": 8069.5516, + "step": 22640 + }, + { + "epoch": 0.04575443302884246, + "grad_norm": 26459.775390625, + "learning_rate": 4.530000000000001e-06, + "loss": 14492.95, + "step": 22650 + }, + { + "epoch": 0.04577463366152628, + "grad_norm": 336208.125, + "learning_rate": 4.532e-06, + "loss": 10865.3195, + "step": 22660 + }, + { + "epoch": 0.04579483429421009, + "grad_norm": 1580.9244384765625, + "learning_rate": 4.534000000000001e-06, + "loss": 9718.0656, + "step": 22670 + }, + { + "epoch": 0.04581503492689391, + "grad_norm": 196138.859375, + "learning_rate": 4.536e-06, + "loss": 14109.75, + "step": 22680 + }, + { + "epoch": 0.045835235559577726, + "grad_norm": 2821.609375, + "learning_rate": 4.5380000000000004e-06, + "loss": 14006.4766, + "step": 22690 + }, + { + "epoch": 0.04585543619226154, + "grad_norm": 1183.8643798828125, + "learning_rate": 4.540000000000001e-06, + "loss": 26664.725, + "step": 22700 + }, + { + "epoch": 0.04587563682494536, + "grad_norm": 100677.2265625, + "learning_rate": 4.542e-06, + "loss": 16388.6609, + "step": 22710 + }, + { + "epoch": 0.04589583745762917, + "grad_norm": 12260.84375, + "learning_rate": 4.544000000000001e-06, + "loss": 16228.1344, + "step": 22720 + }, + { + "epoch": 0.045916038090312986, + "grad_norm": 110546.875, + "learning_rate": 4.546e-06, + "loss": 17217.1031, + "step": 22730 + }, + { + "epoch": 0.045936238722996806, + "grad_norm": 441.9888610839844, + "learning_rate": 4.548e-06, + "loss": 4431.3254, + "step": 22740 + }, + { + "epoch": 0.04595643935568062, + "grad_norm": 5190.494140625, + "learning_rate": 4.5500000000000005e-06, + "loss": 8309.3727, + "step": 22750 + }, + { + "epoch": 0.04597663998836444, + "grad_norm": 2384.53271484375, + "learning_rate": 4.552000000000001e-06, + "loss": 14981.8281, + "step": 22760 + }, + { + "epoch": 0.04599684062104825, + "grad_norm": 4087.918212890625, + "learning_rate": 4.554000000000001e-06, + "loss": 15983.2844, + "step": 22770 + }, + { + "epoch": 0.046017041253732065, + "grad_norm": 17260.669921875, + "learning_rate": 4.556e-06, + "loss": 10630.0906, + "step": 22780 + }, + { + "epoch": 0.046037241886415885, + "grad_norm": 32316.52734375, + "learning_rate": 4.558e-06, + "loss": 7501.1687, + "step": 22790 + }, + { + "epoch": 0.0460574425190997, + "grad_norm": 92411.1015625, + "learning_rate": 4.56e-06, + "loss": 19955.9, + "step": 22800 + }, + { + "epoch": 0.04607764315178351, + "grad_norm": 41275.3984375, + "learning_rate": 4.5620000000000005e-06, + "loss": 8052.0461, + "step": 22810 + }, + { + "epoch": 0.04609784378446733, + "grad_norm": 18561.72265625, + "learning_rate": 4.564e-06, + "loss": 7513.7203, + "step": 22820 + }, + { + "epoch": 0.046118044417151144, + "grad_norm": 4575.74560546875, + "learning_rate": 4.566000000000001e-06, + "loss": 14703.1625, + "step": 22830 + }, + { + "epoch": 0.046138245049834964, + "grad_norm": 36347.6640625, + "learning_rate": 4.568e-06, + "loss": 19191.0062, + "step": 22840 + }, + { + "epoch": 0.04615844568251878, + "grad_norm": 65948.34375, + "learning_rate": 4.57e-06, + "loss": 10696.8766, + "step": 22850 + }, + { + "epoch": 0.04617864631520259, + "grad_norm": 61017.45703125, + "learning_rate": 4.5720000000000004e-06, + "loss": 13021.5047, + "step": 22860 + }, + { + "epoch": 0.04619884694788641, + "grad_norm": 348983.59375, + "learning_rate": 4.574e-06, + "loss": 14541.7563, + "step": 22870 + }, + { + "epoch": 0.04621904758057022, + "grad_norm": 34017.1953125, + "learning_rate": 4.576000000000001e-06, + "loss": 10857.4195, + "step": 22880 + }, + { + "epoch": 0.046239248213254036, + "grad_norm": 3246.06689453125, + "learning_rate": 4.578e-06, + "loss": 14351.1891, + "step": 22890 + }, + { + "epoch": 0.046259448845937856, + "grad_norm": 76350.6484375, + "learning_rate": 4.58e-06, + "loss": 9696.0352, + "step": 22900 + }, + { + "epoch": 0.04627964947862167, + "grad_norm": 38696.31640625, + "learning_rate": 4.582e-06, + "loss": 20514.1875, + "step": 22910 + }, + { + "epoch": 0.04629985011130549, + "grad_norm": 23374.693359375, + "learning_rate": 4.5840000000000005e-06, + "loss": 15390.0922, + "step": 22920 + }, + { + "epoch": 0.0463200507439893, + "grad_norm": 18700.576171875, + "learning_rate": 4.586000000000001e-06, + "loss": 22375.1578, + "step": 22930 + }, + { + "epoch": 0.046340251376673115, + "grad_norm": 29480.603515625, + "learning_rate": 4.588e-06, + "loss": 4178.4625, + "step": 22940 + }, + { + "epoch": 0.046360452009356935, + "grad_norm": 25715.578125, + "learning_rate": 4.590000000000001e-06, + "loss": 12328.3344, + "step": 22950 + }, + { + "epoch": 0.04638065264204075, + "grad_norm": 4740.72216796875, + "learning_rate": 4.592e-06, + "loss": 22019.1734, + "step": 22960 + }, + { + "epoch": 0.04640085327472456, + "grad_norm": 42083.79296875, + "learning_rate": 4.594e-06, + "loss": 8078.1469, + "step": 22970 + }, + { + "epoch": 0.04642105390740838, + "grad_norm": 22471.78125, + "learning_rate": 4.5960000000000006e-06, + "loss": 10118.9062, + "step": 22980 + }, + { + "epoch": 0.046441254540092194, + "grad_norm": 41308.140625, + "learning_rate": 4.598e-06, + "loss": 12690.7687, + "step": 22990 + }, + { + "epoch": 0.046461455172776014, + "grad_norm": 67031.5, + "learning_rate": 4.600000000000001e-06, + "loss": 14768.2281, + "step": 23000 + }, + { + "epoch": 0.04648165580545983, + "grad_norm": 21305.5625, + "learning_rate": 4.602e-06, + "loss": 7931.907, + "step": 23010 + }, + { + "epoch": 0.04650185643814364, + "grad_norm": 28971.8515625, + "learning_rate": 4.604e-06, + "loss": 12958.4672, + "step": 23020 + }, + { + "epoch": 0.04652205707082746, + "grad_norm": 1144.9761962890625, + "learning_rate": 4.6060000000000005e-06, + "loss": 17498.4328, + "step": 23030 + }, + { + "epoch": 0.046542257703511274, + "grad_norm": 2679.82861328125, + "learning_rate": 4.608000000000001e-06, + "loss": 8609.5883, + "step": 23040 + }, + { + "epoch": 0.04656245833619509, + "grad_norm": 50155.05859375, + "learning_rate": 4.610000000000001e-06, + "loss": 11407.9875, + "step": 23050 + }, + { + "epoch": 0.04658265896887891, + "grad_norm": 9550.69921875, + "learning_rate": 4.612e-06, + "loss": 5148.0551, + "step": 23060 + }, + { + "epoch": 0.04660285960156272, + "grad_norm": 1348.2547607421875, + "learning_rate": 4.614e-06, + "loss": 21226.9641, + "step": 23070 + }, + { + "epoch": 0.04662306023424654, + "grad_norm": 21961.93359375, + "learning_rate": 4.616e-06, + "loss": 8191.9172, + "step": 23080 + }, + { + "epoch": 0.04664326086693035, + "grad_norm": 80573.9375, + "learning_rate": 4.6180000000000005e-06, + "loss": 9877.7406, + "step": 23090 + }, + { + "epoch": 0.046663461499614166, + "grad_norm": 28072.294921875, + "learning_rate": 4.620000000000001e-06, + "loss": 18852.0219, + "step": 23100 + }, + { + "epoch": 0.046683662132297986, + "grad_norm": 40290.80859375, + "learning_rate": 4.622e-06, + "loss": 20566.5344, + "step": 23110 + }, + { + "epoch": 0.0467038627649818, + "grad_norm": 80584.9453125, + "learning_rate": 4.624e-06, + "loss": 17608.2969, + "step": 23120 + }, + { + "epoch": 0.04672406339766561, + "grad_norm": 75388.8828125, + "learning_rate": 4.626e-06, + "loss": 13571.5172, + "step": 23130 + }, + { + "epoch": 0.04674426403034943, + "grad_norm": 641.8245849609375, + "learning_rate": 4.628e-06, + "loss": 16700.4313, + "step": 23140 + }, + { + "epoch": 0.046764464663033245, + "grad_norm": 88915.2265625, + "learning_rate": 4.6300000000000006e-06, + "loss": 6681.0086, + "step": 23150 + }, + { + "epoch": 0.046784665295717065, + "grad_norm": 9814.966796875, + "learning_rate": 4.632000000000001e-06, + "loss": 6161.5977, + "step": 23160 + }, + { + "epoch": 0.04680486592840088, + "grad_norm": 1226.950439453125, + "learning_rate": 4.634e-06, + "loss": 11999.7164, + "step": 23170 + }, + { + "epoch": 0.04682506656108469, + "grad_norm": 10530.9560546875, + "learning_rate": 4.636e-06, + "loss": 7544.5773, + "step": 23180 + }, + { + "epoch": 0.04684526719376851, + "grad_norm": 3445.498779296875, + "learning_rate": 4.638e-06, + "loss": 11770.6172, + "step": 23190 + }, + { + "epoch": 0.046865467826452324, + "grad_norm": 64231.1328125, + "learning_rate": 4.6400000000000005e-06, + "loss": 9008.7891, + "step": 23200 + }, + { + "epoch": 0.04688566845913614, + "grad_norm": 161235.125, + "learning_rate": 4.642000000000001e-06, + "loss": 7835.0492, + "step": 23210 + }, + { + "epoch": 0.04690586909181996, + "grad_norm": 1471.0379638671875, + "learning_rate": 4.644e-06, + "loss": 2715.652, + "step": 23220 + }, + { + "epoch": 0.04692606972450377, + "grad_norm": 15864.935546875, + "learning_rate": 4.646000000000001e-06, + "loss": 13016.9031, + "step": 23230 + }, + { + "epoch": 0.04694627035718759, + "grad_norm": 8128.00244140625, + "learning_rate": 4.648e-06, + "loss": 19299.5484, + "step": 23240 + }, + { + "epoch": 0.0469664709898714, + "grad_norm": 30822.337890625, + "learning_rate": 4.65e-06, + "loss": 16186.225, + "step": 23250 + }, + { + "epoch": 0.046986671622555216, + "grad_norm": 81676.625, + "learning_rate": 4.6520000000000005e-06, + "loss": 12368.2734, + "step": 23260 + }, + { + "epoch": 0.047006872255239036, + "grad_norm": 2715.097412109375, + "learning_rate": 4.654e-06, + "loss": 21662.5938, + "step": 23270 + }, + { + "epoch": 0.04702707288792285, + "grad_norm": 8161.8916015625, + "learning_rate": 4.656000000000001e-06, + "loss": 4275.3871, + "step": 23280 + }, + { + "epoch": 0.04704727352060666, + "grad_norm": 77004.6015625, + "learning_rate": 4.658e-06, + "loss": 18927.3047, + "step": 23290 + }, + { + "epoch": 0.04706747415329048, + "grad_norm": 143609.234375, + "learning_rate": 4.66e-06, + "loss": 10165.1039, + "step": 23300 + }, + { + "epoch": 0.047087674785974296, + "grad_norm": 8845.984375, + "learning_rate": 4.6620000000000004e-06, + "loss": 3688.909, + "step": 23310 + }, + { + "epoch": 0.047107875418658116, + "grad_norm": 2374.22216796875, + "learning_rate": 4.664000000000001e-06, + "loss": 6006.9266, + "step": 23320 + }, + { + "epoch": 0.04712807605134193, + "grad_norm": 3284.995361328125, + "learning_rate": 4.666000000000001e-06, + "loss": 6135.2309, + "step": 23330 + }, + { + "epoch": 0.04714827668402574, + "grad_norm": 4358.5419921875, + "learning_rate": 4.668e-06, + "loss": 6882.8219, + "step": 23340 + }, + { + "epoch": 0.04716847731670956, + "grad_norm": 4814.08642578125, + "learning_rate": 4.670000000000001e-06, + "loss": 19053.8953, + "step": 23350 + }, + { + "epoch": 0.047188677949393375, + "grad_norm": 21208.869140625, + "learning_rate": 4.672e-06, + "loss": 15273.7719, + "step": 23360 + }, + { + "epoch": 0.04720887858207719, + "grad_norm": 71826.9296875, + "learning_rate": 4.6740000000000005e-06, + "loss": 9454.1539, + "step": 23370 + }, + { + "epoch": 0.04722907921476101, + "grad_norm": 159275.109375, + "learning_rate": 4.676000000000001e-06, + "loss": 30309.8812, + "step": 23380 + }, + { + "epoch": 0.04724927984744482, + "grad_norm": 324810.6875, + "learning_rate": 4.678e-06, + "loss": 14763.05, + "step": 23390 + }, + { + "epoch": 0.04726948048012864, + "grad_norm": 1016.7255249023438, + "learning_rate": 4.680000000000001e-06, + "loss": 9449.3992, + "step": 23400 + }, + { + "epoch": 0.047289681112812454, + "grad_norm": 109321.609375, + "learning_rate": 4.682e-06, + "loss": 10746.3305, + "step": 23410 + }, + { + "epoch": 0.04730988174549627, + "grad_norm": 2391.586181640625, + "learning_rate": 4.684e-06, + "loss": 12563.8727, + "step": 23420 + }, + { + "epoch": 0.04733008237818009, + "grad_norm": 8655.45703125, + "learning_rate": 4.6860000000000005e-06, + "loss": 8827.8094, + "step": 23430 + }, + { + "epoch": 0.0473502830108639, + "grad_norm": 7321.69482421875, + "learning_rate": 4.688000000000001e-06, + "loss": 11698.3008, + "step": 23440 + }, + { + "epoch": 0.04737048364354771, + "grad_norm": 148912.6875, + "learning_rate": 4.69e-06, + "loss": 20568.0375, + "step": 23450 + }, + { + "epoch": 0.04739068427623153, + "grad_norm": 1838.76123046875, + "learning_rate": 4.692e-06, + "loss": 5796.5871, + "step": 23460 + }, + { + "epoch": 0.047410884908915346, + "grad_norm": 18355.5703125, + "learning_rate": 4.694e-06, + "loss": 10434.507, + "step": 23470 + }, + { + "epoch": 0.047431085541599166, + "grad_norm": 21350.595703125, + "learning_rate": 4.6960000000000004e-06, + "loss": 9902.3641, + "step": 23480 + }, + { + "epoch": 0.04745128617428298, + "grad_norm": 2083.095947265625, + "learning_rate": 4.698000000000001e-06, + "loss": 19982.7453, + "step": 23490 + }, + { + "epoch": 0.04747148680696679, + "grad_norm": 33590.94921875, + "learning_rate": 4.7e-06, + "loss": 24202.0312, + "step": 23500 + }, + { + "epoch": 0.04749168743965061, + "grad_norm": 23567.181640625, + "learning_rate": 4.702e-06, + "loss": 9080.9617, + "step": 23510 + }, + { + "epoch": 0.047511888072334425, + "grad_norm": 29948.134765625, + "learning_rate": 4.704e-06, + "loss": 8343.3469, + "step": 23520 + }, + { + "epoch": 0.04753208870501824, + "grad_norm": 24219.95703125, + "learning_rate": 4.706e-06, + "loss": 8231.0211, + "step": 23530 + }, + { + "epoch": 0.04755228933770206, + "grad_norm": 191684.953125, + "learning_rate": 4.7080000000000005e-06, + "loss": 15435.0094, + "step": 23540 + }, + { + "epoch": 0.04757248997038587, + "grad_norm": 18752.8125, + "learning_rate": 4.71e-06, + "loss": 2462.8844, + "step": 23550 + }, + { + "epoch": 0.04759269060306969, + "grad_norm": 15627.9853515625, + "learning_rate": 4.712000000000001e-06, + "loss": 26914.9219, + "step": 23560 + }, + { + "epoch": 0.047612891235753504, + "grad_norm": 1579.1776123046875, + "learning_rate": 4.714e-06, + "loss": 13565.9422, + "step": 23570 + }, + { + "epoch": 0.04763309186843732, + "grad_norm": 25788.5390625, + "learning_rate": 4.716e-06, + "loss": 11697.5406, + "step": 23580 + }, + { + "epoch": 0.04765329250112114, + "grad_norm": 25529.337890625, + "learning_rate": 4.718e-06, + "loss": 9955.7977, + "step": 23590 + }, + { + "epoch": 0.04767349313380495, + "grad_norm": 9248.3896484375, + "learning_rate": 4.7200000000000005e-06, + "loss": 16705.0719, + "step": 23600 + }, + { + "epoch": 0.047693693766488764, + "grad_norm": 72947.2109375, + "learning_rate": 4.722000000000001e-06, + "loss": 14608.1375, + "step": 23610 + }, + { + "epoch": 0.047713894399172584, + "grad_norm": 427.451416015625, + "learning_rate": 4.724e-06, + "loss": 9661.0063, + "step": 23620 + }, + { + "epoch": 0.0477340950318564, + "grad_norm": 29963.341796875, + "learning_rate": 4.726000000000001e-06, + "loss": 11170.2398, + "step": 23630 + }, + { + "epoch": 0.04775429566454022, + "grad_norm": 12767.6240234375, + "learning_rate": 4.728e-06, + "loss": 11940.8078, + "step": 23640 + }, + { + "epoch": 0.04777449629722403, + "grad_norm": 6286.42138671875, + "learning_rate": 4.7300000000000005e-06, + "loss": 6759.2586, + "step": 23650 + }, + { + "epoch": 0.04779469692990784, + "grad_norm": 21107.64453125, + "learning_rate": 4.732000000000001e-06, + "loss": 16965.5969, + "step": 23660 + }, + { + "epoch": 0.04781489756259166, + "grad_norm": 60198.2421875, + "learning_rate": 4.734e-06, + "loss": 16779.4703, + "step": 23670 + }, + { + "epoch": 0.047835098195275476, + "grad_norm": 12704.859375, + "learning_rate": 4.736000000000001e-06, + "loss": 6125.7387, + "step": 23680 + }, + { + "epoch": 0.04785529882795929, + "grad_norm": 29059.59375, + "learning_rate": 4.738e-06, + "loss": 10311.0219, + "step": 23690 + }, + { + "epoch": 0.04787549946064311, + "grad_norm": 34664.56640625, + "learning_rate": 4.74e-06, + "loss": 16973.9422, + "step": 23700 + }, + { + "epoch": 0.04789570009332692, + "grad_norm": 51086.94921875, + "learning_rate": 4.7420000000000005e-06, + "loss": 6623.8656, + "step": 23710 + }, + { + "epoch": 0.04791590072601074, + "grad_norm": 59169.39453125, + "learning_rate": 4.744000000000001e-06, + "loss": 13993.7969, + "step": 23720 + }, + { + "epoch": 0.047936101358694555, + "grad_norm": 5734.86669921875, + "learning_rate": 4.746000000000001e-06, + "loss": 9868.8828, + "step": 23730 + }, + { + "epoch": 0.04795630199137837, + "grad_norm": 40927.16015625, + "learning_rate": 4.748e-06, + "loss": 14370.7109, + "step": 23740 + }, + { + "epoch": 0.04797650262406219, + "grad_norm": 1718.968994140625, + "learning_rate": 4.75e-06, + "loss": 11170.8398, + "step": 23750 + }, + { + "epoch": 0.047996703256746, + "grad_norm": 6452.52490234375, + "learning_rate": 4.752e-06, + "loss": 15147.7656, + "step": 23760 + }, + { + "epoch": 0.048016903889429814, + "grad_norm": 29070.18359375, + "learning_rate": 4.7540000000000006e-06, + "loss": 7266.8664, + "step": 23770 + }, + { + "epoch": 0.048037104522113634, + "grad_norm": 25682.05859375, + "learning_rate": 4.756000000000001e-06, + "loss": 12568.8297, + "step": 23780 + }, + { + "epoch": 0.04805730515479745, + "grad_norm": 27769.265625, + "learning_rate": 4.758e-06, + "loss": 22595.1797, + "step": 23790 + }, + { + "epoch": 0.04807750578748127, + "grad_norm": 325421.90625, + "learning_rate": 4.76e-06, + "loss": 19454.1891, + "step": 23800 + }, + { + "epoch": 0.04809770642016508, + "grad_norm": 11147.845703125, + "learning_rate": 4.762e-06, + "loss": 7608.4828, + "step": 23810 + }, + { + "epoch": 0.04811790705284889, + "grad_norm": 24122.28515625, + "learning_rate": 4.7640000000000005e-06, + "loss": 7041.4867, + "step": 23820 + }, + { + "epoch": 0.04813810768553271, + "grad_norm": 44334.6015625, + "learning_rate": 4.766000000000001e-06, + "loss": 6824.4852, + "step": 23830 + }, + { + "epoch": 0.048158308318216526, + "grad_norm": 38598.953125, + "learning_rate": 4.768000000000001e-06, + "loss": 13959.0656, + "step": 23840 + }, + { + "epoch": 0.04817850895090034, + "grad_norm": 34688.57421875, + "learning_rate": 4.77e-06, + "loss": 12073.4797, + "step": 23850 + }, + { + "epoch": 0.04819870958358416, + "grad_norm": 31821.056640625, + "learning_rate": 4.772e-06, + "loss": 11485.2125, + "step": 23860 + }, + { + "epoch": 0.04821891021626797, + "grad_norm": 46690.38671875, + "learning_rate": 4.774e-06, + "loss": 9187.6172, + "step": 23870 + }, + { + "epoch": 0.04823911084895179, + "grad_norm": 2461.8369140625, + "learning_rate": 4.7760000000000005e-06, + "loss": 29322.7875, + "step": 23880 + }, + { + "epoch": 0.048259311481635606, + "grad_norm": 126258.015625, + "learning_rate": 4.778000000000001e-06, + "loss": 22302.8875, + "step": 23890 + }, + { + "epoch": 0.04827951211431942, + "grad_norm": 123655.6171875, + "learning_rate": 4.78e-06, + "loss": 13625.0875, + "step": 23900 + }, + { + "epoch": 0.04829971274700324, + "grad_norm": 64700.15234375, + "learning_rate": 4.782e-06, + "loss": 7999.3523, + "step": 23910 + }, + { + "epoch": 0.04831991337968705, + "grad_norm": 36786.9140625, + "learning_rate": 4.784e-06, + "loss": 7567.5203, + "step": 23920 + }, + { + "epoch": 0.048340114012370865, + "grad_norm": 3881.15283203125, + "learning_rate": 4.7860000000000004e-06, + "loss": 5421.7355, + "step": 23930 + }, + { + "epoch": 0.048360314645054685, + "grad_norm": 35485.87890625, + "learning_rate": 4.7880000000000006e-06, + "loss": 7554.2594, + "step": 23940 + }, + { + "epoch": 0.0483805152777385, + "grad_norm": 118464.828125, + "learning_rate": 4.79e-06, + "loss": 12847.8578, + "step": 23950 + }, + { + "epoch": 0.04840071591042232, + "grad_norm": 34706.5390625, + "learning_rate": 4.792000000000001e-06, + "loss": 10827.0945, + "step": 23960 + }, + { + "epoch": 0.04842091654310613, + "grad_norm": 165970.90625, + "learning_rate": 4.794e-06, + "loss": 10640.8578, + "step": 23970 + }, + { + "epoch": 0.048441117175789944, + "grad_norm": 140924.859375, + "learning_rate": 4.796e-06, + "loss": 17665.2766, + "step": 23980 + }, + { + "epoch": 0.048461317808473764, + "grad_norm": 22314.314453125, + "learning_rate": 4.7980000000000005e-06, + "loss": 5914.3824, + "step": 23990 + }, + { + "epoch": 0.04848151844115758, + "grad_norm": 40976.42578125, + "learning_rate": 4.800000000000001e-06, + "loss": 7745.1898, + "step": 24000 + }, + { + "epoch": 0.04850171907384139, + "grad_norm": 14420.771484375, + "learning_rate": 4.802000000000001e-06, + "loss": 7684.6906, + "step": 24010 + }, + { + "epoch": 0.04852191970652521, + "grad_norm": 38370.23046875, + "learning_rate": 4.804e-06, + "loss": 11040.3391, + "step": 24020 + }, + { + "epoch": 0.04854212033920902, + "grad_norm": 11093.9423828125, + "learning_rate": 4.806000000000001e-06, + "loss": 5948.3215, + "step": 24030 + }, + { + "epoch": 0.04856232097189284, + "grad_norm": 42289.0234375, + "learning_rate": 4.808e-06, + "loss": 6719.4359, + "step": 24040 + }, + { + "epoch": 0.048582521604576656, + "grad_norm": 2023.2171630859375, + "learning_rate": 4.8100000000000005e-06, + "loss": 17488.7875, + "step": 24050 + }, + { + "epoch": 0.04860272223726047, + "grad_norm": 1933.554443359375, + "learning_rate": 4.812000000000001e-06, + "loss": 20440.5125, + "step": 24060 + }, + { + "epoch": 0.04862292286994429, + "grad_norm": 10182.2001953125, + "learning_rate": 4.814e-06, + "loss": 11195.2109, + "step": 24070 + }, + { + "epoch": 0.0486431235026281, + "grad_norm": 50129.6484375, + "learning_rate": 4.816e-06, + "loss": 16221.0391, + "step": 24080 + }, + { + "epoch": 0.048663324135311915, + "grad_norm": 37076.734375, + "learning_rate": 4.818e-06, + "loss": 19123.725, + "step": 24090 + }, + { + "epoch": 0.048683524767995735, + "grad_norm": 3933.16162109375, + "learning_rate": 4.8200000000000004e-06, + "loss": 5634.1617, + "step": 24100 + }, + { + "epoch": 0.04870372540067955, + "grad_norm": 68415.8125, + "learning_rate": 4.822000000000001e-06, + "loss": 13764.8484, + "step": 24110 + }, + { + "epoch": 0.04872392603336337, + "grad_norm": 143105.765625, + "learning_rate": 4.824000000000001e-06, + "loss": 10329.1336, + "step": 24120 + }, + { + "epoch": 0.04874412666604718, + "grad_norm": 23112.0859375, + "learning_rate": 4.826e-06, + "loss": 4023.4281, + "step": 24130 + }, + { + "epoch": 0.048764327298730994, + "grad_norm": 10511.9189453125, + "learning_rate": 4.828e-06, + "loss": 16081.8047, + "step": 24140 + }, + { + "epoch": 0.048784527931414814, + "grad_norm": 120663.6328125, + "learning_rate": 4.83e-06, + "loss": 7563.8695, + "step": 24150 + }, + { + "epoch": 0.04880472856409863, + "grad_norm": 28077.93359375, + "learning_rate": 4.8320000000000005e-06, + "loss": 6809.0758, + "step": 24160 + }, + { + "epoch": 0.04882492919678244, + "grad_norm": 10382.9365234375, + "learning_rate": 4.834000000000001e-06, + "loss": 20945.5656, + "step": 24170 + }, + { + "epoch": 0.04884512982946626, + "grad_norm": 143145.265625, + "learning_rate": 4.836e-06, + "loss": 11884.2938, + "step": 24180 + }, + { + "epoch": 0.048865330462150074, + "grad_norm": 9273.5576171875, + "learning_rate": 4.838e-06, + "loss": 26250.0844, + "step": 24190 + }, + { + "epoch": 0.048885531094833894, + "grad_norm": 46239.3046875, + "learning_rate": 4.84e-06, + "loss": 15258.4438, + "step": 24200 + }, + { + "epoch": 0.04890573172751771, + "grad_norm": 200640.21875, + "learning_rate": 4.842e-06, + "loss": 14408.8797, + "step": 24210 + }, + { + "epoch": 0.04892593236020152, + "grad_norm": 407570.40625, + "learning_rate": 4.8440000000000005e-06, + "loss": 24519.0781, + "step": 24220 + }, + { + "epoch": 0.04894613299288534, + "grad_norm": 426401.9375, + "learning_rate": 4.846e-06, + "loss": 23764.1031, + "step": 24230 + }, + { + "epoch": 0.04896633362556915, + "grad_norm": 71593.1484375, + "learning_rate": 4.848000000000001e-06, + "loss": 16046.0562, + "step": 24240 + }, + { + "epoch": 0.048986534258252966, + "grad_norm": 217402.359375, + "learning_rate": 4.85e-06, + "loss": 14323.9422, + "step": 24250 + }, + { + "epoch": 0.049006734890936786, + "grad_norm": 100344.34375, + "learning_rate": 4.852e-06, + "loss": 16728.8719, + "step": 24260 + }, + { + "epoch": 0.0490269355236206, + "grad_norm": 13458.359375, + "learning_rate": 4.8540000000000005e-06, + "loss": 9454.2484, + "step": 24270 + }, + { + "epoch": 0.04904713615630442, + "grad_norm": 1094.37939453125, + "learning_rate": 4.856e-06, + "loss": 11921.0516, + "step": 24280 + }, + { + "epoch": 0.04906733678898823, + "grad_norm": 23913.73828125, + "learning_rate": 4.858000000000001e-06, + "loss": 9970.4266, + "step": 24290 + }, + { + "epoch": 0.049087537421672045, + "grad_norm": 17119.38671875, + "learning_rate": 4.86e-06, + "loss": 20556.8703, + "step": 24300 + }, + { + "epoch": 0.049107738054355865, + "grad_norm": 20256.79296875, + "learning_rate": 4.862e-06, + "loss": 14644.8703, + "step": 24310 + }, + { + "epoch": 0.04912793868703968, + "grad_norm": 129648.40625, + "learning_rate": 4.864e-06, + "loss": 11426.9844, + "step": 24320 + }, + { + "epoch": 0.04914813931972349, + "grad_norm": 121289.7578125, + "learning_rate": 4.8660000000000005e-06, + "loss": 20554.7891, + "step": 24330 + }, + { + "epoch": 0.04916833995240731, + "grad_norm": 1405.122314453125, + "learning_rate": 4.868000000000001e-06, + "loss": 13504.3937, + "step": 24340 + }, + { + "epoch": 0.049188540585091124, + "grad_norm": 21306.25, + "learning_rate": 4.87e-06, + "loss": 19906.6797, + "step": 24350 + }, + { + "epoch": 0.04920874121777494, + "grad_norm": 168365.875, + "learning_rate": 4.872000000000001e-06, + "loss": 22894.7719, + "step": 24360 + }, + { + "epoch": 0.04922894185045876, + "grad_norm": 93076.4296875, + "learning_rate": 4.874e-06, + "loss": 28799.2406, + "step": 24370 + }, + { + "epoch": 0.04924914248314257, + "grad_norm": 46802.04296875, + "learning_rate": 4.876e-06, + "loss": 10774.6242, + "step": 24380 + }, + { + "epoch": 0.04926934311582639, + "grad_norm": 31638.791015625, + "learning_rate": 4.8780000000000006e-06, + "loss": 14479.575, + "step": 24390 + }, + { + "epoch": 0.0492895437485102, + "grad_norm": 1642.509521484375, + "learning_rate": 4.880000000000001e-06, + "loss": 6223.1387, + "step": 24400 + }, + { + "epoch": 0.049309744381194016, + "grad_norm": 833.550048828125, + "learning_rate": 4.882000000000001e-06, + "loss": 6806.4242, + "step": 24410 + }, + { + "epoch": 0.049329945013877836, + "grad_norm": 125003.328125, + "learning_rate": 4.884e-06, + "loss": 8518.0688, + "step": 24420 + }, + { + "epoch": 0.04935014564656165, + "grad_norm": 4740.2109375, + "learning_rate": 4.886e-06, + "loss": 14458.7906, + "step": 24430 + }, + { + "epoch": 0.04937034627924546, + "grad_norm": 143323.15625, + "learning_rate": 4.8880000000000005e-06, + "loss": 9062.6656, + "step": 24440 + }, + { + "epoch": 0.04939054691192928, + "grad_norm": 59633.5859375, + "learning_rate": 4.890000000000001e-06, + "loss": 9735.6141, + "step": 24450 + }, + { + "epoch": 0.049410747544613096, + "grad_norm": 7454.650390625, + "learning_rate": 4.892000000000001e-06, + "loss": 16578.8937, + "step": 24460 + }, + { + "epoch": 0.049430948177296916, + "grad_norm": 336908.875, + "learning_rate": 4.894e-06, + "loss": 25893.7844, + "step": 24470 + }, + { + "epoch": 0.04945114880998073, + "grad_norm": 33221.79296875, + "learning_rate": 4.896e-06, + "loss": 13218.0047, + "step": 24480 + }, + { + "epoch": 0.04947134944266454, + "grad_norm": 12556.8740234375, + "learning_rate": 4.898e-06, + "loss": 14049.4297, + "step": 24490 + }, + { + "epoch": 0.04949155007534836, + "grad_norm": 15566.0478515625, + "learning_rate": 4.9000000000000005e-06, + "loss": 9968.6, + "step": 24500 + }, + { + "epoch": 0.049511750708032175, + "grad_norm": 76394.53125, + "learning_rate": 4.902000000000001e-06, + "loss": 20117.2578, + "step": 24510 + }, + { + "epoch": 0.04953195134071599, + "grad_norm": 28780.216796875, + "learning_rate": 4.904000000000001e-06, + "loss": 6365.5051, + "step": 24520 + }, + { + "epoch": 0.04955215197339981, + "grad_norm": 5069.5048828125, + "learning_rate": 4.906e-06, + "loss": 8924.2305, + "step": 24530 + }, + { + "epoch": 0.04957235260608362, + "grad_norm": 72742.25, + "learning_rate": 4.908e-06, + "loss": 10490.8672, + "step": 24540 + }, + { + "epoch": 0.04959255323876744, + "grad_norm": 51288.3203125, + "learning_rate": 4.9100000000000004e-06, + "loss": 7527.807, + "step": 24550 + }, + { + "epoch": 0.049612753871451254, + "grad_norm": 6051.56884765625, + "learning_rate": 4.9120000000000006e-06, + "loss": 17143.3734, + "step": 24560 + }, + { + "epoch": 0.04963295450413507, + "grad_norm": 4514.349609375, + "learning_rate": 4.914000000000001e-06, + "loss": 10223.757, + "step": 24570 + }, + { + "epoch": 0.04965315513681889, + "grad_norm": 16684.3671875, + "learning_rate": 4.916e-06, + "loss": 5363.0145, + "step": 24580 + }, + { + "epoch": 0.0496733557695027, + "grad_norm": 33762.53515625, + "learning_rate": 4.918e-06, + "loss": 21868.5531, + "step": 24590 + }, + { + "epoch": 0.04969355640218651, + "grad_norm": 74856.9609375, + "learning_rate": 4.92e-06, + "loss": 10649.3609, + "step": 24600 + }, + { + "epoch": 0.04971375703487033, + "grad_norm": 39262.671875, + "learning_rate": 4.9220000000000005e-06, + "loss": 6226.7812, + "step": 24610 + }, + { + "epoch": 0.049733957667554146, + "grad_norm": 69105.078125, + "learning_rate": 4.924000000000001e-06, + "loss": 10437.8555, + "step": 24620 + }, + { + "epoch": 0.049754158300237966, + "grad_norm": 30235.287109375, + "learning_rate": 4.926e-06, + "loss": 6776.0055, + "step": 24630 + }, + { + "epoch": 0.04977435893292178, + "grad_norm": 135081.78125, + "learning_rate": 4.928000000000001e-06, + "loss": 20367.3438, + "step": 24640 + }, + { + "epoch": 0.04979455956560559, + "grad_norm": 23038.484375, + "learning_rate": 4.93e-06, + "loss": 10841.8633, + "step": 24650 + }, + { + "epoch": 0.04981476019828941, + "grad_norm": 19243.173828125, + "learning_rate": 4.932e-06, + "loss": 9251.1102, + "step": 24660 + }, + { + "epoch": 0.049834960830973225, + "grad_norm": 6807.44140625, + "learning_rate": 4.9340000000000005e-06, + "loss": 38173.6375, + "step": 24670 + }, + { + "epoch": 0.04985516146365704, + "grad_norm": 4647.669921875, + "learning_rate": 4.936e-06, + "loss": 13536.8047, + "step": 24680 + }, + { + "epoch": 0.04987536209634086, + "grad_norm": 86933.9921875, + "learning_rate": 4.938000000000001e-06, + "loss": 9596.0312, + "step": 24690 + }, + { + "epoch": 0.04989556272902467, + "grad_norm": 6546.63671875, + "learning_rate": 4.94e-06, + "loss": 20153.3922, + "step": 24700 + }, + { + "epoch": 0.04991576336170849, + "grad_norm": 23437.8828125, + "learning_rate": 4.942e-06, + "loss": 6427.3035, + "step": 24710 + }, + { + "epoch": 0.049935963994392304, + "grad_norm": 10344.53125, + "learning_rate": 4.9440000000000004e-06, + "loss": 18768.7234, + "step": 24720 + }, + { + "epoch": 0.04995616462707612, + "grad_norm": 5729.1005859375, + "learning_rate": 4.946000000000001e-06, + "loss": 20144.4172, + "step": 24730 + }, + { + "epoch": 0.04997636525975994, + "grad_norm": 55263.8828125, + "learning_rate": 4.948000000000001e-06, + "loss": 18688.4359, + "step": 24740 + }, + { + "epoch": 0.04999656589244375, + "grad_norm": 8905.50390625, + "learning_rate": 4.95e-06, + "loss": 6418.3332, + "step": 24750 + }, + { + "epoch": 0.050016766525127564, + "grad_norm": 4878.67578125, + "learning_rate": 4.952e-06, + "loss": 12428.65, + "step": 24760 + }, + { + "epoch": 0.050036967157811384, + "grad_norm": 285000.875, + "learning_rate": 4.954e-06, + "loss": 25435.2266, + "step": 24770 + }, + { + "epoch": 0.0500571677904952, + "grad_norm": 103737.28125, + "learning_rate": 4.9560000000000005e-06, + "loss": 10262.8031, + "step": 24780 + }, + { + "epoch": 0.05007736842317902, + "grad_norm": 2308.703125, + "learning_rate": 4.958000000000001e-06, + "loss": 12477.2758, + "step": 24790 + }, + { + "epoch": 0.05009756905586283, + "grad_norm": 2145.08642578125, + "learning_rate": 4.960000000000001e-06, + "loss": 25851.425, + "step": 24800 + }, + { + "epoch": 0.05011776968854664, + "grad_norm": 9366.5771484375, + "learning_rate": 4.962e-06, + "loss": 11274.4078, + "step": 24810 + }, + { + "epoch": 0.05013797032123046, + "grad_norm": 4847.2939453125, + "learning_rate": 4.964e-06, + "loss": 15960.4141, + "step": 24820 + }, + { + "epoch": 0.050158170953914276, + "grad_norm": 199138.34375, + "learning_rate": 4.966e-06, + "loss": 14157.0969, + "step": 24830 + }, + { + "epoch": 0.05017837158659809, + "grad_norm": 153029.40625, + "learning_rate": 4.9680000000000005e-06, + "loss": 16921.7031, + "step": 24840 + }, + { + "epoch": 0.05019857221928191, + "grad_norm": 61843.62109375, + "learning_rate": 4.970000000000001e-06, + "loss": 19716.25, + "step": 24850 + }, + { + "epoch": 0.05021877285196572, + "grad_norm": 37475.14453125, + "learning_rate": 4.972e-06, + "loss": 10229.2766, + "step": 24860 + }, + { + "epoch": 0.05023897348464954, + "grad_norm": 176114.640625, + "learning_rate": 4.974e-06, + "loss": 24445.3719, + "step": 24870 + }, + { + "epoch": 0.050259174117333355, + "grad_norm": 396275.5625, + "learning_rate": 4.976e-06, + "loss": 18472.4188, + "step": 24880 + }, + { + "epoch": 0.05027937475001717, + "grad_norm": 1973.982666015625, + "learning_rate": 4.9780000000000005e-06, + "loss": 12623.3266, + "step": 24890 + }, + { + "epoch": 0.05029957538270099, + "grad_norm": 43510.9140625, + "learning_rate": 4.980000000000001e-06, + "loss": 11238.318, + "step": 24900 + }, + { + "epoch": 0.0503197760153848, + "grad_norm": 9371.13671875, + "learning_rate": 4.982e-06, + "loss": 4557.9527, + "step": 24910 + }, + { + "epoch": 0.050339976648068614, + "grad_norm": 6553.54248046875, + "learning_rate": 4.984000000000001e-06, + "loss": 5231.0922, + "step": 24920 + }, + { + "epoch": 0.050360177280752434, + "grad_norm": 9241.03515625, + "learning_rate": 4.986e-06, + "loss": 5389.7633, + "step": 24930 + }, + { + "epoch": 0.05038037791343625, + "grad_norm": 1712.3138427734375, + "learning_rate": 4.988e-06, + "loss": 15643.2641, + "step": 24940 + }, + { + "epoch": 0.05040057854612007, + "grad_norm": 1548.5107421875, + "learning_rate": 4.9900000000000005e-06, + "loss": 14620.3766, + "step": 24950 + }, + { + "epoch": 0.05042077917880388, + "grad_norm": 25301.435546875, + "learning_rate": 4.992e-06, + "loss": 12799.8211, + "step": 24960 + }, + { + "epoch": 0.05044097981148769, + "grad_norm": 35397.31640625, + "learning_rate": 4.994000000000001e-06, + "loss": 17539.6578, + "step": 24970 + }, + { + "epoch": 0.05046118044417151, + "grad_norm": 4883.978515625, + "learning_rate": 4.996e-06, + "loss": 20543.8391, + "step": 24980 + }, + { + "epoch": 0.050481381076855326, + "grad_norm": 3507.330810546875, + "learning_rate": 4.998e-06, + "loss": 14232.0312, + "step": 24990 + }, + { + "epoch": 0.05050158170953914, + "grad_norm": 3310.845703125, + "learning_rate": 5e-06, + "loss": 8078.6977, + "step": 25000 + }, + { + "epoch": 0.05052178234222296, + "grad_norm": 370.85638427734375, + "learning_rate": 5.0020000000000006e-06, + "loss": 8345.4547, + "step": 25010 + }, + { + "epoch": 0.05054198297490677, + "grad_norm": 30423.130859375, + "learning_rate": 5.004e-06, + "loss": 11710.4188, + "step": 25020 + }, + { + "epoch": 0.05056218360759059, + "grad_norm": 25572.412109375, + "learning_rate": 5.006000000000001e-06, + "loss": 10759.5812, + "step": 25030 + }, + { + "epoch": 0.050582384240274406, + "grad_norm": 6360.5390625, + "learning_rate": 5.008000000000001e-06, + "loss": 21205.5406, + "step": 25040 + }, + { + "epoch": 0.05060258487295822, + "grad_norm": 6360.8935546875, + "learning_rate": 5.01e-06, + "loss": 6032.2426, + "step": 25050 + }, + { + "epoch": 0.05062278550564204, + "grad_norm": 2541.238525390625, + "learning_rate": 5.0120000000000005e-06, + "loss": 8851.3391, + "step": 25060 + }, + { + "epoch": 0.05064298613832585, + "grad_norm": 4885.91259765625, + "learning_rate": 5.014e-06, + "loss": 3543.8469, + "step": 25070 + }, + { + "epoch": 0.050663186771009665, + "grad_norm": 107559.265625, + "learning_rate": 5.016000000000001e-06, + "loss": 13988.8891, + "step": 25080 + }, + { + "epoch": 0.050683387403693485, + "grad_norm": 272846.5625, + "learning_rate": 5.018000000000001e-06, + "loss": 22972.4422, + "step": 25090 + }, + { + "epoch": 0.0507035880363773, + "grad_norm": 8133.431640625, + "learning_rate": 5.02e-06, + "loss": 6061.7793, + "step": 25100 + }, + { + "epoch": 0.05072378866906112, + "grad_norm": 27681.029296875, + "learning_rate": 5.022e-06, + "loss": 12251.35, + "step": 25110 + }, + { + "epoch": 0.05074398930174493, + "grad_norm": 738.0137939453125, + "learning_rate": 5.024e-06, + "loss": 15189.9203, + "step": 25120 + }, + { + "epoch": 0.050764189934428744, + "grad_norm": 43034.2578125, + "learning_rate": 5.026000000000001e-06, + "loss": 16326.3297, + "step": 25130 + }, + { + "epoch": 0.050784390567112564, + "grad_norm": 65604.2890625, + "learning_rate": 5.028000000000001e-06, + "loss": 9822.2117, + "step": 25140 + }, + { + "epoch": 0.05080459119979638, + "grad_norm": 327510.28125, + "learning_rate": 5.03e-06, + "loss": 24069.6937, + "step": 25150 + }, + { + "epoch": 0.05082479183248019, + "grad_norm": 6429.92236328125, + "learning_rate": 5.032e-06, + "loss": 5968.3828, + "step": 25160 + }, + { + "epoch": 0.05084499246516401, + "grad_norm": 39630.8359375, + "learning_rate": 5.0339999999999996e-06, + "loss": 16853.3422, + "step": 25170 + }, + { + "epoch": 0.05086519309784782, + "grad_norm": 6430.59423828125, + "learning_rate": 5.0360000000000006e-06, + "loss": 15525.2516, + "step": 25180 + }, + { + "epoch": 0.05088539373053164, + "grad_norm": 10039.9765625, + "learning_rate": 5.038000000000001e-06, + "loss": 9261.668, + "step": 25190 + }, + { + "epoch": 0.050905594363215456, + "grad_norm": 42261.65625, + "learning_rate": 5.04e-06, + "loss": 6311.5063, + "step": 25200 + }, + { + "epoch": 0.05092579499589927, + "grad_norm": 6332.99462890625, + "learning_rate": 5.042e-06, + "loss": 4775.6984, + "step": 25210 + }, + { + "epoch": 0.05094599562858309, + "grad_norm": 34915.20703125, + "learning_rate": 5.044e-06, + "loss": 10371.6078, + "step": 25220 + }, + { + "epoch": 0.0509661962612669, + "grad_norm": 434750.96875, + "learning_rate": 5.0460000000000005e-06, + "loss": 22739.9125, + "step": 25230 + }, + { + "epoch": 0.050986396893950715, + "grad_norm": 8926.6025390625, + "learning_rate": 5.048000000000001e-06, + "loss": 18736.5187, + "step": 25240 + }, + { + "epoch": 0.051006597526634535, + "grad_norm": 34657.7578125, + "learning_rate": 5.050000000000001e-06, + "loss": 12334.7289, + "step": 25250 + }, + { + "epoch": 0.05102679815931835, + "grad_norm": 41748.75, + "learning_rate": 5.052e-06, + "loss": 15522.125, + "step": 25260 + }, + { + "epoch": 0.05104699879200217, + "grad_norm": 2166.33740234375, + "learning_rate": 5.054e-06, + "loss": 17382.0766, + "step": 25270 + }, + { + "epoch": 0.05106719942468598, + "grad_norm": 14902.4267578125, + "learning_rate": 5.056000000000001e-06, + "loss": 11779.0063, + "step": 25280 + }, + { + "epoch": 0.051087400057369795, + "grad_norm": 28887.189453125, + "learning_rate": 5.0580000000000005e-06, + "loss": 15005.6453, + "step": 25290 + }, + { + "epoch": 0.051107600690053615, + "grad_norm": 361366.46875, + "learning_rate": 5.060000000000001e-06, + "loss": 23116.2812, + "step": 25300 + }, + { + "epoch": 0.05112780132273743, + "grad_norm": 0.0, + "learning_rate": 5.062e-06, + "loss": 5940.5121, + "step": 25310 + }, + { + "epoch": 0.05114800195542124, + "grad_norm": 117666.015625, + "learning_rate": 5.064e-06, + "loss": 11701.2219, + "step": 25320 + }, + { + "epoch": 0.05116820258810506, + "grad_norm": 2692.321044921875, + "learning_rate": 5.066000000000001e-06, + "loss": 10942.6344, + "step": 25330 + }, + { + "epoch": 0.051188403220788874, + "grad_norm": 35986.01953125, + "learning_rate": 5.0680000000000004e-06, + "loss": 9216.1938, + "step": 25340 + }, + { + "epoch": 0.051208603853472694, + "grad_norm": 17862.76953125, + "learning_rate": 5.070000000000001e-06, + "loss": 9496.7781, + "step": 25350 + }, + { + "epoch": 0.05122880448615651, + "grad_norm": 231174.234375, + "learning_rate": 5.072e-06, + "loss": 12022.8859, + "step": 25360 + }, + { + "epoch": 0.05124900511884032, + "grad_norm": 8932.296875, + "learning_rate": 5.074e-06, + "loss": 4369.618, + "step": 25370 + }, + { + "epoch": 0.05126920575152414, + "grad_norm": 29950.67578125, + "learning_rate": 5.076000000000001e-06, + "loss": 18218.3063, + "step": 25380 + }, + { + "epoch": 0.05128940638420795, + "grad_norm": 11691.8798828125, + "learning_rate": 5.078e-06, + "loss": 7024.1031, + "step": 25390 + }, + { + "epoch": 0.051309607016891766, + "grad_norm": 6010.5029296875, + "learning_rate": 5.0800000000000005e-06, + "loss": 11861.6422, + "step": 25400 + }, + { + "epoch": 0.051329807649575586, + "grad_norm": 5813.7587890625, + "learning_rate": 5.082000000000001e-06, + "loss": 4758.3938, + "step": 25410 + }, + { + "epoch": 0.0513500082822594, + "grad_norm": 10445.462890625, + "learning_rate": 5.084e-06, + "loss": 6221.8781, + "step": 25420 + }, + { + "epoch": 0.05137020891494322, + "grad_norm": 19709.259765625, + "learning_rate": 5.086000000000001e-06, + "loss": 7380.1227, + "step": 25430 + }, + { + "epoch": 0.05139040954762703, + "grad_norm": 44676.5234375, + "learning_rate": 5.088000000000001e-06, + "loss": 6653.5211, + "step": 25440 + }, + { + "epoch": 0.051410610180310845, + "grad_norm": 160843.109375, + "learning_rate": 5.09e-06, + "loss": 11961.6445, + "step": 25450 + }, + { + "epoch": 0.051430810812994665, + "grad_norm": 23654.603515625, + "learning_rate": 5.0920000000000005e-06, + "loss": 20625.5172, + "step": 25460 + }, + { + "epoch": 0.05145101144567848, + "grad_norm": 69916.1796875, + "learning_rate": 5.094e-06, + "loss": 13551.8375, + "step": 25470 + }, + { + "epoch": 0.05147121207836229, + "grad_norm": 130901.7734375, + "learning_rate": 5.096000000000001e-06, + "loss": 11021.9875, + "step": 25480 + }, + { + "epoch": 0.05149141271104611, + "grad_norm": 5816.00244140625, + "learning_rate": 5.098000000000001e-06, + "loss": 26272.3063, + "step": 25490 + }, + { + "epoch": 0.051511613343729924, + "grad_norm": 1790.9010009765625, + "learning_rate": 5.1e-06, + "loss": 4584.4328, + "step": 25500 + }, + { + "epoch": 0.051531813976413744, + "grad_norm": 79919.9921875, + "learning_rate": 5.1020000000000004e-06, + "loss": 22606.3766, + "step": 25510 + }, + { + "epoch": 0.05155201460909756, + "grad_norm": 6112.62451171875, + "learning_rate": 5.104e-06, + "loss": 13564.9859, + "step": 25520 + }, + { + "epoch": 0.05157221524178137, + "grad_norm": 104642.484375, + "learning_rate": 5.106000000000001e-06, + "loss": 18000.6063, + "step": 25530 + }, + { + "epoch": 0.05159241587446519, + "grad_norm": 149346.421875, + "learning_rate": 5.108000000000001e-06, + "loss": 21750.3937, + "step": 25540 + }, + { + "epoch": 0.051612616507149, + "grad_norm": 12424.41796875, + "learning_rate": 5.11e-06, + "loss": 10970.3797, + "step": 25550 + }, + { + "epoch": 0.051632817139832816, + "grad_norm": 2465.284912109375, + "learning_rate": 5.112e-06, + "loss": 10940.4219, + "step": 25560 + }, + { + "epoch": 0.051653017772516636, + "grad_norm": 200911.515625, + "learning_rate": 5.114e-06, + "loss": 12991.9734, + "step": 25570 + }, + { + "epoch": 0.05167321840520045, + "grad_norm": 22090.921875, + "learning_rate": 5.116000000000001e-06, + "loss": 7191.2086, + "step": 25580 + }, + { + "epoch": 0.05169341903788427, + "grad_norm": 88893.9296875, + "learning_rate": 5.118000000000001e-06, + "loss": 12900.0844, + "step": 25590 + }, + { + "epoch": 0.05171361967056808, + "grad_norm": 4372.64794921875, + "learning_rate": 5.12e-06, + "loss": 7242.6727, + "step": 25600 + }, + { + "epoch": 0.051733820303251896, + "grad_norm": 18411.896484375, + "learning_rate": 5.122e-06, + "loss": 24538.7969, + "step": 25610 + }, + { + "epoch": 0.051754020935935716, + "grad_norm": 2268.743408203125, + "learning_rate": 5.124e-06, + "loss": 30706.7687, + "step": 25620 + }, + { + "epoch": 0.05177422156861953, + "grad_norm": 269365.65625, + "learning_rate": 5.126e-06, + "loss": 15258.9625, + "step": 25630 + }, + { + "epoch": 0.05179442220130334, + "grad_norm": 36566.3828125, + "learning_rate": 5.128000000000001e-06, + "loss": 12236.9281, + "step": 25640 + }, + { + "epoch": 0.05181462283398716, + "grad_norm": 14732.513671875, + "learning_rate": 5.130000000000001e-06, + "loss": 12476.7492, + "step": 25650 + }, + { + "epoch": 0.051834823466670975, + "grad_norm": 7004.845703125, + "learning_rate": 5.132e-06, + "loss": 15319.775, + "step": 25660 + }, + { + "epoch": 0.051855024099354795, + "grad_norm": 5821.22509765625, + "learning_rate": 5.134e-06, + "loss": 9497.1516, + "step": 25670 + }, + { + "epoch": 0.05187522473203861, + "grad_norm": 3173.912841796875, + "learning_rate": 5.136e-06, + "loss": 8414.3625, + "step": 25680 + }, + { + "epoch": 0.05189542536472242, + "grad_norm": 4703.51806640625, + "learning_rate": 5.138000000000001e-06, + "loss": 6933.0266, + "step": 25690 + }, + { + "epoch": 0.05191562599740624, + "grad_norm": 16775.591796875, + "learning_rate": 5.140000000000001e-06, + "loss": 20660.1594, + "step": 25700 + }, + { + "epoch": 0.051935826630090054, + "grad_norm": 71053.125, + "learning_rate": 5.142e-06, + "loss": 10313.6703, + "step": 25710 + }, + { + "epoch": 0.05195602726277387, + "grad_norm": 2014.94384765625, + "learning_rate": 5.144e-06, + "loss": 9796.4844, + "step": 25720 + }, + { + "epoch": 0.05197622789545769, + "grad_norm": 10659.05078125, + "learning_rate": 5.1459999999999995e-06, + "loss": 8357.0492, + "step": 25730 + }, + { + "epoch": 0.0519964285281415, + "grad_norm": 138967.9375, + "learning_rate": 5.1480000000000005e-06, + "loss": 14098.7078, + "step": 25740 + }, + { + "epoch": 0.05201662916082532, + "grad_norm": 27900.484375, + "learning_rate": 5.150000000000001e-06, + "loss": 14420.6812, + "step": 25750 + }, + { + "epoch": 0.05203682979350913, + "grad_norm": 206.68634033203125, + "learning_rate": 5.152e-06, + "loss": 9972.8, + "step": 25760 + }, + { + "epoch": 0.052057030426192946, + "grad_norm": 19216.373046875, + "learning_rate": 5.154e-06, + "loss": 12201.2227, + "step": 25770 + }, + { + "epoch": 0.052077231058876766, + "grad_norm": 610739.6875, + "learning_rate": 5.156e-06, + "loss": 18208.5469, + "step": 25780 + }, + { + "epoch": 0.05209743169156058, + "grad_norm": 17770.341796875, + "learning_rate": 5.158e-06, + "loss": 9401.4219, + "step": 25790 + }, + { + "epoch": 0.05211763232424439, + "grad_norm": 1785.43359375, + "learning_rate": 5.1600000000000006e-06, + "loss": 6072.6891, + "step": 25800 + }, + { + "epoch": 0.05213783295692821, + "grad_norm": 5627.8857421875, + "learning_rate": 5.162000000000001e-06, + "loss": 13991.1031, + "step": 25810 + }, + { + "epoch": 0.052158033589612025, + "grad_norm": 6912.095703125, + "learning_rate": 5.164e-06, + "loss": 12948.1461, + "step": 25820 + }, + { + "epoch": 0.052178234222295845, + "grad_norm": 23830.4296875, + "learning_rate": 5.166e-06, + "loss": 17434.1531, + "step": 25830 + }, + { + "epoch": 0.05219843485497966, + "grad_norm": 226462.875, + "learning_rate": 5.168000000000001e-06, + "loss": 20669.2484, + "step": 25840 + }, + { + "epoch": 0.05221863548766347, + "grad_norm": 7110.3681640625, + "learning_rate": 5.1700000000000005e-06, + "loss": 10316.7234, + "step": 25850 + }, + { + "epoch": 0.05223883612034729, + "grad_norm": 18566.810546875, + "learning_rate": 5.172000000000001e-06, + "loss": 18647.3406, + "step": 25860 + }, + { + "epoch": 0.052259036753031105, + "grad_norm": 101133.7421875, + "learning_rate": 5.174e-06, + "loss": 9597.4578, + "step": 25870 + }, + { + "epoch": 0.05227923738571492, + "grad_norm": 14749.4443359375, + "learning_rate": 5.176e-06, + "loss": 18755.4906, + "step": 25880 + }, + { + "epoch": 0.05229943801839874, + "grad_norm": 90081.734375, + "learning_rate": 5.178000000000001e-06, + "loss": 10222.3961, + "step": 25890 + }, + { + "epoch": 0.05231963865108255, + "grad_norm": 25550.73828125, + "learning_rate": 5.18e-06, + "loss": 19141.6234, + "step": 25900 + }, + { + "epoch": 0.05233983928376637, + "grad_norm": 2930.631103515625, + "learning_rate": 5.1820000000000005e-06, + "loss": 16261.525, + "step": 25910 + }, + { + "epoch": 0.052360039916450184, + "grad_norm": 208640.28125, + "learning_rate": 5.184e-06, + "loss": 15613.5203, + "step": 25920 + }, + { + "epoch": 0.052380240549134, + "grad_norm": 14621.40234375, + "learning_rate": 5.186e-06, + "loss": 13426.4953, + "step": 25930 + }, + { + "epoch": 0.05240044118181782, + "grad_norm": 6915.93408203125, + "learning_rate": 5.188000000000001e-06, + "loss": 20758.2422, + "step": 25940 + }, + { + "epoch": 0.05242064181450163, + "grad_norm": 25688.96875, + "learning_rate": 5.19e-06, + "loss": 5222.7156, + "step": 25950 + }, + { + "epoch": 0.05244084244718544, + "grad_norm": 1268.0750732421875, + "learning_rate": 5.1920000000000004e-06, + "loss": 5811.6648, + "step": 25960 + }, + { + "epoch": 0.05246104307986926, + "grad_norm": 397.329833984375, + "learning_rate": 5.194e-06, + "loss": 3626.0297, + "step": 25970 + }, + { + "epoch": 0.052481243712553076, + "grad_norm": 14493.48046875, + "learning_rate": 5.196e-06, + "loss": 5404.2516, + "step": 25980 + }, + { + "epoch": 0.052501444345236896, + "grad_norm": 1292.994140625, + "learning_rate": 5.198000000000001e-06, + "loss": 12578.8844, + "step": 25990 + }, + { + "epoch": 0.05252164497792071, + "grad_norm": 14560.65234375, + "learning_rate": 5.2e-06, + "loss": 6576.007, + "step": 26000 + }, + { + "epoch": 0.05254184561060452, + "grad_norm": 126506.671875, + "learning_rate": 5.202e-06, + "loss": 9370.7133, + "step": 26010 + }, + { + "epoch": 0.05256204624328834, + "grad_norm": 103318.328125, + "learning_rate": 5.2040000000000005e-06, + "loss": 15678.0422, + "step": 26020 + }, + { + "epoch": 0.052582246875972155, + "grad_norm": 14826.470703125, + "learning_rate": 5.206e-06, + "loss": 7489.4453, + "step": 26030 + }, + { + "epoch": 0.05260244750865597, + "grad_norm": 64580.3671875, + "learning_rate": 5.208000000000001e-06, + "loss": 6468.1996, + "step": 26040 + }, + { + "epoch": 0.05262264814133979, + "grad_norm": 97534.546875, + "learning_rate": 5.210000000000001e-06, + "loss": 8503.0289, + "step": 26050 + }, + { + "epoch": 0.0526428487740236, + "grad_norm": 3460.0419921875, + "learning_rate": 5.212e-06, + "loss": 17721.8125, + "step": 26060 + }, + { + "epoch": 0.05266304940670742, + "grad_norm": 12390.8349609375, + "learning_rate": 5.214e-06, + "loss": 6901.3672, + "step": 26070 + }, + { + "epoch": 0.052683250039391234, + "grad_norm": 101240.21875, + "learning_rate": 5.216e-06, + "loss": 16977.9828, + "step": 26080 + }, + { + "epoch": 0.05270345067207505, + "grad_norm": 17138.048828125, + "learning_rate": 5.218000000000001e-06, + "loss": 6298.4902, + "step": 26090 + }, + { + "epoch": 0.05272365130475887, + "grad_norm": 5405.44580078125, + "learning_rate": 5.220000000000001e-06, + "loss": 3159.3684, + "step": 26100 + }, + { + "epoch": 0.05274385193744268, + "grad_norm": 39289.203125, + "learning_rate": 5.222e-06, + "loss": 5120.5656, + "step": 26110 + }, + { + "epoch": 0.05276405257012649, + "grad_norm": 18063.466796875, + "learning_rate": 5.224e-06, + "loss": 11818.8453, + "step": 26120 + }, + { + "epoch": 0.05278425320281031, + "grad_norm": 47995.89453125, + "learning_rate": 5.226e-06, + "loss": 13016.7914, + "step": 26130 + }, + { + "epoch": 0.052804453835494126, + "grad_norm": 57492.234375, + "learning_rate": 5.228000000000001e-06, + "loss": 21119.8531, + "step": 26140 + }, + { + "epoch": 0.052824654468177946, + "grad_norm": 12066.2060546875, + "learning_rate": 5.230000000000001e-06, + "loss": 10171.8047, + "step": 26150 + }, + { + "epoch": 0.05284485510086176, + "grad_norm": 18858.5390625, + "learning_rate": 5.232e-06, + "loss": 6720.4422, + "step": 26160 + }, + { + "epoch": 0.05286505573354557, + "grad_norm": 24530.46484375, + "learning_rate": 5.234e-06, + "loss": 12749.7484, + "step": 26170 + }, + { + "epoch": 0.05288525636622939, + "grad_norm": 1681.6297607421875, + "learning_rate": 5.236e-06, + "loss": 2487.6422, + "step": 26180 + }, + { + "epoch": 0.052905456998913206, + "grad_norm": 2967.7333984375, + "learning_rate": 5.2380000000000005e-06, + "loss": 10586.1164, + "step": 26190 + }, + { + "epoch": 0.05292565763159702, + "grad_norm": 1401.4981689453125, + "learning_rate": 5.240000000000001e-06, + "loss": 7282.7688, + "step": 26200 + }, + { + "epoch": 0.05294585826428084, + "grad_norm": 140352.671875, + "learning_rate": 5.242000000000001e-06, + "loss": 10703.3422, + "step": 26210 + }, + { + "epoch": 0.05296605889696465, + "grad_norm": 16226.0859375, + "learning_rate": 5.244e-06, + "loss": 7263.1875, + "step": 26220 + }, + { + "epoch": 0.05298625952964847, + "grad_norm": 127.29179382324219, + "learning_rate": 5.246e-06, + "loss": 29839.75, + "step": 26230 + }, + { + "epoch": 0.053006460162332285, + "grad_norm": 113988.4140625, + "learning_rate": 5.248000000000001e-06, + "loss": 6612.7437, + "step": 26240 + }, + { + "epoch": 0.0530266607950161, + "grad_norm": 1162.721923828125, + "learning_rate": 5.2500000000000006e-06, + "loss": 14716.1453, + "step": 26250 + }, + { + "epoch": 0.05304686142769992, + "grad_norm": 1327.751220703125, + "learning_rate": 5.252000000000001e-06, + "loss": 7821.4922, + "step": 26260 + }, + { + "epoch": 0.05306706206038373, + "grad_norm": 31932.158203125, + "learning_rate": 5.254e-06, + "loss": 19954.4062, + "step": 26270 + }, + { + "epoch": 0.053087262693067544, + "grad_norm": 1294.8968505859375, + "learning_rate": 5.256e-06, + "loss": 10439.3922, + "step": 26280 + }, + { + "epoch": 0.053107463325751364, + "grad_norm": 2490.867431640625, + "learning_rate": 5.258000000000001e-06, + "loss": 12536.1031, + "step": 26290 + }, + { + "epoch": 0.05312766395843518, + "grad_norm": 15506.7392578125, + "learning_rate": 5.2600000000000005e-06, + "loss": 16776.9656, + "step": 26300 + }, + { + "epoch": 0.053147864591119, + "grad_norm": 1575.5194091796875, + "learning_rate": 5.262000000000001e-06, + "loss": 11225.3023, + "step": 26310 + }, + { + "epoch": 0.05316806522380281, + "grad_norm": 348439.09375, + "learning_rate": 5.264e-06, + "loss": 28859.2156, + "step": 26320 + }, + { + "epoch": 0.05318826585648662, + "grad_norm": 54644.78515625, + "learning_rate": 5.266e-06, + "loss": 15626.4328, + "step": 26330 + }, + { + "epoch": 0.05320846648917044, + "grad_norm": 4025.986328125, + "learning_rate": 5.268000000000001e-06, + "loss": 7537.0523, + "step": 26340 + }, + { + "epoch": 0.053228667121854256, + "grad_norm": 224935.25, + "learning_rate": 5.27e-06, + "loss": 15592.7656, + "step": 26350 + }, + { + "epoch": 0.05324886775453807, + "grad_norm": 194320.078125, + "learning_rate": 5.2720000000000005e-06, + "loss": 19391.4141, + "step": 26360 + }, + { + "epoch": 0.05326906838722189, + "grad_norm": 4389.66845703125, + "learning_rate": 5.274e-06, + "loss": 9767.0531, + "step": 26370 + }, + { + "epoch": 0.0532892690199057, + "grad_norm": 19011.783203125, + "learning_rate": 5.276e-06, + "loss": 7481.5484, + "step": 26380 + }, + { + "epoch": 0.05330946965258952, + "grad_norm": 102456.2109375, + "learning_rate": 5.278000000000001e-06, + "loss": 6629.075, + "step": 26390 + }, + { + "epoch": 0.053329670285273335, + "grad_norm": 11091.21484375, + "learning_rate": 5.28e-06, + "loss": 8802.6609, + "step": 26400 + }, + { + "epoch": 0.05334987091795715, + "grad_norm": 4044.063232421875, + "learning_rate": 5.282e-06, + "loss": 7675.0016, + "step": 26410 + }, + { + "epoch": 0.05337007155064097, + "grad_norm": 22845.01171875, + "learning_rate": 5.2840000000000006e-06, + "loss": 24793.3125, + "step": 26420 + }, + { + "epoch": 0.05339027218332478, + "grad_norm": 27732.390625, + "learning_rate": 5.286e-06, + "loss": 2716.7146, + "step": 26430 + }, + { + "epoch": 0.053410472816008595, + "grad_norm": 4622.34619140625, + "learning_rate": 5.288000000000001e-06, + "loss": 13804.05, + "step": 26440 + }, + { + "epoch": 0.053430673448692415, + "grad_norm": 209.47177124023438, + "learning_rate": 5.290000000000001e-06, + "loss": 17182.2453, + "step": 26450 + }, + { + "epoch": 0.05345087408137623, + "grad_norm": 7330.50732421875, + "learning_rate": 5.292e-06, + "loss": 14760.7141, + "step": 26460 + }, + { + "epoch": 0.05347107471406005, + "grad_norm": 24303.822265625, + "learning_rate": 5.2940000000000005e-06, + "loss": 19406.2125, + "step": 26470 + }, + { + "epoch": 0.05349127534674386, + "grad_norm": 30117.365234375, + "learning_rate": 5.296e-06, + "loss": 4874.1504, + "step": 26480 + }, + { + "epoch": 0.053511475979427674, + "grad_norm": 6340.8681640625, + "learning_rate": 5.298000000000001e-06, + "loss": 11407.6422, + "step": 26490 + }, + { + "epoch": 0.053531676612111494, + "grad_norm": 8653.0419921875, + "learning_rate": 5.300000000000001e-06, + "loss": 11171.882, + "step": 26500 + }, + { + "epoch": 0.05355187724479531, + "grad_norm": 10370.9228515625, + "learning_rate": 5.302e-06, + "loss": 9797.3273, + "step": 26510 + }, + { + "epoch": 0.05357207787747912, + "grad_norm": 31312.65625, + "learning_rate": 5.304e-06, + "loss": 5748.491, + "step": 26520 + }, + { + "epoch": 0.05359227851016294, + "grad_norm": 25104.009765625, + "learning_rate": 5.306e-06, + "loss": 11356.5547, + "step": 26530 + }, + { + "epoch": 0.05361247914284675, + "grad_norm": 267438.09375, + "learning_rate": 5.308000000000001e-06, + "loss": 8878.0023, + "step": 26540 + }, + { + "epoch": 0.05363267977553057, + "grad_norm": 6243.6640625, + "learning_rate": 5.310000000000001e-06, + "loss": 16268.0578, + "step": 26550 + }, + { + "epoch": 0.053652880408214386, + "grad_norm": 22993.76171875, + "learning_rate": 5.312e-06, + "loss": 16514.6781, + "step": 26560 + }, + { + "epoch": 0.0536730810408982, + "grad_norm": 2725.68994140625, + "learning_rate": 5.314e-06, + "loss": 9620.2805, + "step": 26570 + }, + { + "epoch": 0.05369328167358202, + "grad_norm": 17313.16015625, + "learning_rate": 5.3160000000000004e-06, + "loss": 4389.4102, + "step": 26580 + }, + { + "epoch": 0.05371348230626583, + "grad_norm": 10044.794921875, + "learning_rate": 5.318000000000001e-06, + "loss": 10180.5, + "step": 26590 + }, + { + "epoch": 0.053733682938949645, + "grad_norm": 255889.625, + "learning_rate": 5.320000000000001e-06, + "loss": 25346.1969, + "step": 26600 + }, + { + "epoch": 0.053753883571633465, + "grad_norm": 11301.59765625, + "learning_rate": 5.322000000000001e-06, + "loss": 8843.3414, + "step": 26610 + }, + { + "epoch": 0.05377408420431728, + "grad_norm": 6708.64111328125, + "learning_rate": 5.324e-06, + "loss": 4791.5082, + "step": 26620 + }, + { + "epoch": 0.0537942848370011, + "grad_norm": 240066.296875, + "learning_rate": 5.326e-06, + "loss": 13931.4016, + "step": 26630 + }, + { + "epoch": 0.05381448546968491, + "grad_norm": 8196.658203125, + "learning_rate": 5.328000000000001e-06, + "loss": 8964.8828, + "step": 26640 + }, + { + "epoch": 0.053834686102368724, + "grad_norm": 60816.27734375, + "learning_rate": 5.330000000000001e-06, + "loss": 19797.3297, + "step": 26650 + }, + { + "epoch": 0.053854886735052544, + "grad_norm": 4148.3349609375, + "learning_rate": 5.332000000000001e-06, + "loss": 18604.9047, + "step": 26660 + }, + { + "epoch": 0.05387508736773636, + "grad_norm": 35700.28515625, + "learning_rate": 5.334e-06, + "loss": 14198.8406, + "step": 26670 + }, + { + "epoch": 0.05389528800042017, + "grad_norm": 14347.0107421875, + "learning_rate": 5.336e-06, + "loss": 17503.9125, + "step": 26680 + }, + { + "epoch": 0.05391548863310399, + "grad_norm": 5388.6640625, + "learning_rate": 5.338000000000001e-06, + "loss": 15267.2062, + "step": 26690 + }, + { + "epoch": 0.0539356892657878, + "grad_norm": 9058.7900390625, + "learning_rate": 5.3400000000000005e-06, + "loss": 8706.9883, + "step": 26700 + }, + { + "epoch": 0.05395588989847162, + "grad_norm": 17192.57421875, + "learning_rate": 5.342000000000001e-06, + "loss": 12162.2281, + "step": 26710 + }, + { + "epoch": 0.053976090531155436, + "grad_norm": 21954.6328125, + "learning_rate": 5.344e-06, + "loss": 4353.6062, + "step": 26720 + }, + { + "epoch": 0.05399629116383925, + "grad_norm": 12036.0927734375, + "learning_rate": 5.346e-06, + "loss": 14100.4594, + "step": 26730 + }, + { + "epoch": 0.05401649179652307, + "grad_norm": 149200.71875, + "learning_rate": 5.348000000000001e-06, + "loss": 24364.3125, + "step": 26740 + }, + { + "epoch": 0.05403669242920688, + "grad_norm": 4828.2763671875, + "learning_rate": 5.3500000000000004e-06, + "loss": 4531.7613, + "step": 26750 + }, + { + "epoch": 0.054056893061890696, + "grad_norm": 35187.55078125, + "learning_rate": 5.352000000000001e-06, + "loss": 18541.9156, + "step": 26760 + }, + { + "epoch": 0.054077093694574516, + "grad_norm": 73553.96875, + "learning_rate": 5.354e-06, + "loss": 4982.6383, + "step": 26770 + }, + { + "epoch": 0.05409729432725833, + "grad_norm": 29370.20703125, + "learning_rate": 5.356e-06, + "loss": 4667.057, + "step": 26780 + }, + { + "epoch": 0.05411749495994215, + "grad_norm": 11576.9248046875, + "learning_rate": 5.358000000000001e-06, + "loss": 7961.7945, + "step": 26790 + }, + { + "epoch": 0.05413769559262596, + "grad_norm": 4994.83251953125, + "learning_rate": 5.36e-06, + "loss": 5043.6754, + "step": 26800 + }, + { + "epoch": 0.054157896225309775, + "grad_norm": 247700.203125, + "learning_rate": 5.3620000000000005e-06, + "loss": 20514.8797, + "step": 26810 + }, + { + "epoch": 0.054178096857993595, + "grad_norm": 52401.171875, + "learning_rate": 5.364000000000001e-06, + "loss": 5482.3008, + "step": 26820 + }, + { + "epoch": 0.05419829749067741, + "grad_norm": 7046.8857421875, + "learning_rate": 5.366e-06, + "loss": 9448.7992, + "step": 26830 + }, + { + "epoch": 0.05421849812336122, + "grad_norm": 24482.998046875, + "learning_rate": 5.368000000000001e-06, + "loss": 13482.4531, + "step": 26840 + }, + { + "epoch": 0.05423869875604504, + "grad_norm": 12837.7119140625, + "learning_rate": 5.370000000000001e-06, + "loss": 9468.5523, + "step": 26850 + }, + { + "epoch": 0.054258899388728854, + "grad_norm": 69353.328125, + "learning_rate": 5.372e-06, + "loss": 9226.6719, + "step": 26860 + }, + { + "epoch": 0.054279100021412674, + "grad_norm": 26869.318359375, + "learning_rate": 5.3740000000000006e-06, + "loss": 13354.1969, + "step": 26870 + }, + { + "epoch": 0.05429930065409649, + "grad_norm": 24223.576171875, + "learning_rate": 5.376e-06, + "loss": 9501.6211, + "step": 26880 + }, + { + "epoch": 0.0543195012867803, + "grad_norm": 65922.515625, + "learning_rate": 5.378e-06, + "loss": 22543.7313, + "step": 26890 + }, + { + "epoch": 0.05433970191946412, + "grad_norm": 1969.2037353515625, + "learning_rate": 5.380000000000001e-06, + "loss": 7821.8156, + "step": 26900 + }, + { + "epoch": 0.05435990255214793, + "grad_norm": 172635.359375, + "learning_rate": 5.382e-06, + "loss": 11372.3, + "step": 26910 + }, + { + "epoch": 0.054380103184831746, + "grad_norm": 4700.14306640625, + "learning_rate": 5.3840000000000005e-06, + "loss": 5195.9129, + "step": 26920 + }, + { + "epoch": 0.054400303817515566, + "grad_norm": 15160.1796875, + "learning_rate": 5.386e-06, + "loss": 8107.7859, + "step": 26930 + }, + { + "epoch": 0.05442050445019938, + "grad_norm": 33897.00390625, + "learning_rate": 5.388e-06, + "loss": 6985.4898, + "step": 26940 + }, + { + "epoch": 0.0544407050828832, + "grad_norm": 41578.859375, + "learning_rate": 5.390000000000001e-06, + "loss": 11450.9547, + "step": 26950 + }, + { + "epoch": 0.05446090571556701, + "grad_norm": 14301.6962890625, + "learning_rate": 5.392e-06, + "loss": 8823.9555, + "step": 26960 + }, + { + "epoch": 0.054481106348250825, + "grad_norm": 233074.921875, + "learning_rate": 5.394e-06, + "loss": 14681.5938, + "step": 26970 + }, + { + "epoch": 0.054501306980934645, + "grad_norm": 6986.1328125, + "learning_rate": 5.3960000000000005e-06, + "loss": 7347.4172, + "step": 26980 + }, + { + "epoch": 0.05452150761361846, + "grad_norm": 16820.494140625, + "learning_rate": 5.398e-06, + "loss": 21651.2406, + "step": 26990 + }, + { + "epoch": 0.05454170824630227, + "grad_norm": 37880.7890625, + "learning_rate": 5.400000000000001e-06, + "loss": 7420.6516, + "step": 27000 + }, + { + "epoch": 0.05456190887898609, + "grad_norm": 52717.94921875, + "learning_rate": 5.402000000000001e-06, + "loss": 7503.3031, + "step": 27010 + }, + { + "epoch": 0.054582109511669905, + "grad_norm": 30466.96875, + "learning_rate": 5.404e-06, + "loss": 9589.4523, + "step": 27020 + }, + { + "epoch": 0.054602310144353725, + "grad_norm": 170622.0625, + "learning_rate": 5.406e-06, + "loss": 16405.75, + "step": 27030 + }, + { + "epoch": 0.05462251077703754, + "grad_norm": 3378.230712890625, + "learning_rate": 5.408e-06, + "loss": 2716.7674, + "step": 27040 + }, + { + "epoch": 0.05464271140972135, + "grad_norm": 135665.125, + "learning_rate": 5.410000000000001e-06, + "loss": 8610.8156, + "step": 27050 + }, + { + "epoch": 0.05466291204240517, + "grad_norm": 23054.17578125, + "learning_rate": 5.412000000000001e-06, + "loss": 9497.0242, + "step": 27060 + }, + { + "epoch": 0.054683112675088984, + "grad_norm": 19477.306640625, + "learning_rate": 5.414e-06, + "loss": 18053.0781, + "step": 27070 + }, + { + "epoch": 0.0547033133077728, + "grad_norm": 1639.8040771484375, + "learning_rate": 5.416e-06, + "loss": 12937.2812, + "step": 27080 + }, + { + "epoch": 0.05472351394045662, + "grad_norm": 7222.04541015625, + "learning_rate": 5.418e-06, + "loss": 15170.675, + "step": 27090 + }, + { + "epoch": 0.05474371457314043, + "grad_norm": 1134.728759765625, + "learning_rate": 5.420000000000001e-06, + "loss": 14265.7672, + "step": 27100 + }, + { + "epoch": 0.05476391520582425, + "grad_norm": 215223.328125, + "learning_rate": 5.422000000000001e-06, + "loss": 8164.8289, + "step": 27110 + }, + { + "epoch": 0.05478411583850806, + "grad_norm": 7720.19921875, + "learning_rate": 5.424e-06, + "loss": 17667.0609, + "step": 27120 + }, + { + "epoch": 0.054804316471191876, + "grad_norm": 50630.61328125, + "learning_rate": 5.426e-06, + "loss": 17529.7625, + "step": 27130 + }, + { + "epoch": 0.054824517103875696, + "grad_norm": 36848.44921875, + "learning_rate": 5.4279999999999995e-06, + "loss": 11060.8414, + "step": 27140 + }, + { + "epoch": 0.05484471773655951, + "grad_norm": 11068.5029296875, + "learning_rate": 5.4300000000000005e-06, + "loss": 4294.1055, + "step": 27150 + }, + { + "epoch": 0.05486491836924332, + "grad_norm": 232247.25, + "learning_rate": 5.432000000000001e-06, + "loss": 10952.7375, + "step": 27160 + }, + { + "epoch": 0.05488511900192714, + "grad_norm": 121962.71875, + "learning_rate": 5.434e-06, + "loss": 33145.725, + "step": 27170 + }, + { + "epoch": 0.054905319634610955, + "grad_norm": 26250.05859375, + "learning_rate": 5.436e-06, + "loss": 6609.4414, + "step": 27180 + }, + { + "epoch": 0.054925520267294775, + "grad_norm": 12197.8046875, + "learning_rate": 5.438e-06, + "loss": 2627.5387, + "step": 27190 + }, + { + "epoch": 0.05494572089997859, + "grad_norm": 4441.82666015625, + "learning_rate": 5.4400000000000004e-06, + "loss": 12187.5711, + "step": 27200 + }, + { + "epoch": 0.0549659215326624, + "grad_norm": 38187.8515625, + "learning_rate": 5.442000000000001e-06, + "loss": 16175.2547, + "step": 27210 + }, + { + "epoch": 0.05498612216534622, + "grad_norm": 22542.759765625, + "learning_rate": 5.444000000000001e-06, + "loss": 20932.7219, + "step": 27220 + }, + { + "epoch": 0.055006322798030034, + "grad_norm": 7094.7626953125, + "learning_rate": 5.446e-06, + "loss": 13201.1672, + "step": 27230 + }, + { + "epoch": 0.05502652343071385, + "grad_norm": 3497.937744140625, + "learning_rate": 5.448e-06, + "loss": 6963.2617, + "step": 27240 + }, + { + "epoch": 0.05504672406339767, + "grad_norm": 3431.715087890625, + "learning_rate": 5.450000000000001e-06, + "loss": 11633.4383, + "step": 27250 + }, + { + "epoch": 0.05506692469608148, + "grad_norm": 136876.15625, + "learning_rate": 5.4520000000000005e-06, + "loss": 8031.2266, + "step": 27260 + }, + { + "epoch": 0.0550871253287653, + "grad_norm": 13463.146484375, + "learning_rate": 5.454000000000001e-06, + "loss": 4055.3867, + "step": 27270 + }, + { + "epoch": 0.05510732596144911, + "grad_norm": 70568.203125, + "learning_rate": 5.456e-06, + "loss": 11574.2578, + "step": 27280 + }, + { + "epoch": 0.055127526594132926, + "grad_norm": 80122.953125, + "learning_rate": 5.458e-06, + "loss": 10309.6781, + "step": 27290 + }, + { + "epoch": 0.055147727226816746, + "grad_norm": 45370.08984375, + "learning_rate": 5.460000000000001e-06, + "loss": 13492.7078, + "step": 27300 + }, + { + "epoch": 0.05516792785950056, + "grad_norm": 2769.674072265625, + "learning_rate": 5.462e-06, + "loss": 6081.5277, + "step": 27310 + }, + { + "epoch": 0.05518812849218437, + "grad_norm": 854.1117553710938, + "learning_rate": 5.4640000000000005e-06, + "loss": 11359.5344, + "step": 27320 + }, + { + "epoch": 0.05520832912486819, + "grad_norm": 53338.171875, + "learning_rate": 5.466e-06, + "loss": 9377.2086, + "step": 27330 + }, + { + "epoch": 0.055228529757552006, + "grad_norm": 48033.58984375, + "learning_rate": 5.468e-06, + "loss": 7456.5203, + "step": 27340 + }, + { + "epoch": 0.05524873039023582, + "grad_norm": 201697.21875, + "learning_rate": 5.470000000000001e-06, + "loss": 8192.0125, + "step": 27350 + }, + { + "epoch": 0.05526893102291964, + "grad_norm": 16388.814453125, + "learning_rate": 5.472e-06, + "loss": 5795.766, + "step": 27360 + }, + { + "epoch": 0.05528913165560345, + "grad_norm": 12076.5625, + "learning_rate": 5.4740000000000004e-06, + "loss": 8363.9289, + "step": 27370 + }, + { + "epoch": 0.05530933228828727, + "grad_norm": 29197.775390625, + "learning_rate": 5.476000000000001e-06, + "loss": 5202.8754, + "step": 27380 + }, + { + "epoch": 0.055329532920971085, + "grad_norm": 28137.474609375, + "learning_rate": 5.478e-06, + "loss": 9914.2945, + "step": 27390 + }, + { + "epoch": 0.0553497335536549, + "grad_norm": 12476.796875, + "learning_rate": 5.480000000000001e-06, + "loss": 9660.1625, + "step": 27400 + }, + { + "epoch": 0.05536993418633872, + "grad_norm": 109376.5703125, + "learning_rate": 5.482000000000001e-06, + "loss": 11022.95, + "step": 27410 + }, + { + "epoch": 0.05539013481902253, + "grad_norm": 32124.5703125, + "learning_rate": 5.484e-06, + "loss": 18502.6906, + "step": 27420 + }, + { + "epoch": 0.055410335451706344, + "grad_norm": 88147.796875, + "learning_rate": 5.4860000000000005e-06, + "loss": 8903.3813, + "step": 27430 + }, + { + "epoch": 0.055430536084390164, + "grad_norm": 10288.9658203125, + "learning_rate": 5.488e-06, + "loss": 8482.9352, + "step": 27440 + }, + { + "epoch": 0.05545073671707398, + "grad_norm": 44609.75, + "learning_rate": 5.490000000000001e-06, + "loss": 7883.7672, + "step": 27450 + }, + { + "epoch": 0.0554709373497578, + "grad_norm": 61872.62109375, + "learning_rate": 5.492000000000001e-06, + "loss": 15651.1562, + "step": 27460 + }, + { + "epoch": 0.05549113798244161, + "grad_norm": 145527.5625, + "learning_rate": 5.494e-06, + "loss": 8453.8797, + "step": 27470 + }, + { + "epoch": 0.05551133861512542, + "grad_norm": 81266.09375, + "learning_rate": 5.496e-06, + "loss": 9885.7313, + "step": 27480 + }, + { + "epoch": 0.05553153924780924, + "grad_norm": 28551.197265625, + "learning_rate": 5.498e-06, + "loss": 3404.2102, + "step": 27490 + }, + { + "epoch": 0.055551739880493056, + "grad_norm": 2340.665771484375, + "learning_rate": 5.500000000000001e-06, + "loss": 7205.268, + "step": 27500 + }, + { + "epoch": 0.05557194051317687, + "grad_norm": 109434.265625, + "learning_rate": 5.502000000000001e-06, + "loss": 9656.2875, + "step": 27510 + }, + { + "epoch": 0.05559214114586069, + "grad_norm": 260317.546875, + "learning_rate": 5.504e-06, + "loss": 11945.8625, + "step": 27520 + }, + { + "epoch": 0.0556123417785445, + "grad_norm": 28424.40234375, + "learning_rate": 5.506e-06, + "loss": 3819.1129, + "step": 27530 + }, + { + "epoch": 0.05563254241122832, + "grad_norm": 41974.39453125, + "learning_rate": 5.508e-06, + "loss": 11334.6023, + "step": 27540 + }, + { + "epoch": 0.055652743043912135, + "grad_norm": 177471.625, + "learning_rate": 5.510000000000001e-06, + "loss": 10290.0234, + "step": 27550 + }, + { + "epoch": 0.05567294367659595, + "grad_norm": 8435.634765625, + "learning_rate": 5.512000000000001e-06, + "loss": 10384.6609, + "step": 27560 + }, + { + "epoch": 0.05569314430927977, + "grad_norm": 406.8714599609375, + "learning_rate": 5.514e-06, + "loss": 9612.0516, + "step": 27570 + }, + { + "epoch": 0.05571334494196358, + "grad_norm": 1187.46142578125, + "learning_rate": 5.516e-06, + "loss": 12017.9375, + "step": 27580 + }, + { + "epoch": 0.055733545574647395, + "grad_norm": 14947.388671875, + "learning_rate": 5.518e-06, + "loss": 13231.3844, + "step": 27590 + }, + { + "epoch": 0.055753746207331215, + "grad_norm": 6477.578125, + "learning_rate": 5.5200000000000005e-06, + "loss": 10510.132, + "step": 27600 + }, + { + "epoch": 0.05577394684001503, + "grad_norm": 16543.611328125, + "learning_rate": 5.522000000000001e-06, + "loss": 8026.5469, + "step": 27610 + }, + { + "epoch": 0.05579414747269885, + "grad_norm": 8969.900390625, + "learning_rate": 5.524000000000001e-06, + "loss": 5272.2629, + "step": 27620 + }, + { + "epoch": 0.05581434810538266, + "grad_norm": 116415.4375, + "learning_rate": 5.526e-06, + "loss": 16293.7, + "step": 27630 + }, + { + "epoch": 0.055834548738066474, + "grad_norm": 31630.615234375, + "learning_rate": 5.528e-06, + "loss": 10327.4148, + "step": 27640 + }, + { + "epoch": 0.055854749370750294, + "grad_norm": 25651.31640625, + "learning_rate": 5.530000000000001e-06, + "loss": 12183.8578, + "step": 27650 + }, + { + "epoch": 0.05587495000343411, + "grad_norm": 31368.232421875, + "learning_rate": 5.5320000000000006e-06, + "loss": 5617.2688, + "step": 27660 + }, + { + "epoch": 0.05589515063611792, + "grad_norm": 10501.0380859375, + "learning_rate": 5.534000000000001e-06, + "loss": 12965.743, + "step": 27670 + }, + { + "epoch": 0.05591535126880174, + "grad_norm": 9305.162109375, + "learning_rate": 5.536e-06, + "loss": 13713.3891, + "step": 27680 + }, + { + "epoch": 0.05593555190148555, + "grad_norm": 63809.9140625, + "learning_rate": 5.538e-06, + "loss": 8682.568, + "step": 27690 + }, + { + "epoch": 0.05595575253416937, + "grad_norm": 13427.4365234375, + "learning_rate": 5.540000000000001e-06, + "loss": 16566.4297, + "step": 27700 + }, + { + "epoch": 0.055975953166853186, + "grad_norm": 6614.44873046875, + "learning_rate": 5.5420000000000005e-06, + "loss": 8618.6406, + "step": 27710 + }, + { + "epoch": 0.055996153799537, + "grad_norm": 40951.2734375, + "learning_rate": 5.544000000000001e-06, + "loss": 8182.4711, + "step": 27720 + }, + { + "epoch": 0.05601635443222082, + "grad_norm": 12698.2685546875, + "learning_rate": 5.546e-06, + "loss": 6473.3023, + "step": 27730 + }, + { + "epoch": 0.05603655506490463, + "grad_norm": 7247.17138671875, + "learning_rate": 5.548e-06, + "loss": 15756.3516, + "step": 27740 + }, + { + "epoch": 0.056056755697588445, + "grad_norm": 158511.546875, + "learning_rate": 5.550000000000001e-06, + "loss": 9079.2891, + "step": 27750 + }, + { + "epoch": 0.056076956330272265, + "grad_norm": 82452.203125, + "learning_rate": 5.552e-06, + "loss": 12825.6023, + "step": 27760 + }, + { + "epoch": 0.05609715696295608, + "grad_norm": 893.5892944335938, + "learning_rate": 5.5540000000000005e-06, + "loss": 6573.2563, + "step": 27770 + }, + { + "epoch": 0.0561173575956399, + "grad_norm": 6051.7021484375, + "learning_rate": 5.556000000000001e-06, + "loss": 8995.2539, + "step": 27780 + }, + { + "epoch": 0.05613755822832371, + "grad_norm": 4455.888671875, + "learning_rate": 5.558e-06, + "loss": 6180.9023, + "step": 27790 + }, + { + "epoch": 0.056157758861007524, + "grad_norm": 5886.0625, + "learning_rate": 5.560000000000001e-06, + "loss": 19095.8937, + "step": 27800 + }, + { + "epoch": 0.056177959493691344, + "grad_norm": 5853.09716796875, + "learning_rate": 5.562000000000001e-06, + "loss": 4366.3438, + "step": 27810 + }, + { + "epoch": 0.05619816012637516, + "grad_norm": 7316.79296875, + "learning_rate": 5.5640000000000004e-06, + "loss": 16702.3109, + "step": 27820 + }, + { + "epoch": 0.05621836075905897, + "grad_norm": 144745.390625, + "learning_rate": 5.566000000000001e-06, + "loss": 11352.1016, + "step": 27830 + }, + { + "epoch": 0.05623856139174279, + "grad_norm": 536.3584594726562, + "learning_rate": 5.568e-06, + "loss": 10636.25, + "step": 27840 + }, + { + "epoch": 0.0562587620244266, + "grad_norm": 26078.76171875, + "learning_rate": 5.570000000000001e-06, + "loss": 9579.5375, + "step": 27850 + }, + { + "epoch": 0.05627896265711042, + "grad_norm": 43643.83984375, + "learning_rate": 5.572000000000001e-06, + "loss": 12236.9625, + "step": 27860 + }, + { + "epoch": 0.056299163289794237, + "grad_norm": 81969.859375, + "learning_rate": 5.574e-06, + "loss": 16124.1531, + "step": 27870 + }, + { + "epoch": 0.05631936392247805, + "grad_norm": 20493.609375, + "learning_rate": 5.5760000000000005e-06, + "loss": 10919.5094, + "step": 27880 + }, + { + "epoch": 0.05633956455516187, + "grad_norm": 58822.37890625, + "learning_rate": 5.578e-06, + "loss": 19013.7719, + "step": 27890 + }, + { + "epoch": 0.05635976518784568, + "grad_norm": 9183.201171875, + "learning_rate": 5.580000000000001e-06, + "loss": 7865.607, + "step": 27900 + }, + { + "epoch": 0.056379965820529496, + "grad_norm": 27151.65625, + "learning_rate": 5.582000000000001e-06, + "loss": 12329.1812, + "step": 27910 + }, + { + "epoch": 0.056400166453213316, + "grad_norm": 4192.90869140625, + "learning_rate": 5.584e-06, + "loss": 9058.1695, + "step": 27920 + }, + { + "epoch": 0.05642036708589713, + "grad_norm": 9118.126953125, + "learning_rate": 5.586e-06, + "loss": 6672.8648, + "step": 27930 + }, + { + "epoch": 0.05644056771858095, + "grad_norm": 3078.388427734375, + "learning_rate": 5.588e-06, + "loss": 5753.4648, + "step": 27940 + }, + { + "epoch": 0.05646076835126476, + "grad_norm": 757.169677734375, + "learning_rate": 5.590000000000001e-06, + "loss": 18311.9891, + "step": 27950 + }, + { + "epoch": 0.056480968983948575, + "grad_norm": 105974.109375, + "learning_rate": 5.592000000000001e-06, + "loss": 6642.6859, + "step": 27960 + }, + { + "epoch": 0.056501169616632395, + "grad_norm": 46332.61328125, + "learning_rate": 5.594e-06, + "loss": 10905.3414, + "step": 27970 + }, + { + "epoch": 0.05652137024931621, + "grad_norm": 1389.7501220703125, + "learning_rate": 5.596e-06, + "loss": 7881.8578, + "step": 27980 + }, + { + "epoch": 0.05654157088200002, + "grad_norm": 11118.5048828125, + "learning_rate": 5.5980000000000004e-06, + "loss": 12743.1555, + "step": 27990 + }, + { + "epoch": 0.05656177151468384, + "grad_norm": 3395.685546875, + "learning_rate": 5.600000000000001e-06, + "loss": 7874.3148, + "step": 28000 + }, + { + "epoch": 0.056581972147367654, + "grad_norm": 156606.46875, + "learning_rate": 5.602000000000001e-06, + "loss": 15454.5766, + "step": 28010 + }, + { + "epoch": 0.056602172780051474, + "grad_norm": 69784.328125, + "learning_rate": 5.604000000000001e-06, + "loss": 17952.5312, + "step": 28020 + }, + { + "epoch": 0.05662237341273529, + "grad_norm": 47946.08984375, + "learning_rate": 5.606e-06, + "loss": 17389.9422, + "step": 28030 + }, + { + "epoch": 0.0566425740454191, + "grad_norm": 62600.05859375, + "learning_rate": 5.608e-06, + "loss": 16762.3703, + "step": 28040 + }, + { + "epoch": 0.05666277467810292, + "grad_norm": 34098.8984375, + "learning_rate": 5.610000000000001e-06, + "loss": 11340.9641, + "step": 28050 + }, + { + "epoch": 0.05668297531078673, + "grad_norm": 7392.97705078125, + "learning_rate": 5.612000000000001e-06, + "loss": 9280.1297, + "step": 28060 + }, + { + "epoch": 0.056703175943470546, + "grad_norm": 14520.298828125, + "learning_rate": 5.614000000000001e-06, + "loss": 9583.4055, + "step": 28070 + }, + { + "epoch": 0.056723376576154366, + "grad_norm": 33571.74609375, + "learning_rate": 5.616e-06, + "loss": 12572.6109, + "step": 28080 + }, + { + "epoch": 0.05674357720883818, + "grad_norm": 1115.7999267578125, + "learning_rate": 5.618e-06, + "loss": 11840.7047, + "step": 28090 + }, + { + "epoch": 0.056763777841522, + "grad_norm": 14522.8447265625, + "learning_rate": 5.620000000000001e-06, + "loss": 8705.1547, + "step": 28100 + }, + { + "epoch": 0.05678397847420581, + "grad_norm": 530.9173583984375, + "learning_rate": 5.6220000000000006e-06, + "loss": 10181.6063, + "step": 28110 + }, + { + "epoch": 0.056804179106889625, + "grad_norm": 43706.0, + "learning_rate": 5.624000000000001e-06, + "loss": 10967.2617, + "step": 28120 + }, + { + "epoch": 0.056824379739573445, + "grad_norm": 14791.74609375, + "learning_rate": 5.626e-06, + "loss": 26876.8594, + "step": 28130 + }, + { + "epoch": 0.05684458037225726, + "grad_norm": 142858.453125, + "learning_rate": 5.628e-06, + "loss": 11308.1914, + "step": 28140 + }, + { + "epoch": 0.05686478100494107, + "grad_norm": 18258.07421875, + "learning_rate": 5.63e-06, + "loss": 7433.4336, + "step": 28150 + }, + { + "epoch": 0.05688498163762489, + "grad_norm": 3315.376953125, + "learning_rate": 5.6320000000000005e-06, + "loss": 4704.9195, + "step": 28160 + }, + { + "epoch": 0.056905182270308705, + "grad_norm": 1341.24609375, + "learning_rate": 5.634000000000001e-06, + "loss": 12062.9977, + "step": 28170 + }, + { + "epoch": 0.056925382902992525, + "grad_norm": 188742.84375, + "learning_rate": 5.636000000000001e-06, + "loss": 6111.2379, + "step": 28180 + }, + { + "epoch": 0.05694558353567634, + "grad_norm": 65583.359375, + "learning_rate": 5.638e-06, + "loss": 21686.8516, + "step": 28190 + }, + { + "epoch": 0.05696578416836015, + "grad_norm": 90649.0234375, + "learning_rate": 5.64e-06, + "loss": 16611.9938, + "step": 28200 + }, + { + "epoch": 0.05698598480104397, + "grad_norm": 19197.40625, + "learning_rate": 5.642000000000001e-06, + "loss": 6872.6062, + "step": 28210 + }, + { + "epoch": 0.057006185433727784, + "grad_norm": 339172.9375, + "learning_rate": 5.6440000000000005e-06, + "loss": 19929.4203, + "step": 28220 + }, + { + "epoch": 0.0570263860664116, + "grad_norm": 36265.70703125, + "learning_rate": 5.646000000000001e-06, + "loss": 9304.9688, + "step": 28230 + }, + { + "epoch": 0.05704658669909542, + "grad_norm": 9800.3212890625, + "learning_rate": 5.648e-06, + "loss": 9176.0766, + "step": 28240 + }, + { + "epoch": 0.05706678733177923, + "grad_norm": 15868.2021484375, + "learning_rate": 5.65e-06, + "loss": 20485.1125, + "step": 28250 + }, + { + "epoch": 0.05708698796446305, + "grad_norm": 40005.59765625, + "learning_rate": 5.652000000000001e-06, + "loss": 10188.8719, + "step": 28260 + }, + { + "epoch": 0.05710718859714686, + "grad_norm": 87482.84375, + "learning_rate": 5.654e-06, + "loss": 16066.4375, + "step": 28270 + }, + { + "epoch": 0.057127389229830676, + "grad_norm": 146168.5625, + "learning_rate": 5.6560000000000006e-06, + "loss": 11500.2266, + "step": 28280 + }, + { + "epoch": 0.057147589862514496, + "grad_norm": 3067.65625, + "learning_rate": 5.658e-06, + "loss": 6658.4125, + "step": 28290 + }, + { + "epoch": 0.05716779049519831, + "grad_norm": 2367.560546875, + "learning_rate": 5.66e-06, + "loss": 9180.1656, + "step": 28300 + }, + { + "epoch": 0.05718799112788212, + "grad_norm": 402.2611999511719, + "learning_rate": 5.662000000000001e-06, + "loss": 9228.5359, + "step": 28310 + }, + { + "epoch": 0.05720819176056594, + "grad_norm": 2984.81591796875, + "learning_rate": 5.664e-06, + "loss": 2486.0828, + "step": 28320 + }, + { + "epoch": 0.057228392393249755, + "grad_norm": 155814.359375, + "learning_rate": 5.6660000000000005e-06, + "loss": 12978.6297, + "step": 28330 + }, + { + "epoch": 0.057248593025933575, + "grad_norm": 164361.234375, + "learning_rate": 5.668e-06, + "loss": 12547.3023, + "step": 28340 + }, + { + "epoch": 0.05726879365861739, + "grad_norm": 6704.34912109375, + "learning_rate": 5.67e-06, + "loss": 10641.2203, + "step": 28350 + }, + { + "epoch": 0.0572889942913012, + "grad_norm": 14143.2060546875, + "learning_rate": 5.672000000000001e-06, + "loss": 8154.0336, + "step": 28360 + }, + { + "epoch": 0.05730919492398502, + "grad_norm": 157984.625, + "learning_rate": 5.674e-06, + "loss": 18522.2219, + "step": 28370 + }, + { + "epoch": 0.057329395556668834, + "grad_norm": 13677.6650390625, + "learning_rate": 5.676e-06, + "loss": 8508.7883, + "step": 28380 + }, + { + "epoch": 0.05734959618935265, + "grad_norm": 6906.0205078125, + "learning_rate": 5.6780000000000005e-06, + "loss": 6938.3773, + "step": 28390 + }, + { + "epoch": 0.05736979682203647, + "grad_norm": 13612.5263671875, + "learning_rate": 5.68e-06, + "loss": 15767.1844, + "step": 28400 + }, + { + "epoch": 0.05738999745472028, + "grad_norm": 2102.100830078125, + "learning_rate": 5.682000000000001e-06, + "loss": 2763.7225, + "step": 28410 + }, + { + "epoch": 0.0574101980874041, + "grad_norm": 3770.612548828125, + "learning_rate": 5.684000000000001e-06, + "loss": 17804.7469, + "step": 28420 + }, + { + "epoch": 0.05743039872008791, + "grad_norm": 6073.357421875, + "learning_rate": 5.686e-06, + "loss": 14523.7219, + "step": 28430 + }, + { + "epoch": 0.057450599352771727, + "grad_norm": 28966.5546875, + "learning_rate": 5.6880000000000004e-06, + "loss": 5070.8617, + "step": 28440 + }, + { + "epoch": 0.057470799985455547, + "grad_norm": 750.9326782226562, + "learning_rate": 5.69e-06, + "loss": 6645.4406, + "step": 28450 + }, + { + "epoch": 0.05749100061813936, + "grad_norm": 47259.23046875, + "learning_rate": 5.692000000000001e-06, + "loss": 14015.8359, + "step": 28460 + }, + { + "epoch": 0.05751120125082317, + "grad_norm": 6408.39208984375, + "learning_rate": 5.694000000000001e-06, + "loss": 14969.8094, + "step": 28470 + }, + { + "epoch": 0.05753140188350699, + "grad_norm": 28572.10546875, + "learning_rate": 5.696e-06, + "loss": 12844.7586, + "step": 28480 + }, + { + "epoch": 0.057551602516190806, + "grad_norm": 0.0, + "learning_rate": 5.698e-06, + "loss": 14674.8078, + "step": 28490 + }, + { + "epoch": 0.057571803148874626, + "grad_norm": 83855.3828125, + "learning_rate": 5.7e-06, + "loss": 14140.0828, + "step": 28500 + }, + { + "epoch": 0.05759200378155844, + "grad_norm": 3104.73193359375, + "learning_rate": 5.702000000000001e-06, + "loss": 9647.3641, + "step": 28510 + }, + { + "epoch": 0.05761220441424225, + "grad_norm": 17360.78515625, + "learning_rate": 5.704000000000001e-06, + "loss": 7807.5453, + "step": 28520 + }, + { + "epoch": 0.05763240504692607, + "grad_norm": 16160.08984375, + "learning_rate": 5.706e-06, + "loss": 10629.3609, + "step": 28530 + }, + { + "epoch": 0.057652605679609885, + "grad_norm": 2821.56298828125, + "learning_rate": 5.708e-06, + "loss": 12423.6211, + "step": 28540 + }, + { + "epoch": 0.0576728063122937, + "grad_norm": 5042.34619140625, + "learning_rate": 5.71e-06, + "loss": 17660.4297, + "step": 28550 + }, + { + "epoch": 0.05769300694497752, + "grad_norm": 38469.125, + "learning_rate": 5.7120000000000005e-06, + "loss": 19859.2578, + "step": 28560 + }, + { + "epoch": 0.05771320757766133, + "grad_norm": 3105.802734375, + "learning_rate": 5.714000000000001e-06, + "loss": 6636.7961, + "step": 28570 + }, + { + "epoch": 0.05773340821034515, + "grad_norm": 6278.72900390625, + "learning_rate": 5.716000000000001e-06, + "loss": 21150.0344, + "step": 28580 + }, + { + "epoch": 0.057753608843028964, + "grad_norm": 11622.953125, + "learning_rate": 5.718e-06, + "loss": 18166.0109, + "step": 28590 + }, + { + "epoch": 0.05777380947571278, + "grad_norm": 1068.9044189453125, + "learning_rate": 5.72e-06, + "loss": 14080.9344, + "step": 28600 + }, + { + "epoch": 0.0577940101083966, + "grad_norm": 43290.328125, + "learning_rate": 5.722000000000001e-06, + "loss": 8286.5453, + "step": 28610 + }, + { + "epoch": 0.05781421074108041, + "grad_norm": 101015.3359375, + "learning_rate": 5.724000000000001e-06, + "loss": 12264.2359, + "step": 28620 + }, + { + "epoch": 0.05783441137376422, + "grad_norm": 1453.2080078125, + "learning_rate": 5.726000000000001e-06, + "loss": 9708.8859, + "step": 28630 + }, + { + "epoch": 0.05785461200644804, + "grad_norm": 10891.08984375, + "learning_rate": 5.728e-06, + "loss": 6330.7246, + "step": 28640 + }, + { + "epoch": 0.057874812639131856, + "grad_norm": 25258.076171875, + "learning_rate": 5.73e-06, + "loss": 3951.709, + "step": 28650 + }, + { + "epoch": 0.057895013271815676, + "grad_norm": 45975.00390625, + "learning_rate": 5.732000000000001e-06, + "loss": 7809.3172, + "step": 28660 + }, + { + "epoch": 0.05791521390449949, + "grad_norm": 63887.3046875, + "learning_rate": 5.7340000000000005e-06, + "loss": 10808.3188, + "step": 28670 + }, + { + "epoch": 0.0579354145371833, + "grad_norm": 48920.53125, + "learning_rate": 5.736000000000001e-06, + "loss": 6105.1152, + "step": 28680 + }, + { + "epoch": 0.05795561516986712, + "grad_norm": 86082.7421875, + "learning_rate": 5.738e-06, + "loss": 9269.8922, + "step": 28690 + }, + { + "epoch": 0.057975815802550935, + "grad_norm": 6249.43359375, + "learning_rate": 5.74e-06, + "loss": 27333.2531, + "step": 28700 + }, + { + "epoch": 0.05799601643523475, + "grad_norm": 32886.2734375, + "learning_rate": 5.742000000000001e-06, + "loss": 11458.3523, + "step": 28710 + }, + { + "epoch": 0.05801621706791857, + "grad_norm": 15708.7294921875, + "learning_rate": 5.744e-06, + "loss": 8339.8523, + "step": 28720 + }, + { + "epoch": 0.05803641770060238, + "grad_norm": 2389.906982421875, + "learning_rate": 5.7460000000000006e-06, + "loss": 4924.4312, + "step": 28730 + }, + { + "epoch": 0.0580566183332862, + "grad_norm": 102686.171875, + "learning_rate": 5.748e-06, + "loss": 9520.3328, + "step": 28740 + }, + { + "epoch": 0.058076818965970015, + "grad_norm": 18586.380859375, + "learning_rate": 5.75e-06, + "loss": 15712.8656, + "step": 28750 + }, + { + "epoch": 0.05809701959865383, + "grad_norm": 20459.412109375, + "learning_rate": 5.752000000000001e-06, + "loss": 18824.3328, + "step": 28760 + }, + { + "epoch": 0.05811722023133765, + "grad_norm": 45200.56640625, + "learning_rate": 5.754e-06, + "loss": 11832.6773, + "step": 28770 + }, + { + "epoch": 0.05813742086402146, + "grad_norm": 163692.75, + "learning_rate": 5.7560000000000005e-06, + "loss": 17634.6813, + "step": 28780 + }, + { + "epoch": 0.058157621496705274, + "grad_norm": 22201.58203125, + "learning_rate": 5.758000000000001e-06, + "loss": 9760.0047, + "step": 28790 + }, + { + "epoch": 0.058177822129389094, + "grad_norm": 12238.3662109375, + "learning_rate": 5.76e-06, + "loss": 8181.8508, + "step": 28800 + }, + { + "epoch": 0.05819802276207291, + "grad_norm": 13844.0302734375, + "learning_rate": 5.762000000000001e-06, + "loss": 11262.0344, + "step": 28810 + }, + { + "epoch": 0.05821822339475673, + "grad_norm": 187271.421875, + "learning_rate": 5.764000000000001e-06, + "loss": 19048.3937, + "step": 28820 + }, + { + "epoch": 0.05823842402744054, + "grad_norm": 480.8028259277344, + "learning_rate": 5.766e-06, + "loss": 14647.8578, + "step": 28830 + }, + { + "epoch": 0.05825862466012435, + "grad_norm": 44683.23046875, + "learning_rate": 5.7680000000000005e-06, + "loss": 9023.8352, + "step": 28840 + }, + { + "epoch": 0.05827882529280817, + "grad_norm": 14583.185546875, + "learning_rate": 5.77e-06, + "loss": 9873.2938, + "step": 28850 + }, + { + "epoch": 0.058299025925491986, + "grad_norm": 4917.05908203125, + "learning_rate": 5.772000000000001e-06, + "loss": 4537.7293, + "step": 28860 + }, + { + "epoch": 0.0583192265581758, + "grad_norm": 155212.390625, + "learning_rate": 5.774000000000001e-06, + "loss": 13812.8719, + "step": 28870 + }, + { + "epoch": 0.05833942719085962, + "grad_norm": 5452.8828125, + "learning_rate": 5.776e-06, + "loss": 5844.1199, + "step": 28880 + }, + { + "epoch": 0.05835962782354343, + "grad_norm": 14921.134765625, + "learning_rate": 5.778e-06, + "loss": 10087.6812, + "step": 28890 + }, + { + "epoch": 0.05837982845622725, + "grad_norm": 30378.6796875, + "learning_rate": 5.78e-06, + "loss": 9551.3312, + "step": 28900 + }, + { + "epoch": 0.058400029088911065, + "grad_norm": 8531.9169921875, + "learning_rate": 5.782000000000001e-06, + "loss": 10531.6938, + "step": 28910 + }, + { + "epoch": 0.05842022972159488, + "grad_norm": 47647.6875, + "learning_rate": 5.784000000000001e-06, + "loss": 6746.7219, + "step": 28920 + }, + { + "epoch": 0.0584404303542787, + "grad_norm": 4484.02001953125, + "learning_rate": 5.786e-06, + "loss": 10743.2109, + "step": 28930 + }, + { + "epoch": 0.05846063098696251, + "grad_norm": 4885.7978515625, + "learning_rate": 5.788e-06, + "loss": 6737.082, + "step": 28940 + }, + { + "epoch": 0.058480831619646324, + "grad_norm": 5254.201171875, + "learning_rate": 5.7900000000000005e-06, + "loss": 23873.4516, + "step": 28950 + }, + { + "epoch": 0.058501032252330144, + "grad_norm": 15261.572265625, + "learning_rate": 5.792000000000001e-06, + "loss": 3895.791, + "step": 28960 + }, + { + "epoch": 0.05852123288501396, + "grad_norm": 7765.29443359375, + "learning_rate": 5.794000000000001e-06, + "loss": 5426.2105, + "step": 28970 + }, + { + "epoch": 0.05854143351769778, + "grad_norm": 2008.37451171875, + "learning_rate": 5.796000000000001e-06, + "loss": 6669.5664, + "step": 28980 + }, + { + "epoch": 0.05856163415038159, + "grad_norm": 73374.7265625, + "learning_rate": 5.798e-06, + "loss": 9491.0125, + "step": 28990 + }, + { + "epoch": 0.058581834783065403, + "grad_norm": 1439.518798828125, + "learning_rate": 5.8e-06, + "loss": 6526.9441, + "step": 29000 + }, + { + "epoch": 0.058602035415749223, + "grad_norm": 29996.302734375, + "learning_rate": 5.802000000000001e-06, + "loss": 4865.8672, + "step": 29010 + }, + { + "epoch": 0.05862223604843304, + "grad_norm": 27587.26953125, + "learning_rate": 5.804000000000001e-06, + "loss": 9334.9188, + "step": 29020 + }, + { + "epoch": 0.05864243668111685, + "grad_norm": 5223.10888671875, + "learning_rate": 5.806000000000001e-06, + "loss": 18822.4734, + "step": 29030 + }, + { + "epoch": 0.05866263731380067, + "grad_norm": 25982.986328125, + "learning_rate": 5.808e-06, + "loss": 12854.1414, + "step": 29040 + }, + { + "epoch": 0.05868283794648448, + "grad_norm": 150013.109375, + "learning_rate": 5.81e-06, + "loss": 18912.5922, + "step": 29050 + }, + { + "epoch": 0.0587030385791683, + "grad_norm": 8137.5478515625, + "learning_rate": 5.812000000000001e-06, + "loss": 9965.6305, + "step": 29060 + }, + { + "epoch": 0.058723239211852116, + "grad_norm": 15405.1875, + "learning_rate": 5.814000000000001e-06, + "loss": 6596.6031, + "step": 29070 + }, + { + "epoch": 0.05874343984453593, + "grad_norm": 4052.512939453125, + "learning_rate": 5.816000000000001e-06, + "loss": 5345.1434, + "step": 29080 + }, + { + "epoch": 0.05876364047721975, + "grad_norm": 4555.2666015625, + "learning_rate": 5.818e-06, + "loss": 4447.8699, + "step": 29090 + }, + { + "epoch": 0.05878384110990356, + "grad_norm": 23714.51171875, + "learning_rate": 5.82e-06, + "loss": 7258.65, + "step": 29100 + }, + { + "epoch": 0.058804041742587375, + "grad_norm": 102902.5546875, + "learning_rate": 5.822000000000001e-06, + "loss": 13054.6094, + "step": 29110 + }, + { + "epoch": 0.058824242375271195, + "grad_norm": 15945.5263671875, + "learning_rate": 5.8240000000000005e-06, + "loss": 11482.6656, + "step": 29120 + }, + { + "epoch": 0.05884444300795501, + "grad_norm": 16147.53515625, + "learning_rate": 5.826000000000001e-06, + "loss": 10537.7875, + "step": 29130 + }, + { + "epoch": 0.05886464364063883, + "grad_norm": 11056.1953125, + "learning_rate": 5.828e-06, + "loss": 5846.0898, + "step": 29140 + }, + { + "epoch": 0.05888484427332264, + "grad_norm": 64596.5703125, + "learning_rate": 5.83e-06, + "loss": 8500.7461, + "step": 29150 + }, + { + "epoch": 0.058905044906006454, + "grad_norm": 16083.0908203125, + "learning_rate": 5.832000000000001e-06, + "loss": 7536.9422, + "step": 29160 + }, + { + "epoch": 0.058925245538690274, + "grad_norm": 473.3346252441406, + "learning_rate": 5.834e-06, + "loss": 5002.5277, + "step": 29170 + }, + { + "epoch": 0.05894544617137409, + "grad_norm": 55760.7421875, + "learning_rate": 5.8360000000000005e-06, + "loss": 11132.0977, + "step": 29180 + }, + { + "epoch": 0.0589656468040579, + "grad_norm": 32037.818359375, + "learning_rate": 5.838000000000001e-06, + "loss": 14472.8156, + "step": 29190 + }, + { + "epoch": 0.05898584743674172, + "grad_norm": 157648.8125, + "learning_rate": 5.84e-06, + "loss": 22856.3234, + "step": 29200 + }, + { + "epoch": 0.05900604806942553, + "grad_norm": 215572.609375, + "learning_rate": 5.842000000000001e-06, + "loss": 14724.7094, + "step": 29210 + }, + { + "epoch": 0.05902624870210935, + "grad_norm": 11060.8564453125, + "learning_rate": 5.844000000000001e-06, + "loss": 8411.4422, + "step": 29220 + }, + { + "epoch": 0.059046449334793166, + "grad_norm": 67039.3203125, + "learning_rate": 5.8460000000000004e-06, + "loss": 12271.943, + "step": 29230 + }, + { + "epoch": 0.05906664996747698, + "grad_norm": 10056.5830078125, + "learning_rate": 5.848000000000001e-06, + "loss": 5796.1668, + "step": 29240 + }, + { + "epoch": 0.0590868506001608, + "grad_norm": 33523.66796875, + "learning_rate": 5.85e-06, + "loss": 5751.2121, + "step": 29250 + }, + { + "epoch": 0.05910705123284461, + "grad_norm": 4224.298828125, + "learning_rate": 5.852000000000001e-06, + "loss": 4806.0621, + "step": 29260 + }, + { + "epoch": 0.059127251865528425, + "grad_norm": 15489.693359375, + "learning_rate": 5.854000000000001e-06, + "loss": 13951.6984, + "step": 29270 + }, + { + "epoch": 0.059147452498212245, + "grad_norm": 42113.30078125, + "learning_rate": 5.856e-06, + "loss": 15633.0, + "step": 29280 + }, + { + "epoch": 0.05916765313089606, + "grad_norm": 32726.65625, + "learning_rate": 5.8580000000000005e-06, + "loss": 6679.732, + "step": 29290 + }, + { + "epoch": 0.05918785376357988, + "grad_norm": 19181.1640625, + "learning_rate": 5.86e-06, + "loss": 3453.993, + "step": 29300 + }, + { + "epoch": 0.05920805439626369, + "grad_norm": 19584.689453125, + "learning_rate": 5.862000000000001e-06, + "loss": 8990.343, + "step": 29310 + }, + { + "epoch": 0.059228255028947505, + "grad_norm": 2649.474853515625, + "learning_rate": 5.864000000000001e-06, + "loss": 15065.5031, + "step": 29320 + }, + { + "epoch": 0.059248455661631325, + "grad_norm": 16434.076171875, + "learning_rate": 5.866e-06, + "loss": 7926.9852, + "step": 29330 + }, + { + "epoch": 0.05926865629431514, + "grad_norm": 11068.5205078125, + "learning_rate": 5.868e-06, + "loss": 7417.9469, + "step": 29340 + }, + { + "epoch": 0.05928885692699895, + "grad_norm": 62523.17578125, + "learning_rate": 5.8700000000000005e-06, + "loss": 7319.8687, + "step": 29350 + }, + { + "epoch": 0.05930905755968277, + "grad_norm": 1052.485595703125, + "learning_rate": 5.872000000000001e-06, + "loss": 14949.4344, + "step": 29360 + }, + { + "epoch": 0.059329258192366584, + "grad_norm": 8051.16796875, + "learning_rate": 5.874000000000001e-06, + "loss": 8709.0961, + "step": 29370 + }, + { + "epoch": 0.059349458825050404, + "grad_norm": 38111.09765625, + "learning_rate": 5.876000000000001e-06, + "loss": 7294.1578, + "step": 29380 + }, + { + "epoch": 0.05936965945773422, + "grad_norm": 93941.1328125, + "learning_rate": 5.878e-06, + "loss": 9761.6203, + "step": 29390 + }, + { + "epoch": 0.05938986009041803, + "grad_norm": 9126.431640625, + "learning_rate": 5.8800000000000005e-06, + "loss": 21885.1031, + "step": 29400 + }, + { + "epoch": 0.05941006072310185, + "grad_norm": 6040.0537109375, + "learning_rate": 5.882e-06, + "loss": 8649.2367, + "step": 29410 + }, + { + "epoch": 0.05943026135578566, + "grad_norm": 6644.189453125, + "learning_rate": 5.884000000000001e-06, + "loss": 7749.3734, + "step": 29420 + }, + { + "epoch": 0.059450461988469476, + "grad_norm": 20806.224609375, + "learning_rate": 5.886000000000001e-06, + "loss": 9778.3742, + "step": 29430 + }, + { + "epoch": 0.059470662621153296, + "grad_norm": 129407.2265625, + "learning_rate": 5.888e-06, + "loss": 9554.7906, + "step": 29440 + }, + { + "epoch": 0.05949086325383711, + "grad_norm": 83233.140625, + "learning_rate": 5.89e-06, + "loss": 10268.5805, + "step": 29450 + }, + { + "epoch": 0.05951106388652093, + "grad_norm": 44841.234375, + "learning_rate": 5.892e-06, + "loss": 7426.5672, + "step": 29460 + }, + { + "epoch": 0.05953126451920474, + "grad_norm": 16850.14453125, + "learning_rate": 5.894000000000001e-06, + "loss": 4439.2055, + "step": 29470 + }, + { + "epoch": 0.059551465151888555, + "grad_norm": 104194.3828125, + "learning_rate": 5.896000000000001e-06, + "loss": 13667.6094, + "step": 29480 + }, + { + "epoch": 0.059571665784572375, + "grad_norm": 62842.47265625, + "learning_rate": 5.898e-06, + "loss": 4652.3359, + "step": 29490 + }, + { + "epoch": 0.05959186641725619, + "grad_norm": 2350.042724609375, + "learning_rate": 5.9e-06, + "loss": 10318.1719, + "step": 29500 + }, + { + "epoch": 0.05961206704994, + "grad_norm": 333.65576171875, + "learning_rate": 5.9019999999999996e-06, + "loss": 7636.3602, + "step": 29510 + }, + { + "epoch": 0.05963226768262382, + "grad_norm": 83266.5546875, + "learning_rate": 5.9040000000000006e-06, + "loss": 10226.0797, + "step": 29520 + }, + { + "epoch": 0.059652468315307634, + "grad_norm": 266466.5625, + "learning_rate": 5.906000000000001e-06, + "loss": 14591.0656, + "step": 29530 + }, + { + "epoch": 0.059672668947991454, + "grad_norm": 12760.2021484375, + "learning_rate": 5.908e-06, + "loss": 9911.0234, + "step": 29540 + }, + { + "epoch": 0.05969286958067527, + "grad_norm": 3912.283203125, + "learning_rate": 5.91e-06, + "loss": 10168.0039, + "step": 29550 + }, + { + "epoch": 0.05971307021335908, + "grad_norm": 9604.8330078125, + "learning_rate": 5.912e-06, + "loss": 3998.9406, + "step": 29560 + }, + { + "epoch": 0.0597332708460429, + "grad_norm": 396.7561340332031, + "learning_rate": 5.9140000000000005e-06, + "loss": 5060.6117, + "step": 29570 + }, + { + "epoch": 0.059753471478726713, + "grad_norm": 147245.265625, + "learning_rate": 5.916000000000001e-06, + "loss": 18748.8219, + "step": 29580 + }, + { + "epoch": 0.05977367211141053, + "grad_norm": 236466.71875, + "learning_rate": 5.918000000000001e-06, + "loss": 10666.1703, + "step": 29590 + }, + { + "epoch": 0.05979387274409435, + "grad_norm": 33456.45703125, + "learning_rate": 5.92e-06, + "loss": 20188.9234, + "step": 29600 + }, + { + "epoch": 0.05981407337677816, + "grad_norm": 67723.546875, + "learning_rate": 5.922e-06, + "loss": 7220.7836, + "step": 29610 + }, + { + "epoch": 0.05983427400946198, + "grad_norm": 466.14080810546875, + "learning_rate": 5.924000000000001e-06, + "loss": 8630.2797, + "step": 29620 + }, + { + "epoch": 0.05985447464214579, + "grad_norm": 69880.7109375, + "learning_rate": 5.9260000000000005e-06, + "loss": 25373.9859, + "step": 29630 + }, + { + "epoch": 0.059874675274829606, + "grad_norm": 4367.87548828125, + "learning_rate": 5.928000000000001e-06, + "loss": 5742.6402, + "step": 29640 + }, + { + "epoch": 0.059894875907513426, + "grad_norm": 12566.7431640625, + "learning_rate": 5.93e-06, + "loss": 7648.3852, + "step": 29650 + }, + { + "epoch": 0.05991507654019724, + "grad_norm": 19220.78515625, + "learning_rate": 5.932e-06, + "loss": 5966.2652, + "step": 29660 + }, + { + "epoch": 0.05993527717288105, + "grad_norm": 3410.995361328125, + "learning_rate": 5.934000000000001e-06, + "loss": 9920.5641, + "step": 29670 + }, + { + "epoch": 0.05995547780556487, + "grad_norm": 1840.031005859375, + "learning_rate": 5.9360000000000004e-06, + "loss": 8812.5508, + "step": 29680 + }, + { + "epoch": 0.059975678438248685, + "grad_norm": 3646.684814453125, + "learning_rate": 5.9380000000000006e-06, + "loss": 13569.5891, + "step": 29690 + }, + { + "epoch": 0.059995879070932505, + "grad_norm": 2823.355712890625, + "learning_rate": 5.94e-06, + "loss": 6736.5078, + "step": 29700 + }, + { + "epoch": 0.06001607970361632, + "grad_norm": 56468.66796875, + "learning_rate": 5.942e-06, + "loss": 8886.6922, + "step": 29710 + }, + { + "epoch": 0.06003628033630013, + "grad_norm": 296880.34375, + "learning_rate": 5.944000000000001e-06, + "loss": 12584.7789, + "step": 29720 + }, + { + "epoch": 0.06005648096898395, + "grad_norm": 217294.046875, + "learning_rate": 5.946e-06, + "loss": 11195.7203, + "step": 29730 + }, + { + "epoch": 0.060076681601667764, + "grad_norm": 5143.6103515625, + "learning_rate": 5.9480000000000005e-06, + "loss": 9527.768, + "step": 29740 + }, + { + "epoch": 0.06009688223435158, + "grad_norm": 6219.7060546875, + "learning_rate": 5.950000000000001e-06, + "loss": 11549.2414, + "step": 29750 + }, + { + "epoch": 0.0601170828670354, + "grad_norm": 764.927978515625, + "learning_rate": 5.952e-06, + "loss": 7024.0016, + "step": 29760 + }, + { + "epoch": 0.06013728349971921, + "grad_norm": 40887.1484375, + "learning_rate": 5.954000000000001e-06, + "loss": 21363.0219, + "step": 29770 + }, + { + "epoch": 0.06015748413240303, + "grad_norm": 5481.09765625, + "learning_rate": 5.956000000000001e-06, + "loss": 17849.4, + "step": 29780 + }, + { + "epoch": 0.06017768476508684, + "grad_norm": 51921.71875, + "learning_rate": 5.958e-06, + "loss": 9812.8211, + "step": 29790 + }, + { + "epoch": 0.060197885397770656, + "grad_norm": 1000.5336303710938, + "learning_rate": 5.9600000000000005e-06, + "loss": 14015.8797, + "step": 29800 + }, + { + "epoch": 0.060218086030454476, + "grad_norm": 572544.8125, + "learning_rate": 5.962e-06, + "loss": 18650.9609, + "step": 29810 + }, + { + "epoch": 0.06023828666313829, + "grad_norm": 217407.984375, + "learning_rate": 5.964000000000001e-06, + "loss": 16091.5656, + "step": 29820 + }, + { + "epoch": 0.0602584872958221, + "grad_norm": 53548.16796875, + "learning_rate": 5.966000000000001e-06, + "loss": 12345.0914, + "step": 29830 + }, + { + "epoch": 0.06027868792850592, + "grad_norm": 9956.8203125, + "learning_rate": 5.968e-06, + "loss": 19007.1813, + "step": 29840 + }, + { + "epoch": 0.060298888561189735, + "grad_norm": 2001.611083984375, + "learning_rate": 5.9700000000000004e-06, + "loss": 16090.2859, + "step": 29850 + }, + { + "epoch": 0.060319089193873555, + "grad_norm": 126491.6171875, + "learning_rate": 5.972e-06, + "loss": 18736.8859, + "step": 29860 + }, + { + "epoch": 0.06033928982655737, + "grad_norm": 41986.90234375, + "learning_rate": 5.974000000000001e-06, + "loss": 11878.9125, + "step": 29870 + }, + { + "epoch": 0.06035949045924118, + "grad_norm": 30953.96484375, + "learning_rate": 5.976000000000001e-06, + "loss": 4970.1848, + "step": 29880 + }, + { + "epoch": 0.060379691091925, + "grad_norm": 5955.70068359375, + "learning_rate": 5.978e-06, + "loss": 6874.4875, + "step": 29890 + }, + { + "epoch": 0.060399891724608815, + "grad_norm": 102293.6875, + "learning_rate": 5.98e-06, + "loss": 15487.9562, + "step": 29900 + }, + { + "epoch": 0.06042009235729263, + "grad_norm": 4223.3466796875, + "learning_rate": 5.982e-06, + "loss": 3512.9652, + "step": 29910 + }, + { + "epoch": 0.06044029298997645, + "grad_norm": 7908.607421875, + "learning_rate": 5.984000000000001e-06, + "loss": 4689.9695, + "step": 29920 + }, + { + "epoch": 0.06046049362266026, + "grad_norm": 12721.748046875, + "learning_rate": 5.986000000000001e-06, + "loss": 21140.7203, + "step": 29930 + }, + { + "epoch": 0.06048069425534408, + "grad_norm": 77007.046875, + "learning_rate": 5.988e-06, + "loss": 14470.2109, + "step": 29940 + }, + { + "epoch": 0.060500894888027894, + "grad_norm": 4106.439453125, + "learning_rate": 5.99e-06, + "loss": 6972.707, + "step": 29950 + }, + { + "epoch": 0.06052109552071171, + "grad_norm": 3080.09912109375, + "learning_rate": 5.992e-06, + "loss": 12310.4406, + "step": 29960 + }, + { + "epoch": 0.06054129615339553, + "grad_norm": 230.4069061279297, + "learning_rate": 5.9940000000000005e-06, + "loss": 5130.6945, + "step": 29970 + }, + { + "epoch": 0.06056149678607934, + "grad_norm": 2592.27099609375, + "learning_rate": 5.996000000000001e-06, + "loss": 5730.0742, + "step": 29980 + }, + { + "epoch": 0.06058169741876315, + "grad_norm": 75952.3203125, + "learning_rate": 5.998000000000001e-06, + "loss": 7974.1492, + "step": 29990 + }, + { + "epoch": 0.06060189805144697, + "grad_norm": 32726.4375, + "learning_rate": 6e-06, + "loss": 6285.7563, + "step": 30000 + }, + { + "epoch": 0.060622098684130786, + "grad_norm": 28149.23046875, + "learning_rate": 6.002e-06, + "loss": 10895.7508, + "step": 30010 + }, + { + "epoch": 0.060642299316814606, + "grad_norm": 8957.046875, + "learning_rate": 6.004000000000001e-06, + "loss": 5418.3391, + "step": 30020 + }, + { + "epoch": 0.06066249994949842, + "grad_norm": 1675.398681640625, + "learning_rate": 6.006000000000001e-06, + "loss": 9895.9836, + "step": 30030 + }, + { + "epoch": 0.06068270058218223, + "grad_norm": 99144.171875, + "learning_rate": 6.008000000000001e-06, + "loss": 7570.2188, + "step": 30040 + }, + { + "epoch": 0.06070290121486605, + "grad_norm": 33813.85546875, + "learning_rate": 6.01e-06, + "loss": 7681.8055, + "step": 30050 + }, + { + "epoch": 0.060723101847549865, + "grad_norm": 27887.962890625, + "learning_rate": 6.012e-06, + "loss": 8692.343, + "step": 30060 + }, + { + "epoch": 0.06074330248023368, + "grad_norm": 11991.1796875, + "learning_rate": 6.014000000000001e-06, + "loss": 9299.0461, + "step": 30070 + }, + { + "epoch": 0.0607635031129175, + "grad_norm": 3284.333984375, + "learning_rate": 6.0160000000000005e-06, + "loss": 5918.4766, + "step": 30080 + }, + { + "epoch": 0.06078370374560131, + "grad_norm": 96477.984375, + "learning_rate": 6.018000000000001e-06, + "loss": 21702.9906, + "step": 30090 + }, + { + "epoch": 0.06080390437828513, + "grad_norm": 97023.109375, + "learning_rate": 6.02e-06, + "loss": 16114.1141, + "step": 30100 + }, + { + "epoch": 0.060824105010968944, + "grad_norm": 39889.51953125, + "learning_rate": 6.022e-06, + "loss": 12206.75, + "step": 30110 + }, + { + "epoch": 0.06084430564365276, + "grad_norm": 1347.7962646484375, + "learning_rate": 6.024000000000001e-06, + "loss": 4697.1234, + "step": 30120 + }, + { + "epoch": 0.06086450627633658, + "grad_norm": 50676.9921875, + "learning_rate": 6.026e-06, + "loss": 20445.7422, + "step": 30130 + }, + { + "epoch": 0.06088470690902039, + "grad_norm": 9974.2919921875, + "learning_rate": 6.0280000000000006e-06, + "loss": 6899.7289, + "step": 30140 + }, + { + "epoch": 0.060904907541704204, + "grad_norm": 49334.78125, + "learning_rate": 6.030000000000001e-06, + "loss": 11143.6891, + "step": 30150 + }, + { + "epoch": 0.060925108174388024, + "grad_norm": 47180.8359375, + "learning_rate": 6.032e-06, + "loss": 8154.1656, + "step": 30160 + }, + { + "epoch": 0.06094530880707184, + "grad_norm": 55542.671875, + "learning_rate": 6.034000000000001e-06, + "loss": 8493.2203, + "step": 30170 + }, + { + "epoch": 0.06096550943975566, + "grad_norm": 74419.2109375, + "learning_rate": 6.036000000000001e-06, + "loss": 10637.2141, + "step": 30180 + }, + { + "epoch": 0.06098571007243947, + "grad_norm": 12066.935546875, + "learning_rate": 6.0380000000000005e-06, + "loss": 7345.3195, + "step": 30190 + }, + { + "epoch": 0.06100591070512328, + "grad_norm": 196318.4375, + "learning_rate": 6.040000000000001e-06, + "loss": 15834.9922, + "step": 30200 + }, + { + "epoch": 0.0610261113378071, + "grad_norm": 1399.07568359375, + "learning_rate": 6.042e-06, + "loss": 10801.3008, + "step": 30210 + }, + { + "epoch": 0.061046311970490916, + "grad_norm": 1702.5689697265625, + "learning_rate": 6.044000000000001e-06, + "loss": 15157.1344, + "step": 30220 + }, + { + "epoch": 0.06106651260317473, + "grad_norm": 19285.072265625, + "learning_rate": 6.046000000000001e-06, + "loss": 17658.8047, + "step": 30230 + }, + { + "epoch": 0.06108671323585855, + "grad_norm": 80920.546875, + "learning_rate": 6.048e-06, + "loss": 16092.0688, + "step": 30240 + }, + { + "epoch": 0.06110691386854236, + "grad_norm": 41002.97265625, + "learning_rate": 6.0500000000000005e-06, + "loss": 2918.693, + "step": 30250 + }, + { + "epoch": 0.06112711450122618, + "grad_norm": 14567.6572265625, + "learning_rate": 6.052e-06, + "loss": 7294.9609, + "step": 30260 + }, + { + "epoch": 0.061147315133909995, + "grad_norm": 168343.59375, + "learning_rate": 6.054000000000001e-06, + "loss": 15952.4375, + "step": 30270 + }, + { + "epoch": 0.06116751576659381, + "grad_norm": 29947.7109375, + "learning_rate": 6.056000000000001e-06, + "loss": 4521.8379, + "step": 30280 + }, + { + "epoch": 0.06118771639927763, + "grad_norm": 58549.65625, + "learning_rate": 6.058e-06, + "loss": 11695.9094, + "step": 30290 + }, + { + "epoch": 0.06120791703196144, + "grad_norm": 7420.24951171875, + "learning_rate": 6.0600000000000004e-06, + "loss": 27268.0469, + "step": 30300 + }, + { + "epoch": 0.061228117664645254, + "grad_norm": 56127.33984375, + "learning_rate": 6.062e-06, + "loss": 14908.4734, + "step": 30310 + }, + { + "epoch": 0.061248318297329074, + "grad_norm": 29415.07421875, + "learning_rate": 6.064000000000001e-06, + "loss": 9589.1969, + "step": 30320 + }, + { + "epoch": 0.06126851893001289, + "grad_norm": 21590.3046875, + "learning_rate": 6.066000000000001e-06, + "loss": 8714.5258, + "step": 30330 + }, + { + "epoch": 0.06128871956269671, + "grad_norm": 72871.09375, + "learning_rate": 6.068e-06, + "loss": 13606.1219, + "step": 30340 + }, + { + "epoch": 0.06130892019538052, + "grad_norm": 902.5238037109375, + "learning_rate": 6.07e-06, + "loss": 19175.8375, + "step": 30350 + }, + { + "epoch": 0.06132912082806433, + "grad_norm": 96092.2578125, + "learning_rate": 6.0720000000000005e-06, + "loss": 15165.2547, + "step": 30360 + }, + { + "epoch": 0.06134932146074815, + "grad_norm": 8098.34375, + "learning_rate": 6.074000000000001e-06, + "loss": 18260.9109, + "step": 30370 + }, + { + "epoch": 0.061369522093431966, + "grad_norm": 36714.078125, + "learning_rate": 6.076000000000001e-06, + "loss": 5502.1187, + "step": 30380 + }, + { + "epoch": 0.06138972272611578, + "grad_norm": 77752.2421875, + "learning_rate": 6.078000000000001e-06, + "loss": 6635.007, + "step": 30390 + }, + { + "epoch": 0.0614099233587996, + "grad_norm": 22814.044921875, + "learning_rate": 6.08e-06, + "loss": 16723.0422, + "step": 30400 + }, + { + "epoch": 0.06143012399148341, + "grad_norm": 36990.84765625, + "learning_rate": 6.082e-06, + "loss": 6787.4445, + "step": 30410 + }, + { + "epoch": 0.061450324624167225, + "grad_norm": 9990.857421875, + "learning_rate": 6.084000000000001e-06, + "loss": 15315.6219, + "step": 30420 + }, + { + "epoch": 0.061470525256851045, + "grad_norm": 15932.29296875, + "learning_rate": 6.086000000000001e-06, + "loss": 5991.3121, + "step": 30430 + }, + { + "epoch": 0.06149072588953486, + "grad_norm": 45908.984375, + "learning_rate": 6.088000000000001e-06, + "loss": 10402.1703, + "step": 30440 + }, + { + "epoch": 0.06151092652221868, + "grad_norm": 0.0, + "learning_rate": 6.09e-06, + "loss": 3741.923, + "step": 30450 + }, + { + "epoch": 0.06153112715490249, + "grad_norm": 19986.931640625, + "learning_rate": 6.092e-06, + "loss": 2593.5457, + "step": 30460 + }, + { + "epoch": 0.061551327787586305, + "grad_norm": 10196.529296875, + "learning_rate": 6.094000000000001e-06, + "loss": 3691.7664, + "step": 30470 + }, + { + "epoch": 0.061571528420270125, + "grad_norm": 71311.0546875, + "learning_rate": 6.096000000000001e-06, + "loss": 4976.1617, + "step": 30480 + }, + { + "epoch": 0.06159172905295394, + "grad_norm": 4491.01123046875, + "learning_rate": 6.098000000000001e-06, + "loss": 16888.9688, + "step": 30490 + }, + { + "epoch": 0.06161192968563775, + "grad_norm": 21747.244140625, + "learning_rate": 6.1e-06, + "loss": 15575.7969, + "step": 30500 + }, + { + "epoch": 0.06163213031832157, + "grad_norm": 4187.56201171875, + "learning_rate": 6.102e-06, + "loss": 4629.702, + "step": 30510 + }, + { + "epoch": 0.061652330951005384, + "grad_norm": 4154.22265625, + "learning_rate": 6.104000000000001e-06, + "loss": 9254.2461, + "step": 30520 + }, + { + "epoch": 0.061672531583689204, + "grad_norm": 12699.2705078125, + "learning_rate": 6.1060000000000005e-06, + "loss": 29692.0438, + "step": 30530 + }, + { + "epoch": 0.06169273221637302, + "grad_norm": 199955.953125, + "learning_rate": 6.108000000000001e-06, + "loss": 36837.475, + "step": 30540 + }, + { + "epoch": 0.06171293284905683, + "grad_norm": 55126.42578125, + "learning_rate": 6.110000000000001e-06, + "loss": 12752.6797, + "step": 30550 + }, + { + "epoch": 0.06173313348174065, + "grad_norm": 51616.921875, + "learning_rate": 6.112e-06, + "loss": 12645.8656, + "step": 30560 + }, + { + "epoch": 0.06175333411442446, + "grad_norm": 2162.182861328125, + "learning_rate": 6.114000000000001e-06, + "loss": 3526.3172, + "step": 30570 + }, + { + "epoch": 0.061773534747108276, + "grad_norm": 8108.44482421875, + "learning_rate": 6.116000000000001e-06, + "loss": 6395.2188, + "step": 30580 + }, + { + "epoch": 0.061793735379792096, + "grad_norm": 1182.5450439453125, + "learning_rate": 6.1180000000000005e-06, + "loss": 16838.5078, + "step": 30590 + }, + { + "epoch": 0.06181393601247591, + "grad_norm": 249764.609375, + "learning_rate": 6.120000000000001e-06, + "loss": 9435.4969, + "step": 30600 + }, + { + "epoch": 0.06183413664515973, + "grad_norm": 1647.3497314453125, + "learning_rate": 6.122e-06, + "loss": 7598.1852, + "step": 30610 + }, + { + "epoch": 0.06185433727784354, + "grad_norm": 9259.3662109375, + "learning_rate": 6.124000000000001e-06, + "loss": 11129.118, + "step": 30620 + }, + { + "epoch": 0.061874537910527355, + "grad_norm": 887.16748046875, + "learning_rate": 6.126000000000001e-06, + "loss": 11093.7789, + "step": 30630 + }, + { + "epoch": 0.061894738543211175, + "grad_norm": 84356.640625, + "learning_rate": 6.1280000000000005e-06, + "loss": 9442.6453, + "step": 30640 + }, + { + "epoch": 0.06191493917589499, + "grad_norm": 34474.57421875, + "learning_rate": 6.130000000000001e-06, + "loss": 5587.3715, + "step": 30650 + }, + { + "epoch": 0.0619351398085788, + "grad_norm": 149263.6875, + "learning_rate": 6.132e-06, + "loss": 8510.4328, + "step": 30660 + }, + { + "epoch": 0.06195534044126262, + "grad_norm": 3109.910400390625, + "learning_rate": 6.134e-06, + "loss": 10406.8844, + "step": 30670 + }, + { + "epoch": 0.061975541073946434, + "grad_norm": 21770.0390625, + "learning_rate": 6.136000000000001e-06, + "loss": 14411.5953, + "step": 30680 + }, + { + "epoch": 0.061995741706630254, + "grad_norm": 244487.96875, + "learning_rate": 6.138e-06, + "loss": 13835.0484, + "step": 30690 + }, + { + "epoch": 0.06201594233931407, + "grad_norm": 3972.576171875, + "learning_rate": 6.1400000000000005e-06, + "loss": 17308.2109, + "step": 30700 + }, + { + "epoch": 0.06203614297199788, + "grad_norm": 93588.765625, + "learning_rate": 6.142e-06, + "loss": 9225.0617, + "step": 30710 + }, + { + "epoch": 0.0620563436046817, + "grad_norm": 15148.3076171875, + "learning_rate": 6.144e-06, + "loss": 5393.8988, + "step": 30720 + }, + { + "epoch": 0.062076544237365514, + "grad_norm": 246710.75, + "learning_rate": 6.146000000000001e-06, + "loss": 12994.9836, + "step": 30730 + }, + { + "epoch": 0.06209674487004933, + "grad_norm": 42203.875, + "learning_rate": 6.148e-06, + "loss": 13767.0781, + "step": 30740 + }, + { + "epoch": 0.06211694550273315, + "grad_norm": 1078.0435791015625, + "learning_rate": 6.15e-06, + "loss": 8143.6313, + "step": 30750 + }, + { + "epoch": 0.06213714613541696, + "grad_norm": 2122.34765625, + "learning_rate": 6.1520000000000006e-06, + "loss": 8265.3719, + "step": 30760 + }, + { + "epoch": 0.06215734676810078, + "grad_norm": 29899.69921875, + "learning_rate": 6.154e-06, + "loss": 13117.15, + "step": 30770 + }, + { + "epoch": 0.06217754740078459, + "grad_norm": 72520.9140625, + "learning_rate": 6.156000000000001e-06, + "loss": 10980.9305, + "step": 30780 + }, + { + "epoch": 0.062197748033468406, + "grad_norm": 2055.08544921875, + "learning_rate": 6.158000000000001e-06, + "loss": 9423.8789, + "step": 30790 + }, + { + "epoch": 0.062217948666152226, + "grad_norm": 110960.0625, + "learning_rate": 6.16e-06, + "loss": 13540.5328, + "step": 30800 + }, + { + "epoch": 0.06223814929883604, + "grad_norm": 32071.736328125, + "learning_rate": 6.1620000000000005e-06, + "loss": 8513.8781, + "step": 30810 + }, + { + "epoch": 0.06225834993151985, + "grad_norm": 6868.4189453125, + "learning_rate": 6.164e-06, + "loss": 14266.8813, + "step": 30820 + }, + { + "epoch": 0.06227855056420367, + "grad_norm": 307492.71875, + "learning_rate": 6.166000000000001e-06, + "loss": 17432.5187, + "step": 30830 + }, + { + "epoch": 0.062298751196887485, + "grad_norm": 28964.787109375, + "learning_rate": 6.168000000000001e-06, + "loss": 4523.7922, + "step": 30840 + }, + { + "epoch": 0.062318951829571305, + "grad_norm": 2724.1474609375, + "learning_rate": 6.17e-06, + "loss": 6072.1391, + "step": 30850 + }, + { + "epoch": 0.06233915246225512, + "grad_norm": 389.2127380371094, + "learning_rate": 6.172e-06, + "loss": 13548.575, + "step": 30860 + }, + { + "epoch": 0.06235935309493893, + "grad_norm": 12928.3056640625, + "learning_rate": 6.174e-06, + "loss": 8361.6711, + "step": 30870 + }, + { + "epoch": 0.06237955372762275, + "grad_norm": 357405.8125, + "learning_rate": 6.176000000000001e-06, + "loss": 30529.9094, + "step": 30880 + }, + { + "epoch": 0.062399754360306564, + "grad_norm": 53073.578125, + "learning_rate": 6.178000000000001e-06, + "loss": 5991.8711, + "step": 30890 + }, + { + "epoch": 0.06241995499299038, + "grad_norm": 2504.8955078125, + "learning_rate": 6.18e-06, + "loss": 12826.0758, + "step": 30900 + }, + { + "epoch": 0.0624401556256742, + "grad_norm": 22477.505859375, + "learning_rate": 6.182e-06, + "loss": 7477.1367, + "step": 30910 + }, + { + "epoch": 0.06246035625835801, + "grad_norm": 41166.3203125, + "learning_rate": 6.184e-06, + "loss": 4131.0781, + "step": 30920 + }, + { + "epoch": 0.06248055689104183, + "grad_norm": 80170.6953125, + "learning_rate": 6.1860000000000006e-06, + "loss": 23117.9344, + "step": 30930 + }, + { + "epoch": 0.06250075752372564, + "grad_norm": 1091.955810546875, + "learning_rate": 6.188000000000001e-06, + "loss": 6475.7199, + "step": 30940 + }, + { + "epoch": 0.06252095815640946, + "grad_norm": 10835.8388671875, + "learning_rate": 6.190000000000001e-06, + "loss": 8552.5461, + "step": 30950 + }, + { + "epoch": 0.06254115878909328, + "grad_norm": 49303.25, + "learning_rate": 6.192e-06, + "loss": 3895.0711, + "step": 30960 + }, + { + "epoch": 0.0625613594217771, + "grad_norm": 5113.6982421875, + "learning_rate": 6.194e-06, + "loss": 9269.9453, + "step": 30970 + }, + { + "epoch": 0.0625815600544609, + "grad_norm": 68544.921875, + "learning_rate": 6.196000000000001e-06, + "loss": 12770.3273, + "step": 30980 + }, + { + "epoch": 0.06260176068714472, + "grad_norm": 32242.39453125, + "learning_rate": 6.198000000000001e-06, + "loss": 8841.3438, + "step": 30990 + }, + { + "epoch": 0.06262196131982854, + "grad_norm": 1193.951904296875, + "learning_rate": 6.200000000000001e-06, + "loss": 12684.0094, + "step": 31000 + }, + { + "epoch": 0.06264216195251235, + "grad_norm": 774.8402709960938, + "learning_rate": 6.202e-06, + "loss": 6348.9121, + "step": 31010 + }, + { + "epoch": 0.06266236258519617, + "grad_norm": 303507.28125, + "learning_rate": 6.204e-06, + "loss": 13338.9734, + "step": 31020 + }, + { + "epoch": 0.06268256321787999, + "grad_norm": 11856.7939453125, + "learning_rate": 6.206000000000001e-06, + "loss": 4985.8238, + "step": 31030 + }, + { + "epoch": 0.0627027638505638, + "grad_norm": 86727.9609375, + "learning_rate": 6.2080000000000005e-06, + "loss": 18789.3187, + "step": 31040 + }, + { + "epoch": 0.06272296448324761, + "grad_norm": 52583.16015625, + "learning_rate": 6.210000000000001e-06, + "loss": 5945.5145, + "step": 31050 + }, + { + "epoch": 0.06274316511593143, + "grad_norm": 121433.2421875, + "learning_rate": 6.212e-06, + "loss": 16227.3453, + "step": 31060 + }, + { + "epoch": 0.06276336574861524, + "grad_norm": 4320.4765625, + "learning_rate": 6.214e-06, + "loss": 10174.3203, + "step": 31070 + }, + { + "epoch": 0.06278356638129906, + "grad_norm": 37952.66796875, + "learning_rate": 6.216000000000001e-06, + "loss": 4816.7793, + "step": 31080 + }, + { + "epoch": 0.06280376701398288, + "grad_norm": 4981.7265625, + "learning_rate": 6.2180000000000004e-06, + "loss": 12584.2156, + "step": 31090 + }, + { + "epoch": 0.06282396764666669, + "grad_norm": 233164.703125, + "learning_rate": 6.220000000000001e-06, + "loss": 16539.0031, + "step": 31100 + }, + { + "epoch": 0.0628441682793505, + "grad_norm": 8303.9267578125, + "learning_rate": 6.222e-06, + "loss": 10982.668, + "step": 31110 + }, + { + "epoch": 0.06286436891203433, + "grad_norm": 19116.681640625, + "learning_rate": 6.224e-06, + "loss": 24470.6016, + "step": 31120 + }, + { + "epoch": 0.06288456954471815, + "grad_norm": 138839.234375, + "learning_rate": 6.226000000000001e-06, + "loss": 15084.9672, + "step": 31130 + }, + { + "epoch": 0.06290477017740195, + "grad_norm": 8339.2763671875, + "learning_rate": 6.228e-06, + "loss": 7717.4961, + "step": 31140 + }, + { + "epoch": 0.06292497081008577, + "grad_norm": 6373.48583984375, + "learning_rate": 6.2300000000000005e-06, + "loss": 8589.7742, + "step": 31150 + }, + { + "epoch": 0.06294517144276959, + "grad_norm": 19392.630859375, + "learning_rate": 6.232000000000001e-06, + "loss": 12851.5328, + "step": 31160 + }, + { + "epoch": 0.0629653720754534, + "grad_norm": 60214.91796875, + "learning_rate": 6.234e-06, + "loss": 25804.2531, + "step": 31170 + }, + { + "epoch": 0.06298557270813722, + "grad_norm": 3221.41015625, + "learning_rate": 6.236000000000001e-06, + "loss": 11633.3961, + "step": 31180 + }, + { + "epoch": 0.06300577334082104, + "grad_norm": 124784.8125, + "learning_rate": 6.238000000000001e-06, + "loss": 15364.2422, + "step": 31190 + }, + { + "epoch": 0.06302597397350485, + "grad_norm": 97663.515625, + "learning_rate": 6.24e-06, + "loss": 16914.7828, + "step": 31200 + }, + { + "epoch": 0.06304617460618867, + "grad_norm": 53991.58203125, + "learning_rate": 6.2420000000000005e-06, + "loss": 4768.643, + "step": 31210 + }, + { + "epoch": 0.06306637523887249, + "grad_norm": 54807.40625, + "learning_rate": 6.244e-06, + "loss": 11254.9023, + "step": 31220 + }, + { + "epoch": 0.06308657587155629, + "grad_norm": 5833.79248046875, + "learning_rate": 6.246000000000001e-06, + "loss": 9674.7344, + "step": 31230 + }, + { + "epoch": 0.06310677650424011, + "grad_norm": 20622.953125, + "learning_rate": 6.248000000000001e-06, + "loss": 16224.1766, + "step": 31240 + }, + { + "epoch": 0.06312697713692393, + "grad_norm": 1713.0831298828125, + "learning_rate": 6.25e-06, + "loss": 11599.5, + "step": 31250 + }, + { + "epoch": 0.06314717776960774, + "grad_norm": 6038.45703125, + "learning_rate": 6.2520000000000004e-06, + "loss": 7339.1797, + "step": 31260 + }, + { + "epoch": 0.06316737840229156, + "grad_norm": 336538.75, + "learning_rate": 6.254e-06, + "loss": 25291.9781, + "step": 31270 + }, + { + "epoch": 0.06318757903497538, + "grad_norm": 70027.1171875, + "learning_rate": 6.256000000000001e-06, + "loss": 8755.2109, + "step": 31280 + }, + { + "epoch": 0.0632077796676592, + "grad_norm": 35015.52734375, + "learning_rate": 6.258000000000001e-06, + "loss": 11962.9133, + "step": 31290 + }, + { + "epoch": 0.063227980300343, + "grad_norm": 15812.0654296875, + "learning_rate": 6.26e-06, + "loss": 5523.5383, + "step": 31300 + }, + { + "epoch": 0.06324818093302682, + "grad_norm": 45335.60546875, + "learning_rate": 6.262e-06, + "loss": 12615.1453, + "step": 31310 + }, + { + "epoch": 0.06326838156571064, + "grad_norm": 16085.564453125, + "learning_rate": 6.264e-06, + "loss": 4714.5875, + "step": 31320 + }, + { + "epoch": 0.06328858219839445, + "grad_norm": 784.8346557617188, + "learning_rate": 6.266000000000001e-06, + "loss": 7385.2727, + "step": 31330 + }, + { + "epoch": 0.06330878283107827, + "grad_norm": 9435.6943359375, + "learning_rate": 6.268000000000001e-06, + "loss": 3260.3711, + "step": 31340 + }, + { + "epoch": 0.06332898346376209, + "grad_norm": 30412.158203125, + "learning_rate": 6.27e-06, + "loss": 12455.0836, + "step": 31350 + }, + { + "epoch": 0.0633491840964459, + "grad_norm": 17379.46484375, + "learning_rate": 6.272e-06, + "loss": 3097.7336, + "step": 31360 + }, + { + "epoch": 0.06336938472912972, + "grad_norm": 17176.724609375, + "learning_rate": 6.274e-06, + "loss": 19615.0594, + "step": 31370 + }, + { + "epoch": 0.06338958536181354, + "grad_norm": 5805.2294921875, + "learning_rate": 6.2760000000000006e-06, + "loss": 17549.8953, + "step": 31380 + }, + { + "epoch": 0.06340978599449734, + "grad_norm": 2648.29345703125, + "learning_rate": 6.278000000000001e-06, + "loss": 11737.7055, + "step": 31390 + }, + { + "epoch": 0.06342998662718116, + "grad_norm": 44818.015625, + "learning_rate": 6.280000000000001e-06, + "loss": 9663.2422, + "step": 31400 + }, + { + "epoch": 0.06345018725986498, + "grad_norm": 35856.625, + "learning_rate": 6.282e-06, + "loss": 7014.5477, + "step": 31410 + }, + { + "epoch": 0.06347038789254879, + "grad_norm": 6587.80615234375, + "learning_rate": 6.284e-06, + "loss": 6779.2125, + "step": 31420 + }, + { + "epoch": 0.06349058852523261, + "grad_norm": 4058.215087890625, + "learning_rate": 6.286000000000001e-06, + "loss": 6787.0063, + "step": 31430 + }, + { + "epoch": 0.06351078915791643, + "grad_norm": 59498.95703125, + "learning_rate": 6.288000000000001e-06, + "loss": 14973.2516, + "step": 31440 + }, + { + "epoch": 0.06353098979060025, + "grad_norm": 18611.7890625, + "learning_rate": 6.290000000000001e-06, + "loss": 5004.268, + "step": 31450 + }, + { + "epoch": 0.06355119042328405, + "grad_norm": 157596.078125, + "learning_rate": 6.292e-06, + "loss": 8811.6437, + "step": 31460 + }, + { + "epoch": 0.06357139105596787, + "grad_norm": 156519.625, + "learning_rate": 6.294e-06, + "loss": 17272.6344, + "step": 31470 + }, + { + "epoch": 0.0635915916886517, + "grad_norm": 4416.13232421875, + "learning_rate": 6.296000000000001e-06, + "loss": 6756.8844, + "step": 31480 + }, + { + "epoch": 0.0636117923213355, + "grad_norm": 21469.7734375, + "learning_rate": 6.2980000000000005e-06, + "loss": 8738.9336, + "step": 31490 + }, + { + "epoch": 0.06363199295401932, + "grad_norm": 121759.4921875, + "learning_rate": 6.300000000000001e-06, + "loss": 10945.8047, + "step": 31500 + }, + { + "epoch": 0.06365219358670314, + "grad_norm": 27995.994140625, + "learning_rate": 6.302e-06, + "loss": 5060.8805, + "step": 31510 + }, + { + "epoch": 0.06367239421938695, + "grad_norm": 23105.029296875, + "learning_rate": 6.304e-06, + "loss": 10176.3844, + "step": 31520 + }, + { + "epoch": 0.06369259485207077, + "grad_norm": 2735.46630859375, + "learning_rate": 6.306000000000001e-06, + "loss": 13156.1656, + "step": 31530 + }, + { + "epoch": 0.06371279548475459, + "grad_norm": 8767.724609375, + "learning_rate": 6.308e-06, + "loss": 12515.0531, + "step": 31540 + }, + { + "epoch": 0.06373299611743839, + "grad_norm": 9379.65234375, + "learning_rate": 6.3100000000000006e-06, + "loss": 12660.8719, + "step": 31550 + }, + { + "epoch": 0.06375319675012221, + "grad_norm": 77216.046875, + "learning_rate": 6.312000000000001e-06, + "loss": 14430.4156, + "step": 31560 + }, + { + "epoch": 0.06377339738280603, + "grad_norm": 36163.62890625, + "learning_rate": 6.314e-06, + "loss": 13166.4531, + "step": 31570 + }, + { + "epoch": 0.06379359801548984, + "grad_norm": 176205.34375, + "learning_rate": 6.316000000000001e-06, + "loss": 20236.3312, + "step": 31580 + }, + { + "epoch": 0.06381379864817366, + "grad_norm": 2158.51025390625, + "learning_rate": 6.318000000000001e-06, + "loss": 19986.3781, + "step": 31590 + }, + { + "epoch": 0.06383399928085748, + "grad_norm": 1419.3125, + "learning_rate": 6.3200000000000005e-06, + "loss": 10159.3258, + "step": 31600 + }, + { + "epoch": 0.0638541999135413, + "grad_norm": 10743.0966796875, + "learning_rate": 6.322000000000001e-06, + "loss": 4766.5656, + "step": 31610 + }, + { + "epoch": 0.0638744005462251, + "grad_norm": 12919.541015625, + "learning_rate": 6.324e-06, + "loss": 5196.4965, + "step": 31620 + }, + { + "epoch": 0.06389460117890892, + "grad_norm": 28968.599609375, + "learning_rate": 6.326000000000001e-06, + "loss": 7206.6375, + "step": 31630 + }, + { + "epoch": 0.06391480181159274, + "grad_norm": 6324.64111328125, + "learning_rate": 6.328000000000001e-06, + "loss": 10299.8016, + "step": 31640 + }, + { + "epoch": 0.06393500244427655, + "grad_norm": 8841.494140625, + "learning_rate": 6.33e-06, + "loss": 4598.2852, + "step": 31650 + }, + { + "epoch": 0.06395520307696037, + "grad_norm": 75632.21875, + "learning_rate": 6.3320000000000005e-06, + "loss": 12170.7227, + "step": 31660 + }, + { + "epoch": 0.06397540370964419, + "grad_norm": 8774.6748046875, + "learning_rate": 6.334e-06, + "loss": 2238.7951, + "step": 31670 + }, + { + "epoch": 0.063995604342328, + "grad_norm": 44895.23828125, + "learning_rate": 6.336000000000001e-06, + "loss": 9188.1938, + "step": 31680 + }, + { + "epoch": 0.06401580497501182, + "grad_norm": 29387.521484375, + "learning_rate": 6.338000000000001e-06, + "loss": 7233.1391, + "step": 31690 + }, + { + "epoch": 0.06403600560769564, + "grad_norm": 17075.74609375, + "learning_rate": 6.34e-06, + "loss": 10059.6328, + "step": 31700 + }, + { + "epoch": 0.06405620624037944, + "grad_norm": 27622.34375, + "learning_rate": 6.3420000000000004e-06, + "loss": 7852.5617, + "step": 31710 + }, + { + "epoch": 0.06407640687306326, + "grad_norm": 1692.5035400390625, + "learning_rate": 6.344e-06, + "loss": 5442.5684, + "step": 31720 + }, + { + "epoch": 0.06409660750574708, + "grad_norm": 1173.8148193359375, + "learning_rate": 6.346000000000001e-06, + "loss": 5719.5566, + "step": 31730 + }, + { + "epoch": 0.06411680813843089, + "grad_norm": 40284.1796875, + "learning_rate": 6.348000000000001e-06, + "loss": 10842.8414, + "step": 31740 + }, + { + "epoch": 0.06413700877111471, + "grad_norm": 9459.3349609375, + "learning_rate": 6.35e-06, + "loss": 6480.9977, + "step": 31750 + }, + { + "epoch": 0.06415720940379853, + "grad_norm": 47094.98046875, + "learning_rate": 6.352e-06, + "loss": 6584.5672, + "step": 31760 + }, + { + "epoch": 0.06417741003648235, + "grad_norm": 16266.5068359375, + "learning_rate": 6.3540000000000005e-06, + "loss": 17672.1125, + "step": 31770 + }, + { + "epoch": 0.06419761066916616, + "grad_norm": 79499.265625, + "learning_rate": 6.356000000000001e-06, + "loss": 19313.7016, + "step": 31780 + }, + { + "epoch": 0.06421781130184998, + "grad_norm": 608.4041748046875, + "learning_rate": 6.358000000000001e-06, + "loss": 4240.9223, + "step": 31790 + }, + { + "epoch": 0.0642380119345338, + "grad_norm": 6739.38525390625, + "learning_rate": 6.360000000000001e-06, + "loss": 10343.1227, + "step": 31800 + }, + { + "epoch": 0.0642582125672176, + "grad_norm": 1370.126708984375, + "learning_rate": 6.362e-06, + "loss": 6341.4203, + "step": 31810 + }, + { + "epoch": 0.06427841319990142, + "grad_norm": 28049.537109375, + "learning_rate": 6.364e-06, + "loss": 7083.3039, + "step": 31820 + }, + { + "epoch": 0.06429861383258524, + "grad_norm": 18927.779296875, + "learning_rate": 6.366000000000001e-06, + "loss": 29182.8688, + "step": 31830 + }, + { + "epoch": 0.06431881446526905, + "grad_norm": 8613.9267578125, + "learning_rate": 6.368000000000001e-06, + "loss": 988.7749, + "step": 31840 + }, + { + "epoch": 0.06433901509795287, + "grad_norm": 70818.828125, + "learning_rate": 6.370000000000001e-06, + "loss": 13451.9031, + "step": 31850 + }, + { + "epoch": 0.06435921573063669, + "grad_norm": 118218.03125, + "learning_rate": 6.372e-06, + "loss": 14795.7281, + "step": 31860 + }, + { + "epoch": 0.0643794163633205, + "grad_norm": 30373.837890625, + "learning_rate": 6.374e-06, + "loss": 16117.5187, + "step": 31870 + }, + { + "epoch": 0.06439961699600431, + "grad_norm": 8036.86328125, + "learning_rate": 6.376e-06, + "loss": 3947.4004, + "step": 31880 + }, + { + "epoch": 0.06441981762868813, + "grad_norm": 18390.427734375, + "learning_rate": 6.378000000000001e-06, + "loss": 3292.218, + "step": 31890 + }, + { + "epoch": 0.06444001826137194, + "grad_norm": 438.5331115722656, + "learning_rate": 6.380000000000001e-06, + "loss": 10299.8898, + "step": 31900 + }, + { + "epoch": 0.06446021889405576, + "grad_norm": 4461.97900390625, + "learning_rate": 6.382e-06, + "loss": 8668.0414, + "step": 31910 + }, + { + "epoch": 0.06448041952673958, + "grad_norm": 29702.533203125, + "learning_rate": 6.384e-06, + "loss": 11557.8148, + "step": 31920 + }, + { + "epoch": 0.0645006201594234, + "grad_norm": 22482.19921875, + "learning_rate": 6.386e-06, + "loss": 4679.1523, + "step": 31930 + }, + { + "epoch": 0.0645208207921072, + "grad_norm": 19622.162109375, + "learning_rate": 6.3880000000000005e-06, + "loss": 15168.4625, + "step": 31940 + }, + { + "epoch": 0.06454102142479103, + "grad_norm": 12128.5341796875, + "learning_rate": 6.390000000000001e-06, + "loss": 3985.007, + "step": 31950 + }, + { + "epoch": 0.06456122205747485, + "grad_norm": 2367.93115234375, + "learning_rate": 6.392000000000001e-06, + "loss": 21747.8375, + "step": 31960 + }, + { + "epoch": 0.06458142269015865, + "grad_norm": 257313.46875, + "learning_rate": 6.394e-06, + "loss": 12591.0453, + "step": 31970 + }, + { + "epoch": 0.06460162332284247, + "grad_norm": 136843.796875, + "learning_rate": 6.396e-06, + "loss": 21581.2391, + "step": 31980 + }, + { + "epoch": 0.06462182395552629, + "grad_norm": 8860.033203125, + "learning_rate": 6.398000000000001e-06, + "loss": 30022.0563, + "step": 31990 + }, + { + "epoch": 0.0646420245882101, + "grad_norm": 152674.15625, + "learning_rate": 6.4000000000000006e-06, + "loss": 8874.3687, + "step": 32000 + }, + { + "epoch": 0.06466222522089392, + "grad_norm": 16018.23828125, + "learning_rate": 6.402000000000001e-06, + "loss": 13726.9625, + "step": 32010 + }, + { + "epoch": 0.06468242585357774, + "grad_norm": 386090.375, + "learning_rate": 6.404e-06, + "loss": 28033.1813, + "step": 32020 + }, + { + "epoch": 0.06470262648626154, + "grad_norm": 182149.84375, + "learning_rate": 6.406e-06, + "loss": 8148.6398, + "step": 32030 + }, + { + "epoch": 0.06472282711894536, + "grad_norm": 2846.380859375, + "learning_rate": 6.408000000000001e-06, + "loss": 13029.1688, + "step": 32040 + }, + { + "epoch": 0.06474302775162918, + "grad_norm": 1968.7919921875, + "learning_rate": 6.4100000000000005e-06, + "loss": 15293.2141, + "step": 32050 + }, + { + "epoch": 0.06476322838431299, + "grad_norm": 160369.734375, + "learning_rate": 6.412000000000001e-06, + "loss": 12406.6805, + "step": 32060 + }, + { + "epoch": 0.06478342901699681, + "grad_norm": 14319.5107421875, + "learning_rate": 6.414e-06, + "loss": 9193.057, + "step": 32070 + }, + { + "epoch": 0.06480362964968063, + "grad_norm": 7262.9033203125, + "learning_rate": 6.416e-06, + "loss": 9424.4672, + "step": 32080 + }, + { + "epoch": 0.06482383028236445, + "grad_norm": 22232.287109375, + "learning_rate": 6.418000000000001e-06, + "loss": 5489.7539, + "step": 32090 + }, + { + "epoch": 0.06484403091504826, + "grad_norm": 72566.65625, + "learning_rate": 6.42e-06, + "loss": 13074.6172, + "step": 32100 + }, + { + "epoch": 0.06486423154773208, + "grad_norm": 21826.146484375, + "learning_rate": 6.4220000000000005e-06, + "loss": 7172.3109, + "step": 32110 + }, + { + "epoch": 0.0648844321804159, + "grad_norm": 7878.43701171875, + "learning_rate": 6.424e-06, + "loss": 5947.1832, + "step": 32120 + }, + { + "epoch": 0.0649046328130997, + "grad_norm": 1671.3072509765625, + "learning_rate": 6.426e-06, + "loss": 14010.3641, + "step": 32130 + }, + { + "epoch": 0.06492483344578352, + "grad_norm": 14462.3701171875, + "learning_rate": 6.428000000000001e-06, + "loss": 8656.1461, + "step": 32140 + }, + { + "epoch": 0.06494503407846734, + "grad_norm": 26880.03515625, + "learning_rate": 6.43e-06, + "loss": 4054.0648, + "step": 32150 + }, + { + "epoch": 0.06496523471115115, + "grad_norm": 30292.935546875, + "learning_rate": 6.432e-06, + "loss": 11350.8188, + "step": 32160 + }, + { + "epoch": 0.06498543534383497, + "grad_norm": 7027.87158203125, + "learning_rate": 6.4340000000000006e-06, + "loss": 7963.6836, + "step": 32170 + }, + { + "epoch": 0.06500563597651879, + "grad_norm": 74240.671875, + "learning_rate": 6.436e-06, + "loss": 6473.6316, + "step": 32180 + }, + { + "epoch": 0.0650258366092026, + "grad_norm": 56775.05078125, + "learning_rate": 6.438000000000001e-06, + "loss": 10039.9672, + "step": 32190 + }, + { + "epoch": 0.06504603724188641, + "grad_norm": 110827.484375, + "learning_rate": 6.440000000000001e-06, + "loss": 12639.9344, + "step": 32200 + }, + { + "epoch": 0.06506623787457023, + "grad_norm": 1484.4520263671875, + "learning_rate": 6.442e-06, + "loss": 8786.9141, + "step": 32210 + }, + { + "epoch": 0.06508643850725404, + "grad_norm": 15083.806640625, + "learning_rate": 6.4440000000000005e-06, + "loss": 5573.1406, + "step": 32220 + }, + { + "epoch": 0.06510663913993786, + "grad_norm": 3532.524658203125, + "learning_rate": 6.446e-06, + "loss": 6704.9602, + "step": 32230 + }, + { + "epoch": 0.06512683977262168, + "grad_norm": 239188.328125, + "learning_rate": 6.448000000000001e-06, + "loss": 9303.1875, + "step": 32240 + }, + { + "epoch": 0.0651470404053055, + "grad_norm": 196607.421875, + "learning_rate": 6.450000000000001e-06, + "loss": 9681.0266, + "step": 32250 + }, + { + "epoch": 0.0651672410379893, + "grad_norm": 204261.90625, + "learning_rate": 6.452e-06, + "loss": 9351.3031, + "step": 32260 + }, + { + "epoch": 0.06518744167067313, + "grad_norm": 2577.6240234375, + "learning_rate": 6.454e-06, + "loss": 6953.0422, + "step": 32270 + }, + { + "epoch": 0.06520764230335695, + "grad_norm": 10992.94140625, + "learning_rate": 6.456e-06, + "loss": 18133.9437, + "step": 32280 + }, + { + "epoch": 0.06522784293604075, + "grad_norm": 27995.369140625, + "learning_rate": 6.458000000000001e-06, + "loss": 15608.6938, + "step": 32290 + }, + { + "epoch": 0.06524804356872457, + "grad_norm": 21988.505859375, + "learning_rate": 6.460000000000001e-06, + "loss": 7343.1687, + "step": 32300 + }, + { + "epoch": 0.06526824420140839, + "grad_norm": 1169.558349609375, + "learning_rate": 6.462e-06, + "loss": 6728.4039, + "step": 32310 + }, + { + "epoch": 0.0652884448340922, + "grad_norm": 99438.1953125, + "learning_rate": 6.464e-06, + "loss": 7599.5727, + "step": 32320 + }, + { + "epoch": 0.06530864546677602, + "grad_norm": 3210.206787109375, + "learning_rate": 6.4660000000000004e-06, + "loss": 15146.0328, + "step": 32330 + }, + { + "epoch": 0.06532884609945984, + "grad_norm": 1639.3297119140625, + "learning_rate": 6.468000000000001e-06, + "loss": 7560.2211, + "step": 32340 + }, + { + "epoch": 0.06534904673214365, + "grad_norm": 7411.56396484375, + "learning_rate": 6.470000000000001e-06, + "loss": 9312.1266, + "step": 32350 + }, + { + "epoch": 0.06536924736482747, + "grad_norm": 1690.8291015625, + "learning_rate": 6.472000000000001e-06, + "loss": 5521.4484, + "step": 32360 + }, + { + "epoch": 0.06538944799751129, + "grad_norm": 15713.1669921875, + "learning_rate": 6.474e-06, + "loss": 7894.3148, + "step": 32370 + }, + { + "epoch": 0.06540964863019509, + "grad_norm": 39079.58984375, + "learning_rate": 6.476e-06, + "loss": 9402.5984, + "step": 32380 + }, + { + "epoch": 0.06542984926287891, + "grad_norm": 7515.82373046875, + "learning_rate": 6.478000000000001e-06, + "loss": 16920.2938, + "step": 32390 + }, + { + "epoch": 0.06545004989556273, + "grad_norm": 3091.986328125, + "learning_rate": 6.480000000000001e-06, + "loss": 6101.6211, + "step": 32400 + }, + { + "epoch": 0.06547025052824655, + "grad_norm": 120267.7265625, + "learning_rate": 6.482000000000001e-06, + "loss": 19317.7562, + "step": 32410 + }, + { + "epoch": 0.06549045116093036, + "grad_norm": 6731.3759765625, + "learning_rate": 6.484e-06, + "loss": 12104.1086, + "step": 32420 + }, + { + "epoch": 0.06551065179361418, + "grad_norm": 504.1181335449219, + "learning_rate": 6.486e-06, + "loss": 7681.1187, + "step": 32430 + }, + { + "epoch": 0.065530852426298, + "grad_norm": 10799.33984375, + "learning_rate": 6.488000000000001e-06, + "loss": 8803.8563, + "step": 32440 + }, + { + "epoch": 0.0655510530589818, + "grad_norm": 78193.484375, + "learning_rate": 6.4900000000000005e-06, + "loss": 6602.975, + "step": 32450 + }, + { + "epoch": 0.06557125369166562, + "grad_norm": 25701.482421875, + "learning_rate": 6.492000000000001e-06, + "loss": 13779.9156, + "step": 32460 + }, + { + "epoch": 0.06559145432434944, + "grad_norm": 97893.0078125, + "learning_rate": 6.494e-06, + "loss": 9753.8969, + "step": 32470 + }, + { + "epoch": 0.06561165495703325, + "grad_norm": 77116.078125, + "learning_rate": 6.496e-06, + "loss": 11976.5891, + "step": 32480 + }, + { + "epoch": 0.06563185558971707, + "grad_norm": 3862.97705078125, + "learning_rate": 6.498000000000001e-06, + "loss": 9409.9438, + "step": 32490 + }, + { + "epoch": 0.06565205622240089, + "grad_norm": 2427.36083984375, + "learning_rate": 6.5000000000000004e-06, + "loss": 1517.0089, + "step": 32500 + }, + { + "epoch": 0.0656722568550847, + "grad_norm": 8649.830078125, + "learning_rate": 6.502000000000001e-06, + "loss": 15906.1063, + "step": 32510 + }, + { + "epoch": 0.06569245748776852, + "grad_norm": 11967.072265625, + "learning_rate": 6.504e-06, + "loss": 6766.0906, + "step": 32520 + }, + { + "epoch": 0.06571265812045234, + "grad_norm": 6701.849609375, + "learning_rate": 6.506e-06, + "loss": 14757.1906, + "step": 32530 + }, + { + "epoch": 0.06573285875313614, + "grad_norm": 1359.329833984375, + "learning_rate": 6.508000000000001e-06, + "loss": 8486.7898, + "step": 32540 + }, + { + "epoch": 0.06575305938581996, + "grad_norm": 35291.890625, + "learning_rate": 6.51e-06, + "loss": 9026.0234, + "step": 32550 + }, + { + "epoch": 0.06577326001850378, + "grad_norm": 326123.34375, + "learning_rate": 6.5120000000000005e-06, + "loss": 18033.875, + "step": 32560 + }, + { + "epoch": 0.0657934606511876, + "grad_norm": 40768.94921875, + "learning_rate": 6.514000000000001e-06, + "loss": 4360.9109, + "step": 32570 + }, + { + "epoch": 0.06581366128387141, + "grad_norm": 906.4695434570312, + "learning_rate": 6.516e-06, + "loss": 10918.0164, + "step": 32580 + }, + { + "epoch": 0.06583386191655523, + "grad_norm": 137918.84375, + "learning_rate": 6.518000000000001e-06, + "loss": 10824.8773, + "step": 32590 + }, + { + "epoch": 0.06585406254923905, + "grad_norm": 108919.3671875, + "learning_rate": 6.520000000000001e-06, + "loss": 12968.5797, + "step": 32600 + }, + { + "epoch": 0.06587426318192285, + "grad_norm": 18884.34765625, + "learning_rate": 6.522e-06, + "loss": 11462.7344, + "step": 32610 + }, + { + "epoch": 0.06589446381460667, + "grad_norm": 153273.234375, + "learning_rate": 6.5240000000000006e-06, + "loss": 5950.3375, + "step": 32620 + }, + { + "epoch": 0.0659146644472905, + "grad_norm": 239777.28125, + "learning_rate": 6.526e-06, + "loss": 9705.4062, + "step": 32630 + }, + { + "epoch": 0.0659348650799743, + "grad_norm": 25183.88671875, + "learning_rate": 6.528000000000001e-06, + "loss": 8466.8188, + "step": 32640 + }, + { + "epoch": 0.06595506571265812, + "grad_norm": 16494.017578125, + "learning_rate": 6.530000000000001e-06, + "loss": 11807.0344, + "step": 32650 + }, + { + "epoch": 0.06597526634534194, + "grad_norm": 6947.16943359375, + "learning_rate": 6.532e-06, + "loss": 14308.9609, + "step": 32660 + }, + { + "epoch": 0.06599546697802575, + "grad_norm": 88161.546875, + "learning_rate": 6.5340000000000005e-06, + "loss": 6810.1922, + "step": 32670 + }, + { + "epoch": 0.06601566761070957, + "grad_norm": 117191.390625, + "learning_rate": 6.536e-06, + "loss": 13956.1172, + "step": 32680 + }, + { + "epoch": 0.06603586824339339, + "grad_norm": 6161.31884765625, + "learning_rate": 6.538000000000001e-06, + "loss": 8390.3531, + "step": 32690 + }, + { + "epoch": 0.06605606887607719, + "grad_norm": 121585.7578125, + "learning_rate": 6.540000000000001e-06, + "loss": 18456.3797, + "step": 32700 + }, + { + "epoch": 0.06607626950876101, + "grad_norm": 1232.4046630859375, + "learning_rate": 6.542e-06, + "loss": 6567.3031, + "step": 32710 + }, + { + "epoch": 0.06609647014144483, + "grad_norm": 6641.71142578125, + "learning_rate": 6.544e-06, + "loss": 5581.0809, + "step": 32720 + }, + { + "epoch": 0.06611667077412865, + "grad_norm": 505.0578918457031, + "learning_rate": 6.5460000000000005e-06, + "loss": 10058.6008, + "step": 32730 + }, + { + "epoch": 0.06613687140681246, + "grad_norm": 4759.51611328125, + "learning_rate": 6.548000000000001e-06, + "loss": 4117.4324, + "step": 32740 + }, + { + "epoch": 0.06615707203949628, + "grad_norm": 22736.841796875, + "learning_rate": 6.550000000000001e-06, + "loss": 12366.9422, + "step": 32750 + }, + { + "epoch": 0.0661772726721801, + "grad_norm": 12533.6708984375, + "learning_rate": 6.552000000000001e-06, + "loss": 9263.5883, + "step": 32760 + }, + { + "epoch": 0.0661974733048639, + "grad_norm": 4126.7958984375, + "learning_rate": 6.554e-06, + "loss": 8353.3359, + "step": 32770 + }, + { + "epoch": 0.06621767393754772, + "grad_norm": 1147.3929443359375, + "learning_rate": 6.556e-06, + "loss": 9735.4352, + "step": 32780 + }, + { + "epoch": 0.06623787457023154, + "grad_norm": 50965.08203125, + "learning_rate": 6.558000000000001e-06, + "loss": 6455.7063, + "step": 32790 + }, + { + "epoch": 0.06625807520291535, + "grad_norm": 173179.125, + "learning_rate": 6.560000000000001e-06, + "loss": 19241.5031, + "step": 32800 + }, + { + "epoch": 0.06627827583559917, + "grad_norm": 26855.90625, + "learning_rate": 6.562000000000001e-06, + "loss": 7119.1906, + "step": 32810 + }, + { + "epoch": 0.06629847646828299, + "grad_norm": 379288.25, + "learning_rate": 6.564e-06, + "loss": 24412.8812, + "step": 32820 + }, + { + "epoch": 0.0663186771009668, + "grad_norm": 12744.677734375, + "learning_rate": 6.566e-06, + "loss": 14337.6266, + "step": 32830 + }, + { + "epoch": 0.06633887773365062, + "grad_norm": 1250.5755615234375, + "learning_rate": 6.568000000000001e-06, + "loss": 4397.9496, + "step": 32840 + }, + { + "epoch": 0.06635907836633444, + "grad_norm": 2398.857177734375, + "learning_rate": 6.570000000000001e-06, + "loss": 4769.9594, + "step": 32850 + }, + { + "epoch": 0.06637927899901824, + "grad_norm": 135421.046875, + "learning_rate": 6.572000000000001e-06, + "loss": 5734.3836, + "step": 32860 + }, + { + "epoch": 0.06639947963170206, + "grad_norm": 9183.9111328125, + "learning_rate": 6.574e-06, + "loss": 5246.9785, + "step": 32870 + }, + { + "epoch": 0.06641968026438588, + "grad_norm": 5483.3486328125, + "learning_rate": 6.576e-06, + "loss": 5502.3984, + "step": 32880 + }, + { + "epoch": 0.0664398808970697, + "grad_norm": 4198.4833984375, + "learning_rate": 6.578000000000001e-06, + "loss": 5968.4605, + "step": 32890 + }, + { + "epoch": 0.06646008152975351, + "grad_norm": 47984.42578125, + "learning_rate": 6.5800000000000005e-06, + "loss": 10205.6812, + "step": 32900 + }, + { + "epoch": 0.06648028216243733, + "grad_norm": 23033.9765625, + "learning_rate": 6.582000000000001e-06, + "loss": 6200.2902, + "step": 32910 + }, + { + "epoch": 0.06650048279512115, + "grad_norm": 797.0961303710938, + "learning_rate": 6.584e-06, + "loss": 9158.0953, + "step": 32920 + }, + { + "epoch": 0.06652068342780496, + "grad_norm": 24408.470703125, + "learning_rate": 6.586e-06, + "loss": 5124.1516, + "step": 32930 + }, + { + "epoch": 0.06654088406048878, + "grad_norm": 57869.6015625, + "learning_rate": 6.588000000000001e-06, + "loss": 8113.9492, + "step": 32940 + }, + { + "epoch": 0.0665610846931726, + "grad_norm": 35657.00390625, + "learning_rate": 6.5900000000000004e-06, + "loss": 8708.3805, + "step": 32950 + }, + { + "epoch": 0.0665812853258564, + "grad_norm": 11794.8974609375, + "learning_rate": 6.592000000000001e-06, + "loss": 11268.6734, + "step": 32960 + }, + { + "epoch": 0.06660148595854022, + "grad_norm": 5658.1806640625, + "learning_rate": 6.594000000000001e-06, + "loss": 6996.9422, + "step": 32970 + }, + { + "epoch": 0.06662168659122404, + "grad_norm": 19477.953125, + "learning_rate": 6.596e-06, + "loss": 9324.85, + "step": 32980 + }, + { + "epoch": 0.06664188722390785, + "grad_norm": 7201.9384765625, + "learning_rate": 6.598000000000001e-06, + "loss": 4856.5938, + "step": 32990 + }, + { + "epoch": 0.06666208785659167, + "grad_norm": 45819.94921875, + "learning_rate": 6.600000000000001e-06, + "loss": 14477.2797, + "step": 33000 + }, + { + "epoch": 0.06668228848927549, + "grad_norm": 1877.9039306640625, + "learning_rate": 6.6020000000000005e-06, + "loss": 11746.3375, + "step": 33010 + }, + { + "epoch": 0.0667024891219593, + "grad_norm": 3893.42724609375, + "learning_rate": 6.604000000000001e-06, + "loss": 13252.7437, + "step": 33020 + }, + { + "epoch": 0.06672268975464311, + "grad_norm": 9742.0400390625, + "learning_rate": 6.606e-06, + "loss": 3840.5801, + "step": 33030 + }, + { + "epoch": 0.06674289038732693, + "grad_norm": 36775.1171875, + "learning_rate": 6.608000000000001e-06, + "loss": 11118.7242, + "step": 33040 + }, + { + "epoch": 0.06676309102001075, + "grad_norm": 94961.4453125, + "learning_rate": 6.610000000000001e-06, + "loss": 15119.0812, + "step": 33050 + }, + { + "epoch": 0.06678329165269456, + "grad_norm": 9264.0341796875, + "learning_rate": 6.612e-06, + "loss": 3228.3025, + "step": 33060 + }, + { + "epoch": 0.06680349228537838, + "grad_norm": 6759.51513671875, + "learning_rate": 6.6140000000000005e-06, + "loss": 15682.0469, + "step": 33070 + }, + { + "epoch": 0.0668236929180622, + "grad_norm": 106510.328125, + "learning_rate": 6.616e-06, + "loss": 5797.125, + "step": 33080 + }, + { + "epoch": 0.066843893550746, + "grad_norm": 58474.18359375, + "learning_rate": 6.618000000000001e-06, + "loss": 10283.6508, + "step": 33090 + }, + { + "epoch": 0.06686409418342983, + "grad_norm": 29376.408203125, + "learning_rate": 6.620000000000001e-06, + "loss": 4017.3152, + "step": 33100 + }, + { + "epoch": 0.06688429481611365, + "grad_norm": 4496.21728515625, + "learning_rate": 6.622e-06, + "loss": 6177.5723, + "step": 33110 + }, + { + "epoch": 0.06690449544879745, + "grad_norm": 63428.3359375, + "learning_rate": 6.6240000000000004e-06, + "loss": 7251.6148, + "step": 33120 + }, + { + "epoch": 0.06692469608148127, + "grad_norm": 1931.0418701171875, + "learning_rate": 6.626000000000001e-06, + "loss": 10238.1422, + "step": 33130 + }, + { + "epoch": 0.06694489671416509, + "grad_norm": 1390.3206787109375, + "learning_rate": 6.628e-06, + "loss": 9346.8656, + "step": 33140 + }, + { + "epoch": 0.0669650973468489, + "grad_norm": 53278.7421875, + "learning_rate": 6.630000000000001e-06, + "loss": 7953.193, + "step": 33150 + }, + { + "epoch": 0.06698529797953272, + "grad_norm": 71959.4453125, + "learning_rate": 6.632000000000001e-06, + "loss": 4296.7336, + "step": 33160 + }, + { + "epoch": 0.06700549861221654, + "grad_norm": 32960.7421875, + "learning_rate": 6.634e-06, + "loss": 8606.6906, + "step": 33170 + }, + { + "epoch": 0.06702569924490034, + "grad_norm": 154922.28125, + "learning_rate": 6.6360000000000005e-06, + "loss": 16370.9156, + "step": 33180 + }, + { + "epoch": 0.06704589987758416, + "grad_norm": 1094.612548828125, + "learning_rate": 6.638e-06, + "loss": 9152.2625, + "step": 33190 + }, + { + "epoch": 0.06706610051026798, + "grad_norm": 25000.080078125, + "learning_rate": 6.640000000000001e-06, + "loss": 13794.8359, + "step": 33200 + }, + { + "epoch": 0.0670863011429518, + "grad_norm": 102753.171875, + "learning_rate": 6.642000000000001e-06, + "loss": 7640.4656, + "step": 33210 + }, + { + "epoch": 0.06710650177563561, + "grad_norm": 1227.8990478515625, + "learning_rate": 6.644e-06, + "loss": 6855.575, + "step": 33220 + }, + { + "epoch": 0.06712670240831943, + "grad_norm": 13532.88671875, + "learning_rate": 6.646e-06, + "loss": 6968.55, + "step": 33230 + }, + { + "epoch": 0.06714690304100325, + "grad_norm": 5289.802734375, + "learning_rate": 6.648e-06, + "loss": 12357.5922, + "step": 33240 + }, + { + "epoch": 0.06716710367368706, + "grad_norm": 32564.923828125, + "learning_rate": 6.650000000000001e-06, + "loss": 9080.9086, + "step": 33250 + }, + { + "epoch": 0.06718730430637088, + "grad_norm": 37375.765625, + "learning_rate": 6.652000000000001e-06, + "loss": 19414.4375, + "step": 33260 + }, + { + "epoch": 0.0672075049390547, + "grad_norm": 31881.869140625, + "learning_rate": 6.654e-06, + "loss": 11708.607, + "step": 33270 + }, + { + "epoch": 0.0672277055717385, + "grad_norm": 3040.7353515625, + "learning_rate": 6.656e-06, + "loss": 9941.8141, + "step": 33280 + }, + { + "epoch": 0.06724790620442232, + "grad_norm": 146235.765625, + "learning_rate": 6.658e-06, + "loss": 13985.9688, + "step": 33290 + }, + { + "epoch": 0.06726810683710614, + "grad_norm": 5085.7138671875, + "learning_rate": 6.660000000000001e-06, + "loss": 6817.8664, + "step": 33300 + }, + { + "epoch": 0.06728830746978995, + "grad_norm": 12783.76171875, + "learning_rate": 6.662000000000001e-06, + "loss": 8002.8742, + "step": 33310 + }, + { + "epoch": 0.06730850810247377, + "grad_norm": 20345.107421875, + "learning_rate": 6.664e-06, + "loss": 15169.6672, + "step": 33320 + }, + { + "epoch": 0.06732870873515759, + "grad_norm": 3471.452880859375, + "learning_rate": 6.666e-06, + "loss": 9777.7406, + "step": 33330 + }, + { + "epoch": 0.0673489093678414, + "grad_norm": 4640.40625, + "learning_rate": 6.668e-06, + "loss": 15062.2625, + "step": 33340 + }, + { + "epoch": 0.06736911000052521, + "grad_norm": 19524.255859375, + "learning_rate": 6.6700000000000005e-06, + "loss": 5657.718, + "step": 33350 + }, + { + "epoch": 0.06738931063320903, + "grad_norm": 12031.6962890625, + "learning_rate": 6.672000000000001e-06, + "loss": 9017.9961, + "step": 33360 + }, + { + "epoch": 0.06740951126589285, + "grad_norm": 84707.8203125, + "learning_rate": 6.674000000000001e-06, + "loss": 7197.1773, + "step": 33370 + }, + { + "epoch": 0.06742971189857666, + "grad_norm": 10177.18359375, + "learning_rate": 6.676e-06, + "loss": 11610.3594, + "step": 33380 + }, + { + "epoch": 0.06744991253126048, + "grad_norm": 2664.844482421875, + "learning_rate": 6.678e-06, + "loss": 12426.4891, + "step": 33390 + }, + { + "epoch": 0.0674701131639443, + "grad_norm": 88575.75, + "learning_rate": 6.680000000000001e-06, + "loss": 11048.9453, + "step": 33400 + }, + { + "epoch": 0.0674903137966281, + "grad_norm": 13617.9326171875, + "learning_rate": 6.6820000000000006e-06, + "loss": 8700.8125, + "step": 33410 + }, + { + "epoch": 0.06751051442931193, + "grad_norm": 2820.001220703125, + "learning_rate": 6.684000000000001e-06, + "loss": 10352.8578, + "step": 33420 + }, + { + "epoch": 0.06753071506199575, + "grad_norm": 1797.0750732421875, + "learning_rate": 6.686e-06, + "loss": 6082.1383, + "step": 33430 + }, + { + "epoch": 0.06755091569467955, + "grad_norm": 16921.953125, + "learning_rate": 6.688e-06, + "loss": 9206.1984, + "step": 33440 + }, + { + "epoch": 0.06757111632736337, + "grad_norm": 5193.91455078125, + "learning_rate": 6.690000000000001e-06, + "loss": 18687.3344, + "step": 33450 + }, + { + "epoch": 0.06759131696004719, + "grad_norm": 80355.40625, + "learning_rate": 6.6920000000000005e-06, + "loss": 5559.9586, + "step": 33460 + }, + { + "epoch": 0.067611517592731, + "grad_norm": 50913.515625, + "learning_rate": 6.694000000000001e-06, + "loss": 7132.1281, + "step": 33470 + }, + { + "epoch": 0.06763171822541482, + "grad_norm": 20504.822265625, + "learning_rate": 6.696e-06, + "loss": 7994.8805, + "step": 33480 + }, + { + "epoch": 0.06765191885809864, + "grad_norm": 91380.765625, + "learning_rate": 6.698e-06, + "loss": 7529.5867, + "step": 33490 + }, + { + "epoch": 0.06767211949078245, + "grad_norm": 7466.62548828125, + "learning_rate": 6.700000000000001e-06, + "loss": 8639.1719, + "step": 33500 + }, + { + "epoch": 0.06769232012346627, + "grad_norm": 211410.140625, + "learning_rate": 6.702e-06, + "loss": 12568.1422, + "step": 33510 + }, + { + "epoch": 0.06771252075615009, + "grad_norm": 51205.1640625, + "learning_rate": 6.7040000000000005e-06, + "loss": 4632.0402, + "step": 33520 + }, + { + "epoch": 0.06773272138883389, + "grad_norm": 150870.5625, + "learning_rate": 6.706000000000001e-06, + "loss": 13719.1031, + "step": 33530 + }, + { + "epoch": 0.06775292202151771, + "grad_norm": 50111.4609375, + "learning_rate": 6.708e-06, + "loss": 9702.0742, + "step": 33540 + }, + { + "epoch": 0.06777312265420153, + "grad_norm": 14318.4521484375, + "learning_rate": 6.710000000000001e-06, + "loss": 8013.907, + "step": 33550 + }, + { + "epoch": 0.06779332328688535, + "grad_norm": 4044.865234375, + "learning_rate": 6.712000000000001e-06, + "loss": 14237.3516, + "step": 33560 + }, + { + "epoch": 0.06781352391956916, + "grad_norm": 41307.87109375, + "learning_rate": 6.7140000000000004e-06, + "loss": 5445.6934, + "step": 33570 + }, + { + "epoch": 0.06783372455225298, + "grad_norm": 19416.091796875, + "learning_rate": 6.716000000000001e-06, + "loss": 7512.0984, + "step": 33580 + }, + { + "epoch": 0.0678539251849368, + "grad_norm": 757.5476684570312, + "learning_rate": 6.718e-06, + "loss": 6897.9406, + "step": 33590 + }, + { + "epoch": 0.0678741258176206, + "grad_norm": 152045.0, + "learning_rate": 6.720000000000001e-06, + "loss": 7582.7672, + "step": 33600 + }, + { + "epoch": 0.06789432645030442, + "grad_norm": 5302.759765625, + "learning_rate": 6.722000000000001e-06, + "loss": 12791.5508, + "step": 33610 + }, + { + "epoch": 0.06791452708298824, + "grad_norm": 208257.984375, + "learning_rate": 6.724e-06, + "loss": 16808.5906, + "step": 33620 + }, + { + "epoch": 0.06793472771567205, + "grad_norm": 274387.96875, + "learning_rate": 6.7260000000000005e-06, + "loss": 14896.2375, + "step": 33630 + }, + { + "epoch": 0.06795492834835587, + "grad_norm": 7507.99560546875, + "learning_rate": 6.728e-06, + "loss": 6496.025, + "step": 33640 + }, + { + "epoch": 0.06797512898103969, + "grad_norm": 27553.4921875, + "learning_rate": 6.730000000000001e-06, + "loss": 5883.0352, + "step": 33650 + }, + { + "epoch": 0.0679953296137235, + "grad_norm": 98096.0546875, + "learning_rate": 6.732000000000001e-06, + "loss": 11513.8797, + "step": 33660 + }, + { + "epoch": 0.06801553024640732, + "grad_norm": 9782.8173828125, + "learning_rate": 6.734e-06, + "loss": 8829.6664, + "step": 33670 + }, + { + "epoch": 0.06803573087909114, + "grad_norm": 28109.466796875, + "learning_rate": 6.736e-06, + "loss": 15538.1156, + "step": 33680 + }, + { + "epoch": 0.06805593151177494, + "grad_norm": 4134.20751953125, + "learning_rate": 6.738e-06, + "loss": 12117.7141, + "step": 33690 + }, + { + "epoch": 0.06807613214445876, + "grad_norm": 167573.828125, + "learning_rate": 6.740000000000001e-06, + "loss": 11072.4813, + "step": 33700 + }, + { + "epoch": 0.06809633277714258, + "grad_norm": 28666.447265625, + "learning_rate": 6.742000000000001e-06, + "loss": 4846.425, + "step": 33710 + }, + { + "epoch": 0.0681165334098264, + "grad_norm": 447659.96875, + "learning_rate": 6.744e-06, + "loss": 17306.1937, + "step": 33720 + }, + { + "epoch": 0.06813673404251021, + "grad_norm": 37216.0859375, + "learning_rate": 6.746e-06, + "loss": 11078.7695, + "step": 33730 + }, + { + "epoch": 0.06815693467519403, + "grad_norm": 8443.283203125, + "learning_rate": 6.7480000000000004e-06, + "loss": 9134.9672, + "step": 33740 + }, + { + "epoch": 0.06817713530787785, + "grad_norm": 14612.890625, + "learning_rate": 6.750000000000001e-06, + "loss": 3984.9691, + "step": 33750 + }, + { + "epoch": 0.06819733594056165, + "grad_norm": 227198.078125, + "learning_rate": 6.752000000000001e-06, + "loss": 16218.75, + "step": 33760 + }, + { + "epoch": 0.06821753657324547, + "grad_norm": 11443.6650390625, + "learning_rate": 6.754000000000001e-06, + "loss": 5915.5328, + "step": 33770 + }, + { + "epoch": 0.0682377372059293, + "grad_norm": 21785.498046875, + "learning_rate": 6.756e-06, + "loss": 12218.4445, + "step": 33780 + }, + { + "epoch": 0.0682579378386131, + "grad_norm": 2706.17333984375, + "learning_rate": 6.758e-06, + "loss": 14134.8125, + "step": 33790 + }, + { + "epoch": 0.06827813847129692, + "grad_norm": 17651.677734375, + "learning_rate": 6.760000000000001e-06, + "loss": 14764.9609, + "step": 33800 + }, + { + "epoch": 0.06829833910398074, + "grad_norm": 34753.859375, + "learning_rate": 6.762000000000001e-06, + "loss": 4482.1586, + "step": 33810 + }, + { + "epoch": 0.06831853973666455, + "grad_norm": 6654.67724609375, + "learning_rate": 6.764000000000001e-06, + "loss": 5360.2426, + "step": 33820 + }, + { + "epoch": 0.06833874036934837, + "grad_norm": 9999.896484375, + "learning_rate": 6.766e-06, + "loss": 16127.9188, + "step": 33830 + }, + { + "epoch": 0.06835894100203219, + "grad_norm": 28415.26953125, + "learning_rate": 6.768e-06, + "loss": 35148.5594, + "step": 33840 + }, + { + "epoch": 0.06837914163471599, + "grad_norm": 59593.06640625, + "learning_rate": 6.770000000000001e-06, + "loss": 12296.875, + "step": 33850 + }, + { + "epoch": 0.06839934226739981, + "grad_norm": 7760.1904296875, + "learning_rate": 6.7720000000000006e-06, + "loss": 6237.198, + "step": 33860 + }, + { + "epoch": 0.06841954290008363, + "grad_norm": 10485.849609375, + "learning_rate": 6.774000000000001e-06, + "loss": 8731.4539, + "step": 33870 + }, + { + "epoch": 0.06843974353276745, + "grad_norm": 3881.5703125, + "learning_rate": 6.776e-06, + "loss": 11755.5148, + "step": 33880 + }, + { + "epoch": 0.06845994416545126, + "grad_norm": 1293.674560546875, + "learning_rate": 6.778e-06, + "loss": 3997.5191, + "step": 33890 + }, + { + "epoch": 0.06848014479813508, + "grad_norm": 30635.603515625, + "learning_rate": 6.780000000000001e-06, + "loss": 8218.8164, + "step": 33900 + }, + { + "epoch": 0.0685003454308189, + "grad_norm": 16974.392578125, + "learning_rate": 6.7820000000000005e-06, + "loss": 21304.3016, + "step": 33910 + }, + { + "epoch": 0.0685205460635027, + "grad_norm": 27050.693359375, + "learning_rate": 6.784000000000001e-06, + "loss": 8836.7344, + "step": 33920 + }, + { + "epoch": 0.06854074669618652, + "grad_norm": 1048.1707763671875, + "learning_rate": 6.786000000000001e-06, + "loss": 17179.3172, + "step": 33930 + }, + { + "epoch": 0.06856094732887034, + "grad_norm": 82266.953125, + "learning_rate": 6.788e-06, + "loss": 11645.6805, + "step": 33940 + }, + { + "epoch": 0.06858114796155415, + "grad_norm": 26779.875, + "learning_rate": 6.790000000000001e-06, + "loss": 5984.973, + "step": 33950 + }, + { + "epoch": 0.06860134859423797, + "grad_norm": 629.892333984375, + "learning_rate": 6.792000000000001e-06, + "loss": 27225.1781, + "step": 33960 + }, + { + "epoch": 0.06862154922692179, + "grad_norm": 41694.31640625, + "learning_rate": 6.7940000000000005e-06, + "loss": 11447.4891, + "step": 33970 + }, + { + "epoch": 0.0686417498596056, + "grad_norm": 6316.6650390625, + "learning_rate": 6.796000000000001e-06, + "loss": 12705.0672, + "step": 33980 + }, + { + "epoch": 0.06866195049228942, + "grad_norm": 0.0, + "learning_rate": 6.798e-06, + "loss": 6259.9336, + "step": 33990 + }, + { + "epoch": 0.06868215112497324, + "grad_norm": 8112.775390625, + "learning_rate": 6.800000000000001e-06, + "loss": 10426.9594, + "step": 34000 + }, + { + "epoch": 0.06870235175765704, + "grad_norm": 25685.662109375, + "learning_rate": 6.802000000000001e-06, + "loss": 3799.4219, + "step": 34010 + }, + { + "epoch": 0.06872255239034086, + "grad_norm": 15191.8818359375, + "learning_rate": 6.804e-06, + "loss": 7669.3797, + "step": 34020 + }, + { + "epoch": 0.06874275302302468, + "grad_norm": 1717.891845703125, + "learning_rate": 6.8060000000000006e-06, + "loss": 4637.2574, + "step": 34030 + }, + { + "epoch": 0.0687629536557085, + "grad_norm": 20304.298828125, + "learning_rate": 6.808e-06, + "loss": 15799.4969, + "step": 34040 + }, + { + "epoch": 0.06878315428839231, + "grad_norm": 44477.9921875, + "learning_rate": 6.810000000000001e-06, + "loss": 7175.9008, + "step": 34050 + }, + { + "epoch": 0.06880335492107613, + "grad_norm": 639.0581665039062, + "learning_rate": 6.812000000000001e-06, + "loss": 15596.2359, + "step": 34060 + }, + { + "epoch": 0.06882355555375995, + "grad_norm": 1252.10791015625, + "learning_rate": 6.814e-06, + "loss": 15472.5141, + "step": 34070 + }, + { + "epoch": 0.06884375618644376, + "grad_norm": 155841.109375, + "learning_rate": 6.8160000000000005e-06, + "loss": 16039.0469, + "step": 34080 + }, + { + "epoch": 0.06886395681912758, + "grad_norm": 96717.265625, + "learning_rate": 6.818e-06, + "loss": 6683.4547, + "step": 34090 + }, + { + "epoch": 0.0688841574518114, + "grad_norm": 22523.45703125, + "learning_rate": 6.820000000000001e-06, + "loss": 14051.8422, + "step": 34100 + }, + { + "epoch": 0.0689043580844952, + "grad_norm": 5389.49365234375, + "learning_rate": 6.822000000000001e-06, + "loss": 9509.5141, + "step": 34110 + }, + { + "epoch": 0.06892455871717902, + "grad_norm": 77390.0078125, + "learning_rate": 6.824e-06, + "loss": 8290.2016, + "step": 34120 + }, + { + "epoch": 0.06894475934986284, + "grad_norm": 6806.67724609375, + "learning_rate": 6.826e-06, + "loss": 4417.4988, + "step": 34130 + }, + { + "epoch": 0.06896495998254665, + "grad_norm": 61090.80859375, + "learning_rate": 6.8280000000000005e-06, + "loss": 9428.9562, + "step": 34140 + }, + { + "epoch": 0.06898516061523047, + "grad_norm": 44231.484375, + "learning_rate": 6.830000000000001e-06, + "loss": 6418.5695, + "step": 34150 + }, + { + "epoch": 0.06900536124791429, + "grad_norm": 28752.576171875, + "learning_rate": 6.832000000000001e-06, + "loss": 9509.1555, + "step": 34160 + }, + { + "epoch": 0.0690255618805981, + "grad_norm": 63038.3359375, + "learning_rate": 6.834000000000001e-06, + "loss": 8467.332, + "step": 34170 + }, + { + "epoch": 0.06904576251328191, + "grad_norm": 3853.859375, + "learning_rate": 6.836e-06, + "loss": 1652.6977, + "step": 34180 + }, + { + "epoch": 0.06906596314596573, + "grad_norm": 11199.3828125, + "learning_rate": 6.8380000000000004e-06, + "loss": 8273.5539, + "step": 34190 + }, + { + "epoch": 0.06908616377864955, + "grad_norm": 68582.1328125, + "learning_rate": 6.8400000000000014e-06, + "loss": 9709.0281, + "step": 34200 + }, + { + "epoch": 0.06910636441133336, + "grad_norm": 903.2653198242188, + "learning_rate": 6.842000000000001e-06, + "loss": 7208.1945, + "step": 34210 + }, + { + "epoch": 0.06912656504401718, + "grad_norm": 10924.9697265625, + "learning_rate": 6.844000000000001e-06, + "loss": 13092.843, + "step": 34220 + }, + { + "epoch": 0.069146765676701, + "grad_norm": 76893.3359375, + "learning_rate": 6.846e-06, + "loss": 11139.0516, + "step": 34230 + }, + { + "epoch": 0.0691669663093848, + "grad_norm": 436.65887451171875, + "learning_rate": 6.848e-06, + "loss": 9798.543, + "step": 34240 + }, + { + "epoch": 0.06918716694206863, + "grad_norm": 58645.97265625, + "learning_rate": 6.850000000000001e-06, + "loss": 11558.5195, + "step": 34250 + }, + { + "epoch": 0.06920736757475245, + "grad_norm": 9890.2958984375, + "learning_rate": 6.852000000000001e-06, + "loss": 8000.5969, + "step": 34260 + }, + { + "epoch": 0.06922756820743625, + "grad_norm": 35136.1640625, + "learning_rate": 6.854000000000001e-06, + "loss": 8761.8914, + "step": 34270 + }, + { + "epoch": 0.06924776884012007, + "grad_norm": 48419.8984375, + "learning_rate": 6.856e-06, + "loss": 10474.6914, + "step": 34280 + }, + { + "epoch": 0.06926796947280389, + "grad_norm": 12244.259765625, + "learning_rate": 6.858e-06, + "loss": 6661.1539, + "step": 34290 + }, + { + "epoch": 0.0692881701054877, + "grad_norm": 458247.15625, + "learning_rate": 6.860000000000001e-06, + "loss": 16613.0219, + "step": 34300 + }, + { + "epoch": 0.06930837073817152, + "grad_norm": 8406.056640625, + "learning_rate": 6.8620000000000005e-06, + "loss": 6594.282, + "step": 34310 + }, + { + "epoch": 0.06932857137085534, + "grad_norm": 128969.890625, + "learning_rate": 6.864000000000001e-06, + "loss": 8002.1672, + "step": 34320 + }, + { + "epoch": 0.06934877200353914, + "grad_norm": 88014.4765625, + "learning_rate": 6.866000000000001e-06, + "loss": 23896.8906, + "step": 34330 + }, + { + "epoch": 0.06936897263622296, + "grad_norm": 1254.29296875, + "learning_rate": 6.868e-06, + "loss": 3008.5828, + "step": 34340 + }, + { + "epoch": 0.06938917326890678, + "grad_norm": 33218.0, + "learning_rate": 6.870000000000001e-06, + "loss": 8185.6531, + "step": 34350 + }, + { + "epoch": 0.0694093739015906, + "grad_norm": 360989.09375, + "learning_rate": 6.872000000000001e-06, + "loss": 19503.5422, + "step": 34360 + }, + { + "epoch": 0.06942957453427441, + "grad_norm": 94129.421875, + "learning_rate": 6.874000000000001e-06, + "loss": 17094.8812, + "step": 34370 + }, + { + "epoch": 0.06944977516695823, + "grad_norm": 17512.234375, + "learning_rate": 6.876000000000001e-06, + "loss": 9886.3773, + "step": 34380 + }, + { + "epoch": 0.06946997579964205, + "grad_norm": 46517.3203125, + "learning_rate": 6.878e-06, + "loss": 23294.5422, + "step": 34390 + }, + { + "epoch": 0.06949017643232586, + "grad_norm": 14953.5341796875, + "learning_rate": 6.88e-06, + "loss": 4221.4953, + "step": 34400 + }, + { + "epoch": 0.06951037706500968, + "grad_norm": 19662.126953125, + "learning_rate": 6.882000000000001e-06, + "loss": 11695.2687, + "step": 34410 + }, + { + "epoch": 0.0695305776976935, + "grad_norm": 21186.607421875, + "learning_rate": 6.8840000000000005e-06, + "loss": 6453.7863, + "step": 34420 + }, + { + "epoch": 0.0695507783303773, + "grad_norm": 17726.626953125, + "learning_rate": 6.886000000000001e-06, + "loss": 7709.0633, + "step": 34430 + }, + { + "epoch": 0.06957097896306112, + "grad_norm": 3566.2666015625, + "learning_rate": 6.888e-06, + "loss": 15709.9578, + "step": 34440 + }, + { + "epoch": 0.06959117959574494, + "grad_norm": 74236.4609375, + "learning_rate": 6.89e-06, + "loss": 8122.7109, + "step": 34450 + }, + { + "epoch": 0.06961138022842875, + "grad_norm": 53741.91796875, + "learning_rate": 6.892000000000001e-06, + "loss": 7672.5625, + "step": 34460 + }, + { + "epoch": 0.06963158086111257, + "grad_norm": 201684.359375, + "learning_rate": 6.894e-06, + "loss": 8955.2375, + "step": 34470 + }, + { + "epoch": 0.06965178149379639, + "grad_norm": 6753.55615234375, + "learning_rate": 6.8960000000000006e-06, + "loss": 2535.6117, + "step": 34480 + }, + { + "epoch": 0.0696719821264802, + "grad_norm": 11013.7919921875, + "learning_rate": 6.898e-06, + "loss": 10706.5172, + "step": 34490 + }, + { + "epoch": 0.06969218275916401, + "grad_norm": 575.8831176757812, + "learning_rate": 6.9e-06, + "loss": 4212.0176, + "step": 34500 + }, + { + "epoch": 0.06971238339184783, + "grad_norm": 30898.75390625, + "learning_rate": 6.902000000000001e-06, + "loss": 3454.1418, + "step": 34510 + }, + { + "epoch": 0.06973258402453165, + "grad_norm": 1766.811279296875, + "learning_rate": 6.904e-06, + "loss": 4366.7777, + "step": 34520 + }, + { + "epoch": 0.06975278465721546, + "grad_norm": 39633.3203125, + "learning_rate": 6.9060000000000005e-06, + "loss": 5308.8383, + "step": 34530 + }, + { + "epoch": 0.06977298528989928, + "grad_norm": 3447.427978515625, + "learning_rate": 6.908000000000001e-06, + "loss": 12362.2914, + "step": 34540 + }, + { + "epoch": 0.0697931859225831, + "grad_norm": 147463.28125, + "learning_rate": 6.91e-06, + "loss": 6677.0953, + "step": 34550 + }, + { + "epoch": 0.06981338655526691, + "grad_norm": 78172.0703125, + "learning_rate": 6.912000000000001e-06, + "loss": 24323.5828, + "step": 34560 + }, + { + "epoch": 0.06983358718795073, + "grad_norm": 2443.14111328125, + "learning_rate": 6.914000000000001e-06, + "loss": 13360.9312, + "step": 34570 + }, + { + "epoch": 0.06985378782063455, + "grad_norm": 83727.6875, + "learning_rate": 6.916e-06, + "loss": 7479.5609, + "step": 34580 + }, + { + "epoch": 0.06987398845331835, + "grad_norm": 70980.1875, + "learning_rate": 6.9180000000000005e-06, + "loss": 14384.9375, + "step": 34590 + }, + { + "epoch": 0.06989418908600217, + "grad_norm": 70984.6015625, + "learning_rate": 6.92e-06, + "loss": 20115.9813, + "step": 34600 + }, + { + "epoch": 0.06991438971868599, + "grad_norm": 21630.0703125, + "learning_rate": 6.922000000000001e-06, + "loss": 4694.218, + "step": 34610 + }, + { + "epoch": 0.0699345903513698, + "grad_norm": 35080.91015625, + "learning_rate": 6.924000000000001e-06, + "loss": 9386.8094, + "step": 34620 + }, + { + "epoch": 0.06995479098405362, + "grad_norm": 13243.1591796875, + "learning_rate": 6.926e-06, + "loss": 12038.3742, + "step": 34630 + }, + { + "epoch": 0.06997499161673744, + "grad_norm": 227.5408172607422, + "learning_rate": 6.928e-06, + "loss": 12084.0305, + "step": 34640 + }, + { + "epoch": 0.06999519224942125, + "grad_norm": 632.8187255859375, + "learning_rate": 6.93e-06, + "loss": 11752.2555, + "step": 34650 + }, + { + "epoch": 0.07001539288210507, + "grad_norm": 224.3376922607422, + "learning_rate": 6.932000000000001e-06, + "loss": 10586.0953, + "step": 34660 + }, + { + "epoch": 0.07003559351478889, + "grad_norm": 11929.30078125, + "learning_rate": 6.934000000000001e-06, + "loss": 11211.6516, + "step": 34670 + }, + { + "epoch": 0.0700557941474727, + "grad_norm": 22472.15234375, + "learning_rate": 6.936e-06, + "loss": 14516.4156, + "step": 34680 + }, + { + "epoch": 0.07007599478015651, + "grad_norm": 24138.548828125, + "learning_rate": 6.938e-06, + "loss": 9492.3516, + "step": 34690 + }, + { + "epoch": 0.07009619541284033, + "grad_norm": 6832.9794921875, + "learning_rate": 6.9400000000000005e-06, + "loss": 13568.3422, + "step": 34700 + }, + { + "epoch": 0.07011639604552415, + "grad_norm": 26916.56640625, + "learning_rate": 6.942000000000001e-06, + "loss": 11957.7234, + "step": 34710 + }, + { + "epoch": 0.07013659667820796, + "grad_norm": 48563.88671875, + "learning_rate": 6.944000000000001e-06, + "loss": 6940.1438, + "step": 34720 + }, + { + "epoch": 0.07015679731089178, + "grad_norm": 37744.3984375, + "learning_rate": 6.946000000000001e-06, + "loss": 8114.4219, + "step": 34730 + }, + { + "epoch": 0.0701769979435756, + "grad_norm": 86592.984375, + "learning_rate": 6.948e-06, + "loss": 17606.8844, + "step": 34740 + }, + { + "epoch": 0.0701971985762594, + "grad_norm": 108249.2265625, + "learning_rate": 6.95e-06, + "loss": 11637.6219, + "step": 34750 + }, + { + "epoch": 0.07021739920894322, + "grad_norm": 133241.875, + "learning_rate": 6.952000000000001e-06, + "loss": 12548.3578, + "step": 34760 + }, + { + "epoch": 0.07023759984162704, + "grad_norm": 73842.75, + "learning_rate": 6.954000000000001e-06, + "loss": 8591.982, + "step": 34770 + }, + { + "epoch": 0.07025780047431085, + "grad_norm": 6941.3095703125, + "learning_rate": 6.956000000000001e-06, + "loss": 5159.4273, + "step": 34780 + }, + { + "epoch": 0.07027800110699467, + "grad_norm": 8294.7451171875, + "learning_rate": 6.958e-06, + "loss": 2638.0301, + "step": 34790 + }, + { + "epoch": 0.07029820173967849, + "grad_norm": 1631.356689453125, + "learning_rate": 6.96e-06, + "loss": 2027.942, + "step": 34800 + }, + { + "epoch": 0.0703184023723623, + "grad_norm": 2655.388427734375, + "learning_rate": 6.962000000000001e-06, + "loss": 17257.225, + "step": 34810 + }, + { + "epoch": 0.07033860300504612, + "grad_norm": 4200.6708984375, + "learning_rate": 6.964000000000001e-06, + "loss": 10162.1164, + "step": 34820 + }, + { + "epoch": 0.07035880363772994, + "grad_norm": 4825.427734375, + "learning_rate": 6.966000000000001e-06, + "loss": 4183.1406, + "step": 34830 + }, + { + "epoch": 0.07037900427041376, + "grad_norm": 97206.5, + "learning_rate": 6.968e-06, + "loss": 24861.1984, + "step": 34840 + }, + { + "epoch": 0.07039920490309756, + "grad_norm": 38607.953125, + "learning_rate": 6.97e-06, + "loss": 11127.8039, + "step": 34850 + }, + { + "epoch": 0.07041940553578138, + "grad_norm": 1063.3934326171875, + "learning_rate": 6.972000000000001e-06, + "loss": 6498.3781, + "step": 34860 + }, + { + "epoch": 0.0704396061684652, + "grad_norm": 6667.2255859375, + "learning_rate": 6.9740000000000005e-06, + "loss": 10867.5875, + "step": 34870 + }, + { + "epoch": 0.07045980680114901, + "grad_norm": 53617.48828125, + "learning_rate": 6.976000000000001e-06, + "loss": 7161.1227, + "step": 34880 + }, + { + "epoch": 0.07048000743383283, + "grad_norm": 15975.630859375, + "learning_rate": 6.978e-06, + "loss": 7388.35, + "step": 34890 + }, + { + "epoch": 0.07050020806651665, + "grad_norm": 23896.115234375, + "learning_rate": 6.98e-06, + "loss": 7434.8516, + "step": 34900 + }, + { + "epoch": 0.07052040869920045, + "grad_norm": 1311.2325439453125, + "learning_rate": 6.982000000000001e-06, + "loss": 10634.6992, + "step": 34910 + }, + { + "epoch": 0.07054060933188427, + "grad_norm": 30150.986328125, + "learning_rate": 6.984e-06, + "loss": 5731.2969, + "step": 34920 + }, + { + "epoch": 0.0705608099645681, + "grad_norm": 5106.716796875, + "learning_rate": 6.9860000000000005e-06, + "loss": 2950.5529, + "step": 34930 + }, + { + "epoch": 0.0705810105972519, + "grad_norm": 417999.46875, + "learning_rate": 6.988000000000001e-06, + "loss": 22839.9906, + "step": 34940 + }, + { + "epoch": 0.07060121122993572, + "grad_norm": 9728.6904296875, + "learning_rate": 6.99e-06, + "loss": 4477.368, + "step": 34950 + }, + { + "epoch": 0.07062141186261954, + "grad_norm": 68935.5546875, + "learning_rate": 6.992000000000001e-06, + "loss": 7808.0, + "step": 34960 + }, + { + "epoch": 0.07064161249530335, + "grad_norm": 2797.341064453125, + "learning_rate": 6.994000000000001e-06, + "loss": 8459.5203, + "step": 34970 + }, + { + "epoch": 0.07066181312798717, + "grad_norm": 3301.560791015625, + "learning_rate": 6.9960000000000004e-06, + "loss": 5831.4691, + "step": 34980 + }, + { + "epoch": 0.07068201376067099, + "grad_norm": 1624.8680419921875, + "learning_rate": 6.998000000000001e-06, + "loss": 11984.4625, + "step": 34990 + }, + { + "epoch": 0.0707022143933548, + "grad_norm": 1885.7015380859375, + "learning_rate": 7e-06, + "loss": 10710.3469, + "step": 35000 + }, + { + "epoch": 0.07072241502603861, + "grad_norm": 80636.640625, + "learning_rate": 7.002000000000001e-06, + "loss": 8957.9172, + "step": 35010 + }, + { + "epoch": 0.07074261565872243, + "grad_norm": 80642.328125, + "learning_rate": 7.004000000000001e-06, + "loss": 12363.425, + "step": 35020 + }, + { + "epoch": 0.07076281629140625, + "grad_norm": 147946.515625, + "learning_rate": 7.006e-06, + "loss": 9895.4844, + "step": 35030 + }, + { + "epoch": 0.07078301692409006, + "grad_norm": 5249.71240234375, + "learning_rate": 7.0080000000000005e-06, + "loss": 4863.6617, + "step": 35040 + }, + { + "epoch": 0.07080321755677388, + "grad_norm": 241418.53125, + "learning_rate": 7.01e-06, + "loss": 14614.9281, + "step": 35050 + }, + { + "epoch": 0.0708234181894577, + "grad_norm": 75102.046875, + "learning_rate": 7.012000000000001e-06, + "loss": 5997.834, + "step": 35060 + }, + { + "epoch": 0.0708436188221415, + "grad_norm": 16969.205078125, + "learning_rate": 7.014000000000001e-06, + "loss": 9081.4031, + "step": 35070 + }, + { + "epoch": 0.07086381945482532, + "grad_norm": 1973.8138427734375, + "learning_rate": 7.016e-06, + "loss": 5439.1957, + "step": 35080 + }, + { + "epoch": 0.07088402008750914, + "grad_norm": 50913.31640625, + "learning_rate": 7.018e-06, + "loss": 10065.5063, + "step": 35090 + }, + { + "epoch": 0.07090422072019295, + "grad_norm": 106283.9375, + "learning_rate": 7.0200000000000006e-06, + "loss": 19643.1188, + "step": 35100 + }, + { + "epoch": 0.07092442135287677, + "grad_norm": 34920.28125, + "learning_rate": 7.022000000000001e-06, + "loss": 12885.2625, + "step": 35110 + }, + { + "epoch": 0.07094462198556059, + "grad_norm": 21983.447265625, + "learning_rate": 7.024000000000001e-06, + "loss": 7624.5383, + "step": 35120 + }, + { + "epoch": 0.0709648226182444, + "grad_norm": 4226.6494140625, + "learning_rate": 7.026000000000001e-06, + "loss": 4108.8203, + "step": 35130 + }, + { + "epoch": 0.07098502325092822, + "grad_norm": 3305.096923828125, + "learning_rate": 7.028e-06, + "loss": 6796.0258, + "step": 35140 + }, + { + "epoch": 0.07100522388361204, + "grad_norm": 9811.6171875, + "learning_rate": 7.0300000000000005e-06, + "loss": 18249.3859, + "step": 35150 + }, + { + "epoch": 0.07102542451629586, + "grad_norm": 28113.78125, + "learning_rate": 7.0320000000000015e-06, + "loss": 9563.3031, + "step": 35160 + }, + { + "epoch": 0.07104562514897966, + "grad_norm": 49549.2734375, + "learning_rate": 7.034000000000001e-06, + "loss": 4626.2852, + "step": 35170 + }, + { + "epoch": 0.07106582578166348, + "grad_norm": 6890.16943359375, + "learning_rate": 7.036000000000001e-06, + "loss": 10288.7617, + "step": 35180 + }, + { + "epoch": 0.0710860264143473, + "grad_norm": 33965.43359375, + "learning_rate": 7.038e-06, + "loss": 12762.3516, + "step": 35190 + }, + { + "epoch": 0.07110622704703111, + "grad_norm": 73504.8359375, + "learning_rate": 7.04e-06, + "loss": 15709.8406, + "step": 35200 + }, + { + "epoch": 0.07112642767971493, + "grad_norm": 3341.400390625, + "learning_rate": 7.042000000000001e-06, + "loss": 12479.6086, + "step": 35210 + }, + { + "epoch": 0.07114662831239875, + "grad_norm": 21998.908203125, + "learning_rate": 7.044000000000001e-06, + "loss": 10010.125, + "step": 35220 + }, + { + "epoch": 0.07116682894508256, + "grad_norm": 300754.90625, + "learning_rate": 7.046000000000001e-06, + "loss": 11944.3844, + "step": 35230 + }, + { + "epoch": 0.07118702957776638, + "grad_norm": 0.0, + "learning_rate": 7.048e-06, + "loss": 13261.2156, + "step": 35240 + }, + { + "epoch": 0.0712072302104502, + "grad_norm": 4770.02294921875, + "learning_rate": 7.05e-06, + "loss": 15717.8656, + "step": 35250 + }, + { + "epoch": 0.071227430843134, + "grad_norm": 4944.18408203125, + "learning_rate": 7.052000000000001e-06, + "loss": 8238.3648, + "step": 35260 + }, + { + "epoch": 0.07124763147581782, + "grad_norm": 11074.0107421875, + "learning_rate": 7.0540000000000006e-06, + "loss": 8655.1414, + "step": 35270 + }, + { + "epoch": 0.07126783210850164, + "grad_norm": 14417.705078125, + "learning_rate": 7.056000000000001e-06, + "loss": 5516.7539, + "step": 35280 + }, + { + "epoch": 0.07128803274118545, + "grad_norm": 2699.06103515625, + "learning_rate": 7.058e-06, + "loss": 7624.4828, + "step": 35290 + }, + { + "epoch": 0.07130823337386927, + "grad_norm": 12398.279296875, + "learning_rate": 7.06e-06, + "loss": 7319.3031, + "step": 35300 + }, + { + "epoch": 0.07132843400655309, + "grad_norm": 5781.90966796875, + "learning_rate": 7.062000000000001e-06, + "loss": 2399.5492, + "step": 35310 + }, + { + "epoch": 0.07134863463923691, + "grad_norm": 5015.8359375, + "learning_rate": 7.0640000000000005e-06, + "loss": 11993.9508, + "step": 35320 + }, + { + "epoch": 0.07136883527192071, + "grad_norm": 405738.6875, + "learning_rate": 7.066000000000001e-06, + "loss": 15200.5625, + "step": 35330 + }, + { + "epoch": 0.07138903590460453, + "grad_norm": 168327.0625, + "learning_rate": 7.068000000000001e-06, + "loss": 21285.0125, + "step": 35340 + }, + { + "epoch": 0.07140923653728835, + "grad_norm": 6376.2880859375, + "learning_rate": 7.07e-06, + "loss": 10387.5812, + "step": 35350 + }, + { + "epoch": 0.07142943716997216, + "grad_norm": 23831.228515625, + "learning_rate": 7.072000000000001e-06, + "loss": 7754.6039, + "step": 35360 + }, + { + "epoch": 0.07144963780265598, + "grad_norm": 18397.595703125, + "learning_rate": 7.074000000000001e-06, + "loss": 9684.425, + "step": 35370 + }, + { + "epoch": 0.0714698384353398, + "grad_norm": 6829.88232421875, + "learning_rate": 7.0760000000000005e-06, + "loss": 10429.7898, + "step": 35380 + }, + { + "epoch": 0.0714900390680236, + "grad_norm": 6447.0263671875, + "learning_rate": 7.078000000000001e-06, + "loss": 6257.657, + "step": 35390 + }, + { + "epoch": 0.07151023970070743, + "grad_norm": 117097.265625, + "learning_rate": 7.08e-06, + "loss": 7749.1594, + "step": 35400 + }, + { + "epoch": 0.07153044033339125, + "grad_norm": 72493.96875, + "learning_rate": 7.082000000000001e-06, + "loss": 8491.382, + "step": 35410 + }, + { + "epoch": 0.07155064096607505, + "grad_norm": 10526.7001953125, + "learning_rate": 7.084000000000001e-06, + "loss": 9557.782, + "step": 35420 + }, + { + "epoch": 0.07157084159875887, + "grad_norm": 121026.59375, + "learning_rate": 7.0860000000000004e-06, + "loss": 6878.2539, + "step": 35430 + }, + { + "epoch": 0.07159104223144269, + "grad_norm": 71394.8359375, + "learning_rate": 7.088000000000001e-06, + "loss": 4696.7922, + "step": 35440 + }, + { + "epoch": 0.0716112428641265, + "grad_norm": 12620.451171875, + "learning_rate": 7.09e-06, + "loss": 9471.9609, + "step": 35450 + }, + { + "epoch": 0.07163144349681032, + "grad_norm": 38769.984375, + "learning_rate": 7.092000000000001e-06, + "loss": 7997.3359, + "step": 35460 + }, + { + "epoch": 0.07165164412949414, + "grad_norm": 14435.6953125, + "learning_rate": 7.094000000000001e-06, + "loss": 8906.8289, + "step": 35470 + }, + { + "epoch": 0.07167184476217796, + "grad_norm": 3644.096923828125, + "learning_rate": 7.096e-06, + "loss": 7219.4563, + "step": 35480 + }, + { + "epoch": 0.07169204539486176, + "grad_norm": 25805.7734375, + "learning_rate": 7.0980000000000005e-06, + "loss": 9286.8516, + "step": 35490 + }, + { + "epoch": 0.07171224602754558, + "grad_norm": 19669.517578125, + "learning_rate": 7.100000000000001e-06, + "loss": 17099.2156, + "step": 35500 + }, + { + "epoch": 0.0717324466602294, + "grad_norm": 4133.20361328125, + "learning_rate": 7.102000000000001e-06, + "loss": 3642.3988, + "step": 35510 + }, + { + "epoch": 0.07175264729291321, + "grad_norm": 28601.5, + "learning_rate": 7.104000000000001e-06, + "loss": 5798.5582, + "step": 35520 + }, + { + "epoch": 0.07177284792559703, + "grad_norm": 58323.26953125, + "learning_rate": 7.106000000000001e-06, + "loss": 18426.2641, + "step": 35530 + }, + { + "epoch": 0.07179304855828085, + "grad_norm": 26651.82421875, + "learning_rate": 7.108e-06, + "loss": 11120.1594, + "step": 35540 + }, + { + "epoch": 0.07181324919096466, + "grad_norm": 1953.768310546875, + "learning_rate": 7.1100000000000005e-06, + "loss": 12560.5586, + "step": 35550 + }, + { + "epoch": 0.07183344982364848, + "grad_norm": 1038.0177001953125, + "learning_rate": 7.1120000000000015e-06, + "loss": 9750.1969, + "step": 35560 + }, + { + "epoch": 0.0718536504563323, + "grad_norm": 10049.060546875, + "learning_rate": 7.114000000000001e-06, + "loss": 4396.5668, + "step": 35570 + }, + { + "epoch": 0.0718738510890161, + "grad_norm": 7641.98095703125, + "learning_rate": 7.116000000000001e-06, + "loss": 6500.3637, + "step": 35580 + }, + { + "epoch": 0.07189405172169992, + "grad_norm": 206588.484375, + "learning_rate": 7.118e-06, + "loss": 20394.9953, + "step": 35590 + }, + { + "epoch": 0.07191425235438374, + "grad_norm": 9981.62109375, + "learning_rate": 7.1200000000000004e-06, + "loss": 1986.9441, + "step": 35600 + }, + { + "epoch": 0.07193445298706755, + "grad_norm": 27782.451171875, + "learning_rate": 7.1220000000000014e-06, + "loss": 13793.9094, + "step": 35610 + }, + { + "epoch": 0.07195465361975137, + "grad_norm": 27085.94140625, + "learning_rate": 7.124000000000001e-06, + "loss": 6172.95, + "step": 35620 + }, + { + "epoch": 0.07197485425243519, + "grad_norm": 36581.8515625, + "learning_rate": 7.126000000000001e-06, + "loss": 3830.998, + "step": 35630 + }, + { + "epoch": 0.07199505488511901, + "grad_norm": 309.8634033203125, + "learning_rate": 7.128e-06, + "loss": 12224.3344, + "step": 35640 + }, + { + "epoch": 0.07201525551780281, + "grad_norm": 8309.4775390625, + "learning_rate": 7.13e-06, + "loss": 5652.7465, + "step": 35650 + }, + { + "epoch": 0.07203545615048663, + "grad_norm": 11941.2177734375, + "learning_rate": 7.132e-06, + "loss": 10893.5719, + "step": 35660 + }, + { + "epoch": 0.07205565678317045, + "grad_norm": 45531.08203125, + "learning_rate": 7.134000000000001e-06, + "loss": 7286.5461, + "step": 35670 + }, + { + "epoch": 0.07207585741585426, + "grad_norm": 50283.640625, + "learning_rate": 7.136000000000001e-06, + "loss": 7560.9484, + "step": 35680 + }, + { + "epoch": 0.07209605804853808, + "grad_norm": 7578.30078125, + "learning_rate": 7.138e-06, + "loss": 11790.8438, + "step": 35690 + }, + { + "epoch": 0.0721162586812219, + "grad_norm": 40790.578125, + "learning_rate": 7.14e-06, + "loss": 17603.4031, + "step": 35700 + }, + { + "epoch": 0.07213645931390571, + "grad_norm": 777.1832885742188, + "learning_rate": 7.142e-06, + "loss": 7634.0562, + "step": 35710 + }, + { + "epoch": 0.07215665994658953, + "grad_norm": 51698.11328125, + "learning_rate": 7.1440000000000005e-06, + "loss": 5002.8672, + "step": 35720 + }, + { + "epoch": 0.07217686057927335, + "grad_norm": 5799.01708984375, + "learning_rate": 7.146000000000001e-06, + "loss": 6099.2414, + "step": 35730 + }, + { + "epoch": 0.07219706121195715, + "grad_norm": 2742.393310546875, + "learning_rate": 7.148000000000001e-06, + "loss": 12557.0125, + "step": 35740 + }, + { + "epoch": 0.07221726184464097, + "grad_norm": 633.3200073242188, + "learning_rate": 7.15e-06, + "loss": 14251.6391, + "step": 35750 + }, + { + "epoch": 0.0722374624773248, + "grad_norm": 33261.93359375, + "learning_rate": 7.152e-06, + "loss": 5904.2543, + "step": 35760 + }, + { + "epoch": 0.0722576631100086, + "grad_norm": 4088.23291015625, + "learning_rate": 7.154000000000001e-06, + "loss": 16851.7, + "step": 35770 + }, + { + "epoch": 0.07227786374269242, + "grad_norm": 54231.21484375, + "learning_rate": 7.156000000000001e-06, + "loss": 16962.9734, + "step": 35780 + }, + { + "epoch": 0.07229806437537624, + "grad_norm": 22505.466796875, + "learning_rate": 7.158000000000001e-06, + "loss": 7260.0602, + "step": 35790 + }, + { + "epoch": 0.07231826500806006, + "grad_norm": 51164.6953125, + "learning_rate": 7.16e-06, + "loss": 7140.7609, + "step": 35800 + }, + { + "epoch": 0.07233846564074387, + "grad_norm": 8213.5654296875, + "learning_rate": 7.162e-06, + "loss": 6275.8039, + "step": 35810 + }, + { + "epoch": 0.07235866627342769, + "grad_norm": 10416.17578125, + "learning_rate": 7.164000000000001e-06, + "loss": 8770.7695, + "step": 35820 + }, + { + "epoch": 0.0723788669061115, + "grad_norm": 2866.545654296875, + "learning_rate": 7.1660000000000005e-06, + "loss": 4498.266, + "step": 35830 + }, + { + "epoch": 0.07239906753879531, + "grad_norm": 31742.1171875, + "learning_rate": 7.168000000000001e-06, + "loss": 4404.4984, + "step": 35840 + }, + { + "epoch": 0.07241926817147913, + "grad_norm": 82786.3671875, + "learning_rate": 7.17e-06, + "loss": 9784.1063, + "step": 35850 + }, + { + "epoch": 0.07243946880416295, + "grad_norm": 8391.3662109375, + "learning_rate": 7.172e-06, + "loss": 8878.4883, + "step": 35860 + }, + { + "epoch": 0.07245966943684676, + "grad_norm": 7762.3076171875, + "learning_rate": 7.174000000000001e-06, + "loss": 7379.9156, + "step": 35870 + }, + { + "epoch": 0.07247987006953058, + "grad_norm": 259597.828125, + "learning_rate": 7.176e-06, + "loss": 14815.575, + "step": 35880 + }, + { + "epoch": 0.0725000707022144, + "grad_norm": 1259.002197265625, + "learning_rate": 7.1780000000000006e-06, + "loss": 3867.9652, + "step": 35890 + }, + { + "epoch": 0.0725202713348982, + "grad_norm": 38609.734375, + "learning_rate": 7.180000000000001e-06, + "loss": 7309.0273, + "step": 35900 + }, + { + "epoch": 0.07254047196758202, + "grad_norm": 90655.75, + "learning_rate": 7.182e-06, + "loss": 12386.1953, + "step": 35910 + }, + { + "epoch": 0.07256067260026584, + "grad_norm": 108012.046875, + "learning_rate": 7.184000000000001e-06, + "loss": 14737.5344, + "step": 35920 + }, + { + "epoch": 0.07258087323294965, + "grad_norm": 1399.2470703125, + "learning_rate": 7.186000000000001e-06, + "loss": 13227.6531, + "step": 35930 + }, + { + "epoch": 0.07260107386563347, + "grad_norm": 4862.77001953125, + "learning_rate": 7.1880000000000005e-06, + "loss": 6930.1687, + "step": 35940 + }, + { + "epoch": 0.07262127449831729, + "grad_norm": 94040.859375, + "learning_rate": 7.190000000000001e-06, + "loss": 6587.2641, + "step": 35950 + }, + { + "epoch": 0.07264147513100111, + "grad_norm": 26590.49609375, + "learning_rate": 7.192e-06, + "loss": 8354.6938, + "step": 35960 + }, + { + "epoch": 0.07266167576368492, + "grad_norm": 2292.135009765625, + "learning_rate": 7.194000000000001e-06, + "loss": 7416.9602, + "step": 35970 + }, + { + "epoch": 0.07268187639636874, + "grad_norm": 15186.9111328125, + "learning_rate": 7.196000000000001e-06, + "loss": 14719.3406, + "step": 35980 + }, + { + "epoch": 0.07270207702905256, + "grad_norm": 11408.6875, + "learning_rate": 7.198e-06, + "loss": 5229.3133, + "step": 35990 + }, + { + "epoch": 0.07272227766173636, + "grad_norm": 143347.046875, + "learning_rate": 7.2000000000000005e-06, + "loss": 16849.3937, + "step": 36000 + }, + { + "epoch": 0.07274247829442018, + "grad_norm": 55999.25390625, + "learning_rate": 7.202e-06, + "loss": 11007.2437, + "step": 36010 + }, + { + "epoch": 0.072762678927104, + "grad_norm": 11620.8212890625, + "learning_rate": 7.204000000000001e-06, + "loss": 8746.0266, + "step": 36020 + }, + { + "epoch": 0.07278287955978781, + "grad_norm": 4830.78955078125, + "learning_rate": 7.206000000000001e-06, + "loss": 7253.1836, + "step": 36030 + }, + { + "epoch": 0.07280308019247163, + "grad_norm": 20024.103515625, + "learning_rate": 7.208e-06, + "loss": 4872.0516, + "step": 36040 + }, + { + "epoch": 0.07282328082515545, + "grad_norm": 2976.0537109375, + "learning_rate": 7.2100000000000004e-06, + "loss": 4112.1852, + "step": 36050 + }, + { + "epoch": 0.07284348145783925, + "grad_norm": 17926.447265625, + "learning_rate": 7.212e-06, + "loss": 5044.2047, + "step": 36060 + }, + { + "epoch": 0.07286368209052307, + "grad_norm": 49300.91015625, + "learning_rate": 7.214000000000001e-06, + "loss": 6238.8, + "step": 36070 + }, + { + "epoch": 0.0728838827232069, + "grad_norm": 31235.734375, + "learning_rate": 7.216000000000001e-06, + "loss": 4711.5523, + "step": 36080 + }, + { + "epoch": 0.0729040833558907, + "grad_norm": 72280.578125, + "learning_rate": 7.218e-06, + "loss": 9506.825, + "step": 36090 + }, + { + "epoch": 0.07292428398857452, + "grad_norm": 7389.68896484375, + "learning_rate": 7.22e-06, + "loss": 9037.5555, + "step": 36100 + }, + { + "epoch": 0.07294448462125834, + "grad_norm": 1357.3037109375, + "learning_rate": 7.2220000000000005e-06, + "loss": 3633.3184, + "step": 36110 + }, + { + "epoch": 0.07296468525394216, + "grad_norm": 263399.8125, + "learning_rate": 7.224000000000001e-06, + "loss": 17041.6969, + "step": 36120 + }, + { + "epoch": 0.07298488588662597, + "grad_norm": 20317.33203125, + "learning_rate": 7.226000000000001e-06, + "loss": 4851.2949, + "step": 36130 + }, + { + "epoch": 0.07300508651930979, + "grad_norm": 103695.0703125, + "learning_rate": 7.228000000000001e-06, + "loss": 14610.3578, + "step": 36140 + }, + { + "epoch": 0.0730252871519936, + "grad_norm": 15626.0478515625, + "learning_rate": 7.23e-06, + "loss": 6016.0605, + "step": 36150 + }, + { + "epoch": 0.07304548778467741, + "grad_norm": 154722.8125, + "learning_rate": 7.232e-06, + "loss": 7503.8953, + "step": 36160 + }, + { + "epoch": 0.07306568841736123, + "grad_norm": 18221.078125, + "learning_rate": 7.234000000000001e-06, + "loss": 6664.3828, + "step": 36170 + }, + { + "epoch": 0.07308588905004505, + "grad_norm": 10793.326171875, + "learning_rate": 7.236000000000001e-06, + "loss": 10779.3547, + "step": 36180 + }, + { + "epoch": 0.07310608968272886, + "grad_norm": 1629.685791015625, + "learning_rate": 7.238000000000001e-06, + "loss": 7061.3234, + "step": 36190 + }, + { + "epoch": 0.07312629031541268, + "grad_norm": 7395.17822265625, + "learning_rate": 7.24e-06, + "loss": 3486.493, + "step": 36200 + }, + { + "epoch": 0.0731464909480965, + "grad_norm": 17595.1484375, + "learning_rate": 7.242e-06, + "loss": 8752.9688, + "step": 36210 + }, + { + "epoch": 0.0731666915807803, + "grad_norm": 17806.55859375, + "learning_rate": 7.244000000000001e-06, + "loss": 8677.9234, + "step": 36220 + }, + { + "epoch": 0.07318689221346412, + "grad_norm": 2751.87646484375, + "learning_rate": 7.246000000000001e-06, + "loss": 14280.0766, + "step": 36230 + }, + { + "epoch": 0.07320709284614794, + "grad_norm": 5889.75927734375, + "learning_rate": 7.248000000000001e-06, + "loss": 10814.5828, + "step": 36240 + }, + { + "epoch": 0.07322729347883175, + "grad_norm": 130140.1015625, + "learning_rate": 7.25e-06, + "loss": 18381.1984, + "step": 36250 + }, + { + "epoch": 0.07324749411151557, + "grad_norm": 148101.015625, + "learning_rate": 7.252e-06, + "loss": 11588.4789, + "step": 36260 + }, + { + "epoch": 0.07326769474419939, + "grad_norm": 92009.9609375, + "learning_rate": 7.254000000000001e-06, + "loss": 10110.1211, + "step": 36270 + }, + { + "epoch": 0.07328789537688321, + "grad_norm": 16551.6953125, + "learning_rate": 7.2560000000000005e-06, + "loss": 6080.7344, + "step": 36280 + }, + { + "epoch": 0.07330809600956702, + "grad_norm": 119515.53125, + "learning_rate": 7.258000000000001e-06, + "loss": 9173.4078, + "step": 36290 + }, + { + "epoch": 0.07332829664225084, + "grad_norm": 45172.05859375, + "learning_rate": 7.260000000000001e-06, + "loss": 11895.993, + "step": 36300 + }, + { + "epoch": 0.07334849727493466, + "grad_norm": 10031.3779296875, + "learning_rate": 7.262e-06, + "loss": 7900.3461, + "step": 36310 + }, + { + "epoch": 0.07336869790761846, + "grad_norm": 1280.4136962890625, + "learning_rate": 7.264000000000001e-06, + "loss": 3405.727, + "step": 36320 + }, + { + "epoch": 0.07338889854030228, + "grad_norm": 6419.67333984375, + "learning_rate": 7.266000000000001e-06, + "loss": 7955.7969, + "step": 36330 + }, + { + "epoch": 0.0734090991729861, + "grad_norm": 97105.75, + "learning_rate": 7.2680000000000005e-06, + "loss": 19653.2469, + "step": 36340 + }, + { + "epoch": 0.07342929980566991, + "grad_norm": 16279.2744140625, + "learning_rate": 7.270000000000001e-06, + "loss": 6343.123, + "step": 36350 + }, + { + "epoch": 0.07344950043835373, + "grad_norm": 10034.98828125, + "learning_rate": 7.272e-06, + "loss": 12259.6086, + "step": 36360 + }, + { + "epoch": 0.07346970107103755, + "grad_norm": 15509.484375, + "learning_rate": 7.274000000000001e-06, + "loss": 8694.0445, + "step": 36370 + }, + { + "epoch": 0.07348990170372136, + "grad_norm": 13334.9619140625, + "learning_rate": 7.276000000000001e-06, + "loss": 12740.2898, + "step": 36380 + }, + { + "epoch": 0.07351010233640518, + "grad_norm": 15020.7626953125, + "learning_rate": 7.2780000000000005e-06, + "loss": 7202.5477, + "step": 36390 + }, + { + "epoch": 0.073530302969089, + "grad_norm": 5054.1845703125, + "learning_rate": 7.280000000000001e-06, + "loss": 5503.3484, + "step": 36400 + }, + { + "epoch": 0.0735505036017728, + "grad_norm": 214527.984375, + "learning_rate": 7.282e-06, + "loss": 13731.3094, + "step": 36410 + }, + { + "epoch": 0.07357070423445662, + "grad_norm": 251.22720336914062, + "learning_rate": 7.284000000000001e-06, + "loss": 10211.443, + "step": 36420 + }, + { + "epoch": 0.07359090486714044, + "grad_norm": 102783.6484375, + "learning_rate": 7.286000000000001e-06, + "loss": 11801.2953, + "step": 36430 + }, + { + "epoch": 0.07361110549982426, + "grad_norm": 10040.662109375, + "learning_rate": 7.288e-06, + "loss": 5678.4418, + "step": 36440 + }, + { + "epoch": 0.07363130613250807, + "grad_norm": 5371.892578125, + "learning_rate": 7.2900000000000005e-06, + "loss": 5015.432, + "step": 36450 + }, + { + "epoch": 0.07365150676519189, + "grad_norm": 57584.4140625, + "learning_rate": 7.292e-06, + "loss": 15074.2234, + "step": 36460 + }, + { + "epoch": 0.07367170739787571, + "grad_norm": 40452.171875, + "learning_rate": 7.294000000000001e-06, + "loss": 8852.1953, + "step": 36470 + }, + { + "epoch": 0.07369190803055951, + "grad_norm": 37082.8203125, + "learning_rate": 7.296000000000001e-06, + "loss": 2351.2555, + "step": 36480 + }, + { + "epoch": 0.07371210866324333, + "grad_norm": 27533.486328125, + "learning_rate": 7.298e-06, + "loss": 9524.025, + "step": 36490 + }, + { + "epoch": 0.07373230929592715, + "grad_norm": 16788.173828125, + "learning_rate": 7.3e-06, + "loss": 8625.4234, + "step": 36500 + }, + { + "epoch": 0.07375250992861096, + "grad_norm": 522.4963989257812, + "learning_rate": 7.3020000000000006e-06, + "loss": 5943.184, + "step": 36510 + }, + { + "epoch": 0.07377271056129478, + "grad_norm": 15315.478515625, + "learning_rate": 7.304000000000001e-06, + "loss": 7208.3188, + "step": 36520 + }, + { + "epoch": 0.0737929111939786, + "grad_norm": 14543.5244140625, + "learning_rate": 7.306000000000001e-06, + "loss": 7094.7719, + "step": 36530 + }, + { + "epoch": 0.0738131118266624, + "grad_norm": 13912.7490234375, + "learning_rate": 7.308000000000001e-06, + "loss": 9625.9086, + "step": 36540 + }, + { + "epoch": 0.07383331245934623, + "grad_norm": 5164.98974609375, + "learning_rate": 7.31e-06, + "loss": 12442.1961, + "step": 36550 + }, + { + "epoch": 0.07385351309203005, + "grad_norm": 22487.935546875, + "learning_rate": 7.3120000000000005e-06, + "loss": 15141.0797, + "step": 36560 + }, + { + "epoch": 0.07387371372471385, + "grad_norm": 24316.5390625, + "learning_rate": 7.3140000000000015e-06, + "loss": 21206.3391, + "step": 36570 + }, + { + "epoch": 0.07389391435739767, + "grad_norm": 33877.45703125, + "learning_rate": 7.316000000000001e-06, + "loss": 8906.5461, + "step": 36580 + }, + { + "epoch": 0.07391411499008149, + "grad_norm": 45266.21875, + "learning_rate": 7.318000000000001e-06, + "loss": 12841.6734, + "step": 36590 + }, + { + "epoch": 0.0739343156227653, + "grad_norm": 267676.0, + "learning_rate": 7.32e-06, + "loss": 19315.5766, + "step": 36600 + }, + { + "epoch": 0.07395451625544912, + "grad_norm": 23760.787109375, + "learning_rate": 7.322e-06, + "loss": 6138.0379, + "step": 36610 + }, + { + "epoch": 0.07397471688813294, + "grad_norm": 14904.447265625, + "learning_rate": 7.324000000000001e-06, + "loss": 10292.8609, + "step": 36620 + }, + { + "epoch": 0.07399491752081676, + "grad_norm": 4921.74658203125, + "learning_rate": 7.326000000000001e-06, + "loss": 14813.9, + "step": 36630 + }, + { + "epoch": 0.07401511815350056, + "grad_norm": 2232.633056640625, + "learning_rate": 7.328000000000001e-06, + "loss": 6976.7555, + "step": 36640 + }, + { + "epoch": 0.07403531878618438, + "grad_norm": 102383.7734375, + "learning_rate": 7.33e-06, + "loss": 11341.2516, + "step": 36650 + }, + { + "epoch": 0.0740555194188682, + "grad_norm": 212655.984375, + "learning_rate": 7.332e-06, + "loss": 11673.207, + "step": 36660 + }, + { + "epoch": 0.07407572005155201, + "grad_norm": 3077.040283203125, + "learning_rate": 7.334000000000001e-06, + "loss": 5726.523, + "step": 36670 + }, + { + "epoch": 0.07409592068423583, + "grad_norm": 101279.8125, + "learning_rate": 7.3360000000000006e-06, + "loss": 8585.2188, + "step": 36680 + }, + { + "epoch": 0.07411612131691965, + "grad_norm": 32074.77734375, + "learning_rate": 7.338000000000001e-06, + "loss": 13890.7453, + "step": 36690 + }, + { + "epoch": 0.07413632194960346, + "grad_norm": 18290.1796875, + "learning_rate": 7.340000000000001e-06, + "loss": 4177.5727, + "step": 36700 + }, + { + "epoch": 0.07415652258228728, + "grad_norm": 6296.44873046875, + "learning_rate": 7.342e-06, + "loss": 5665.7621, + "step": 36710 + }, + { + "epoch": 0.0741767232149711, + "grad_norm": 3201.275146484375, + "learning_rate": 7.344000000000001e-06, + "loss": 3427.3406, + "step": 36720 + }, + { + "epoch": 0.0741969238476549, + "grad_norm": 10342.951171875, + "learning_rate": 7.346000000000001e-06, + "loss": 16273.5281, + "step": 36730 + }, + { + "epoch": 0.07421712448033872, + "grad_norm": 15902.447265625, + "learning_rate": 7.348000000000001e-06, + "loss": 12990.2344, + "step": 36740 + }, + { + "epoch": 0.07423732511302254, + "grad_norm": 14989.0595703125, + "learning_rate": 7.350000000000001e-06, + "loss": 5373.0406, + "step": 36750 + }, + { + "epoch": 0.07425752574570635, + "grad_norm": 19423.630859375, + "learning_rate": 7.352e-06, + "loss": 9513.1742, + "step": 36760 + }, + { + "epoch": 0.07427772637839017, + "grad_norm": 36878.546875, + "learning_rate": 7.354000000000001e-06, + "loss": 7207.9102, + "step": 36770 + }, + { + "epoch": 0.07429792701107399, + "grad_norm": 29562.28125, + "learning_rate": 7.356000000000001e-06, + "loss": 8788.0812, + "step": 36780 + }, + { + "epoch": 0.07431812764375781, + "grad_norm": 6229.736328125, + "learning_rate": 7.3580000000000005e-06, + "loss": 12874.2313, + "step": 36790 + }, + { + "epoch": 0.07433832827644161, + "grad_norm": 10912.2568359375, + "learning_rate": 7.360000000000001e-06, + "loss": 3899.2457, + "step": 36800 + }, + { + "epoch": 0.07435852890912543, + "grad_norm": 3115.259033203125, + "learning_rate": 7.362e-06, + "loss": 15101.1719, + "step": 36810 + }, + { + "epoch": 0.07437872954180925, + "grad_norm": 2024.281005859375, + "learning_rate": 7.364000000000001e-06, + "loss": 18108.8734, + "step": 36820 + }, + { + "epoch": 0.07439893017449306, + "grad_norm": 1051.8671875, + "learning_rate": 7.366000000000001e-06, + "loss": 7300.6945, + "step": 36830 + }, + { + "epoch": 0.07441913080717688, + "grad_norm": 3116.01416015625, + "learning_rate": 7.3680000000000004e-06, + "loss": 5403.4879, + "step": 36840 + }, + { + "epoch": 0.0744393314398607, + "grad_norm": 23802.212890625, + "learning_rate": 7.370000000000001e-06, + "loss": 9536.7406, + "step": 36850 + }, + { + "epoch": 0.07445953207254451, + "grad_norm": 6021.45068359375, + "learning_rate": 7.372e-06, + "loss": 4921.7492, + "step": 36860 + }, + { + "epoch": 0.07447973270522833, + "grad_norm": 38596.69921875, + "learning_rate": 7.374000000000001e-06, + "loss": 2290.5682, + "step": 36870 + }, + { + "epoch": 0.07449993333791215, + "grad_norm": 77197.890625, + "learning_rate": 7.376000000000001e-06, + "loss": 3774.6914, + "step": 36880 + }, + { + "epoch": 0.07452013397059595, + "grad_norm": 2183.840576171875, + "learning_rate": 7.378e-06, + "loss": 4246.6156, + "step": 36890 + }, + { + "epoch": 0.07454033460327977, + "grad_norm": 10989.8291015625, + "learning_rate": 7.3800000000000005e-06, + "loss": 6825.6211, + "step": 36900 + }, + { + "epoch": 0.0745605352359636, + "grad_norm": 60025.765625, + "learning_rate": 7.382000000000001e-06, + "loss": 8961.1711, + "step": 36910 + }, + { + "epoch": 0.0745807358686474, + "grad_norm": 7151.474609375, + "learning_rate": 7.384e-06, + "loss": 8704.6555, + "step": 36920 + }, + { + "epoch": 0.07460093650133122, + "grad_norm": 10005.8056640625, + "learning_rate": 7.386000000000001e-06, + "loss": 8508.7156, + "step": 36930 + }, + { + "epoch": 0.07462113713401504, + "grad_norm": 146568.921875, + "learning_rate": 7.388000000000001e-06, + "loss": 10160.7102, + "step": 36940 + }, + { + "epoch": 0.07464133776669886, + "grad_norm": 10711.8671875, + "learning_rate": 7.39e-06, + "loss": 5456.0465, + "step": 36950 + }, + { + "epoch": 0.07466153839938267, + "grad_norm": 3788.166748046875, + "learning_rate": 7.3920000000000005e-06, + "loss": 5077.5684, + "step": 36960 + }, + { + "epoch": 0.07468173903206649, + "grad_norm": 10643.041015625, + "learning_rate": 7.394e-06, + "loss": 4192.216, + "step": 36970 + }, + { + "epoch": 0.0747019396647503, + "grad_norm": 39190.6875, + "learning_rate": 7.396000000000001e-06, + "loss": 7030.4438, + "step": 36980 + }, + { + "epoch": 0.07472214029743411, + "grad_norm": 3894.966552734375, + "learning_rate": 7.398000000000001e-06, + "loss": 5514.5102, + "step": 36990 + }, + { + "epoch": 0.07474234093011793, + "grad_norm": 3225.404052734375, + "learning_rate": 7.4e-06, + "loss": 6164.6582, + "step": 37000 + }, + { + "epoch": 0.07476254156280175, + "grad_norm": 23406.685546875, + "learning_rate": 7.4020000000000005e-06, + "loss": 8545.4633, + "step": 37010 + }, + { + "epoch": 0.07478274219548556, + "grad_norm": 2071.168212890625, + "learning_rate": 7.404e-06, + "loss": 8136.8523, + "step": 37020 + }, + { + "epoch": 0.07480294282816938, + "grad_norm": 59512.3203125, + "learning_rate": 7.406000000000001e-06, + "loss": 7447.8313, + "step": 37030 + }, + { + "epoch": 0.0748231434608532, + "grad_norm": 1137.1683349609375, + "learning_rate": 7.408000000000001e-06, + "loss": 6492.5133, + "step": 37040 + }, + { + "epoch": 0.074843344093537, + "grad_norm": 1736.72802734375, + "learning_rate": 7.41e-06, + "loss": 11090.343, + "step": 37050 + }, + { + "epoch": 0.07486354472622082, + "grad_norm": 2592.012451171875, + "learning_rate": 7.412e-06, + "loss": 8795.4805, + "step": 37060 + }, + { + "epoch": 0.07488374535890464, + "grad_norm": 2151.290283203125, + "learning_rate": 7.4140000000000005e-06, + "loss": 13558.9016, + "step": 37070 + }, + { + "epoch": 0.07490394599158845, + "grad_norm": 21302.19140625, + "learning_rate": 7.416000000000001e-06, + "loss": 26996.6969, + "step": 37080 + }, + { + "epoch": 0.07492414662427227, + "grad_norm": 3050.98095703125, + "learning_rate": 7.418000000000001e-06, + "loss": 9969.1656, + "step": 37090 + }, + { + "epoch": 0.07494434725695609, + "grad_norm": 1966.739990234375, + "learning_rate": 7.420000000000001e-06, + "loss": 9221.2891, + "step": 37100 + }, + { + "epoch": 0.07496454788963991, + "grad_norm": 1878.593505859375, + "learning_rate": 7.422e-06, + "loss": 10220.5398, + "step": 37110 + }, + { + "epoch": 0.07498474852232372, + "grad_norm": 50237.9453125, + "learning_rate": 7.424e-06, + "loss": 3626.3523, + "step": 37120 + }, + { + "epoch": 0.07500494915500754, + "grad_norm": 59836.50390625, + "learning_rate": 7.426000000000001e-06, + "loss": 4886.568, + "step": 37130 + }, + { + "epoch": 0.07502514978769136, + "grad_norm": 6598.1376953125, + "learning_rate": 7.428000000000001e-06, + "loss": 7044.6195, + "step": 37140 + }, + { + "epoch": 0.07504535042037516, + "grad_norm": 6845.60400390625, + "learning_rate": 7.430000000000001e-06, + "loss": 11256.0812, + "step": 37150 + }, + { + "epoch": 0.07506555105305898, + "grad_norm": 112359.265625, + "learning_rate": 7.432e-06, + "loss": 5877.2246, + "step": 37160 + }, + { + "epoch": 0.0750857516857428, + "grad_norm": 16093.1259765625, + "learning_rate": 7.434e-06, + "loss": 4131.3984, + "step": 37170 + }, + { + "epoch": 0.07510595231842661, + "grad_norm": 9035.76953125, + "learning_rate": 7.436000000000001e-06, + "loss": 3060.702, + "step": 37180 + }, + { + "epoch": 0.07512615295111043, + "grad_norm": 253002.21875, + "learning_rate": 7.438000000000001e-06, + "loss": 14761.2234, + "step": 37190 + }, + { + "epoch": 0.07514635358379425, + "grad_norm": 103355.6328125, + "learning_rate": 7.440000000000001e-06, + "loss": 8026.5555, + "step": 37200 + }, + { + "epoch": 0.07516655421647805, + "grad_norm": 119176.203125, + "learning_rate": 7.442e-06, + "loss": 14874.9813, + "step": 37210 + }, + { + "epoch": 0.07518675484916187, + "grad_norm": 46.074058532714844, + "learning_rate": 7.444e-06, + "loss": 10554.018, + "step": 37220 + }, + { + "epoch": 0.0752069554818457, + "grad_norm": 20619.30078125, + "learning_rate": 7.446000000000001e-06, + "loss": 10958.1258, + "step": 37230 + }, + { + "epoch": 0.0752271561145295, + "grad_norm": 7238.60498046875, + "learning_rate": 7.4480000000000005e-06, + "loss": 9879.7437, + "step": 37240 + }, + { + "epoch": 0.07524735674721332, + "grad_norm": 16031.998046875, + "learning_rate": 7.450000000000001e-06, + "loss": 8887.8289, + "step": 37250 + }, + { + "epoch": 0.07526755737989714, + "grad_norm": 28393.068359375, + "learning_rate": 7.452e-06, + "loss": 9376.1984, + "step": 37260 + }, + { + "epoch": 0.07528775801258096, + "grad_norm": 6392.84765625, + "learning_rate": 7.454e-06, + "loss": 9673.5187, + "step": 37270 + }, + { + "epoch": 0.07530795864526477, + "grad_norm": 1660.0849609375, + "learning_rate": 7.456000000000001e-06, + "loss": 14023.5453, + "step": 37280 + }, + { + "epoch": 0.07532815927794859, + "grad_norm": 77426.3515625, + "learning_rate": 7.458e-06, + "loss": 3431.0871, + "step": 37290 + }, + { + "epoch": 0.0753483599106324, + "grad_norm": 15543.431640625, + "learning_rate": 7.4600000000000006e-06, + "loss": 2050.09, + "step": 37300 + }, + { + "epoch": 0.07536856054331621, + "grad_norm": 64153.9296875, + "learning_rate": 7.462000000000001e-06, + "loss": 5417.7688, + "step": 37310 + }, + { + "epoch": 0.07538876117600003, + "grad_norm": 6430.28271484375, + "learning_rate": 7.464e-06, + "loss": 8155.7219, + "step": 37320 + }, + { + "epoch": 0.07540896180868385, + "grad_norm": 30103.0546875, + "learning_rate": 7.466000000000001e-06, + "loss": 6416.7937, + "step": 37330 + }, + { + "epoch": 0.07542916244136766, + "grad_norm": 27843.91796875, + "learning_rate": 7.468000000000001e-06, + "loss": 4613.625, + "step": 37340 + }, + { + "epoch": 0.07544936307405148, + "grad_norm": 14701.384765625, + "learning_rate": 7.4700000000000005e-06, + "loss": 10661.9844, + "step": 37350 + }, + { + "epoch": 0.0754695637067353, + "grad_norm": 53434.359375, + "learning_rate": 7.472000000000001e-06, + "loss": 4819.6852, + "step": 37360 + }, + { + "epoch": 0.0754897643394191, + "grad_norm": 2403.398193359375, + "learning_rate": 7.474e-06, + "loss": 5045.7484, + "step": 37370 + }, + { + "epoch": 0.07550996497210292, + "grad_norm": 20076.150390625, + "learning_rate": 7.476000000000001e-06, + "loss": 3482.9852, + "step": 37380 + }, + { + "epoch": 0.07553016560478674, + "grad_norm": 39023.5390625, + "learning_rate": 7.478000000000001e-06, + "loss": 8727.7859, + "step": 37390 + }, + { + "epoch": 0.07555036623747055, + "grad_norm": 31956.662109375, + "learning_rate": 7.48e-06, + "loss": 7390.8953, + "step": 37400 + }, + { + "epoch": 0.07557056687015437, + "grad_norm": 50342.16015625, + "learning_rate": 7.4820000000000005e-06, + "loss": 14647.4328, + "step": 37410 + }, + { + "epoch": 0.07559076750283819, + "grad_norm": 70993.828125, + "learning_rate": 7.484e-06, + "loss": 16474.7328, + "step": 37420 + }, + { + "epoch": 0.07561096813552201, + "grad_norm": 25175.15234375, + "learning_rate": 7.486000000000001e-06, + "loss": 3256.6408, + "step": 37430 + }, + { + "epoch": 0.07563116876820582, + "grad_norm": 18387.529296875, + "learning_rate": 7.488000000000001e-06, + "loss": 8872.8531, + "step": 37440 + }, + { + "epoch": 0.07565136940088964, + "grad_norm": 13296.62109375, + "learning_rate": 7.49e-06, + "loss": 7030.5086, + "step": 37450 + }, + { + "epoch": 0.07567157003357346, + "grad_norm": 4367.82470703125, + "learning_rate": 7.4920000000000004e-06, + "loss": 6226.6473, + "step": 37460 + }, + { + "epoch": 0.07569177066625726, + "grad_norm": 3501.68896484375, + "learning_rate": 7.494000000000001e-06, + "loss": 6318.6672, + "step": 37470 + }, + { + "epoch": 0.07571197129894108, + "grad_norm": 10964.9619140625, + "learning_rate": 7.496000000000001e-06, + "loss": 13711.7547, + "step": 37480 + }, + { + "epoch": 0.0757321719316249, + "grad_norm": 1583.02783203125, + "learning_rate": 7.498000000000001e-06, + "loss": 4247.2254, + "step": 37490 + }, + { + "epoch": 0.07575237256430871, + "grad_norm": 39620.79296875, + "learning_rate": 7.500000000000001e-06, + "loss": 11169.2234, + "step": 37500 + }, + { + "epoch": 0.07577257319699253, + "grad_norm": 7076.1923828125, + "learning_rate": 7.502e-06, + "loss": 8959.4773, + "step": 37510 + }, + { + "epoch": 0.07579277382967635, + "grad_norm": 3750.5380859375, + "learning_rate": 7.5040000000000005e-06, + "loss": 6797.7102, + "step": 37520 + }, + { + "epoch": 0.07581297446236016, + "grad_norm": 10969.0859375, + "learning_rate": 7.506000000000001e-06, + "loss": 10360.5266, + "step": 37530 + }, + { + "epoch": 0.07583317509504398, + "grad_norm": 5945.052734375, + "learning_rate": 7.508000000000001e-06, + "loss": 5423.0961, + "step": 37540 + }, + { + "epoch": 0.0758533757277278, + "grad_norm": 76440.875, + "learning_rate": 7.510000000000001e-06, + "loss": 3316.2059, + "step": 37550 + }, + { + "epoch": 0.0758735763604116, + "grad_norm": 63421.73046875, + "learning_rate": 7.512e-06, + "loss": 6417.4898, + "step": 37560 + }, + { + "epoch": 0.07589377699309542, + "grad_norm": 5840.185546875, + "learning_rate": 7.514e-06, + "loss": 2976.8502, + "step": 37570 + }, + { + "epoch": 0.07591397762577924, + "grad_norm": 0.0, + "learning_rate": 7.516000000000001e-06, + "loss": 5460.6117, + "step": 37580 + }, + { + "epoch": 0.07593417825846306, + "grad_norm": 83823.6484375, + "learning_rate": 7.518000000000001e-06, + "loss": 5578.2535, + "step": 37590 + }, + { + "epoch": 0.07595437889114687, + "grad_norm": 1743.631591796875, + "learning_rate": 7.520000000000001e-06, + "loss": 11628.0992, + "step": 37600 + }, + { + "epoch": 0.07597457952383069, + "grad_norm": 21100.99609375, + "learning_rate": 7.522e-06, + "loss": 16142.9672, + "step": 37610 + }, + { + "epoch": 0.07599478015651451, + "grad_norm": 69963.3203125, + "learning_rate": 7.524e-06, + "loss": 9544.6484, + "step": 37620 + }, + { + "epoch": 0.07601498078919831, + "grad_norm": 54105.06640625, + "learning_rate": 7.526000000000001e-06, + "loss": 15342.0125, + "step": 37630 + }, + { + "epoch": 0.07603518142188213, + "grad_norm": 76489.7265625, + "learning_rate": 7.528000000000001e-06, + "loss": 8066.857, + "step": 37640 + }, + { + "epoch": 0.07605538205456595, + "grad_norm": 3187.320068359375, + "learning_rate": 7.530000000000001e-06, + "loss": 9378.1148, + "step": 37650 + }, + { + "epoch": 0.07607558268724976, + "grad_norm": 13812.693359375, + "learning_rate": 7.532e-06, + "loss": 10482.9078, + "step": 37660 + }, + { + "epoch": 0.07609578331993358, + "grad_norm": 15301.3115234375, + "learning_rate": 7.534e-06, + "loss": 6099.2141, + "step": 37670 + }, + { + "epoch": 0.0761159839526174, + "grad_norm": 20403.7265625, + "learning_rate": 7.536000000000001e-06, + "loss": 1704.3805, + "step": 37680 + }, + { + "epoch": 0.0761361845853012, + "grad_norm": 30890.306640625, + "learning_rate": 7.5380000000000005e-06, + "loss": 14304.0219, + "step": 37690 + }, + { + "epoch": 0.07615638521798503, + "grad_norm": 1009.7464599609375, + "learning_rate": 7.540000000000001e-06, + "loss": 12693.8234, + "step": 37700 + }, + { + "epoch": 0.07617658585066885, + "grad_norm": 56971.97265625, + "learning_rate": 7.542000000000001e-06, + "loss": 5782.0793, + "step": 37710 + }, + { + "epoch": 0.07619678648335265, + "grad_norm": 115082.078125, + "learning_rate": 7.544e-06, + "loss": 9080.8125, + "step": 37720 + }, + { + "epoch": 0.07621698711603647, + "grad_norm": 6449.22119140625, + "learning_rate": 7.546000000000001e-06, + "loss": 6565.0664, + "step": 37730 + }, + { + "epoch": 0.07623718774872029, + "grad_norm": 13678.7529296875, + "learning_rate": 7.548000000000001e-06, + "loss": 3235.1568, + "step": 37740 + }, + { + "epoch": 0.07625738838140411, + "grad_norm": 2321.381591796875, + "learning_rate": 7.5500000000000006e-06, + "loss": 6789.7031, + "step": 37750 + }, + { + "epoch": 0.07627758901408792, + "grad_norm": 19422.841796875, + "learning_rate": 7.552000000000001e-06, + "loss": 6585.4883, + "step": 37760 + }, + { + "epoch": 0.07629778964677174, + "grad_norm": 3083.84423828125, + "learning_rate": 7.554e-06, + "loss": 5445.9324, + "step": 37770 + }, + { + "epoch": 0.07631799027945556, + "grad_norm": 181489.34375, + "learning_rate": 7.556000000000001e-06, + "loss": 9576.9594, + "step": 37780 + }, + { + "epoch": 0.07633819091213936, + "grad_norm": 322169.625, + "learning_rate": 7.558000000000001e-06, + "loss": 10430.4125, + "step": 37790 + }, + { + "epoch": 0.07635839154482318, + "grad_norm": 1701.7698974609375, + "learning_rate": 7.5600000000000005e-06, + "loss": 7009.0133, + "step": 37800 + }, + { + "epoch": 0.076378592177507, + "grad_norm": 288.1236572265625, + "learning_rate": 7.562000000000001e-06, + "loss": 3653.6535, + "step": 37810 + }, + { + "epoch": 0.07639879281019081, + "grad_norm": 7765.63427734375, + "learning_rate": 7.564e-06, + "loss": 15080.3953, + "step": 37820 + }, + { + "epoch": 0.07641899344287463, + "grad_norm": 6914.89306640625, + "learning_rate": 7.566000000000001e-06, + "loss": 16751.7891, + "step": 37830 + }, + { + "epoch": 0.07643919407555845, + "grad_norm": 1640.23974609375, + "learning_rate": 7.568000000000001e-06, + "loss": 16602.0344, + "step": 37840 + }, + { + "epoch": 0.07645939470824226, + "grad_norm": 37452.3203125, + "learning_rate": 7.57e-06, + "loss": 7211.8758, + "step": 37850 + }, + { + "epoch": 0.07647959534092608, + "grad_norm": 135630.796875, + "learning_rate": 7.5720000000000005e-06, + "loss": 11020.1437, + "step": 37860 + }, + { + "epoch": 0.0764997959736099, + "grad_norm": 2766.810302734375, + "learning_rate": 7.574e-06, + "loss": 8697.5312, + "step": 37870 + }, + { + "epoch": 0.0765199966062937, + "grad_norm": 37460.33203125, + "learning_rate": 7.576000000000001e-06, + "loss": 5096.4617, + "step": 37880 + }, + { + "epoch": 0.07654019723897752, + "grad_norm": 66128.328125, + "learning_rate": 7.578000000000001e-06, + "loss": 4190.4836, + "step": 37890 + }, + { + "epoch": 0.07656039787166134, + "grad_norm": 18875.53515625, + "learning_rate": 7.58e-06, + "loss": 9477.4352, + "step": 37900 + }, + { + "epoch": 0.07658059850434516, + "grad_norm": 1517.0081787109375, + "learning_rate": 7.582e-06, + "loss": 4092.1715, + "step": 37910 + }, + { + "epoch": 0.07660079913702897, + "grad_norm": 62168.05859375, + "learning_rate": 7.5840000000000006e-06, + "loss": 7967.4758, + "step": 37920 + }, + { + "epoch": 0.07662099976971279, + "grad_norm": 312.7580871582031, + "learning_rate": 7.586000000000001e-06, + "loss": 5587.4352, + "step": 37930 + }, + { + "epoch": 0.07664120040239661, + "grad_norm": 2431.15625, + "learning_rate": 7.588000000000001e-06, + "loss": 4439.5621, + "step": 37940 + }, + { + "epoch": 0.07666140103508041, + "grad_norm": 132880.265625, + "learning_rate": 7.590000000000001e-06, + "loss": 12522.6437, + "step": 37950 + }, + { + "epoch": 0.07668160166776423, + "grad_norm": 60482.74609375, + "learning_rate": 7.592e-06, + "loss": 6519.3305, + "step": 37960 + }, + { + "epoch": 0.07670180230044805, + "grad_norm": 106775.4921875, + "learning_rate": 7.5940000000000005e-06, + "loss": 15795.8344, + "step": 37970 + }, + { + "epoch": 0.07672200293313186, + "grad_norm": 315802.21875, + "learning_rate": 7.5960000000000015e-06, + "loss": 17684.2594, + "step": 37980 + }, + { + "epoch": 0.07674220356581568, + "grad_norm": 19455.033203125, + "learning_rate": 7.598000000000001e-06, + "loss": 13869.1828, + "step": 37990 + }, + { + "epoch": 0.0767624041984995, + "grad_norm": 152243.140625, + "learning_rate": 7.600000000000001e-06, + "loss": 10043.425, + "step": 38000 + }, + { + "epoch": 0.07678260483118331, + "grad_norm": 2858.478271484375, + "learning_rate": 7.602e-06, + "loss": 2004.5023, + "step": 38010 + }, + { + "epoch": 0.07680280546386713, + "grad_norm": 11647.4091796875, + "learning_rate": 7.604e-06, + "loss": 3955.4379, + "step": 38020 + }, + { + "epoch": 0.07682300609655095, + "grad_norm": 10796.1630859375, + "learning_rate": 7.606000000000001e-06, + "loss": 6483.7531, + "step": 38030 + }, + { + "epoch": 0.07684320672923475, + "grad_norm": 9447.517578125, + "learning_rate": 7.608000000000001e-06, + "loss": 3574.0465, + "step": 38040 + }, + { + "epoch": 0.07686340736191857, + "grad_norm": 2575.256103515625, + "learning_rate": 7.610000000000001e-06, + "loss": 2738.533, + "step": 38050 + }, + { + "epoch": 0.0768836079946024, + "grad_norm": 35095.07421875, + "learning_rate": 7.612e-06, + "loss": 8125.5875, + "step": 38060 + }, + { + "epoch": 0.07690380862728621, + "grad_norm": 20319.734375, + "learning_rate": 7.614e-06, + "loss": 9819.0609, + "step": 38070 + }, + { + "epoch": 0.07692400925997002, + "grad_norm": 127891.734375, + "learning_rate": 7.616000000000001e-06, + "loss": 6808.6516, + "step": 38080 + }, + { + "epoch": 0.07694420989265384, + "grad_norm": 34391.27734375, + "learning_rate": 7.618000000000001e-06, + "loss": 11785.2687, + "step": 38090 + }, + { + "epoch": 0.07696441052533766, + "grad_norm": 67366.6171875, + "learning_rate": 7.620000000000001e-06, + "loss": 11824.5063, + "step": 38100 + }, + { + "epoch": 0.07698461115802147, + "grad_norm": 1501.3951416015625, + "learning_rate": 7.622000000000001e-06, + "loss": 13251.6125, + "step": 38110 + }, + { + "epoch": 0.07700481179070529, + "grad_norm": 10803.322265625, + "learning_rate": 7.624e-06, + "loss": 9945.6703, + "step": 38120 + }, + { + "epoch": 0.0770250124233891, + "grad_norm": 29854.5078125, + "learning_rate": 7.626e-06, + "loss": 4764.7754, + "step": 38130 + }, + { + "epoch": 0.07704521305607291, + "grad_norm": 40375.62109375, + "learning_rate": 7.628000000000001e-06, + "loss": 11295.4844, + "step": 38140 + }, + { + "epoch": 0.07706541368875673, + "grad_norm": 11018.943359375, + "learning_rate": 7.630000000000001e-06, + "loss": 3845.091, + "step": 38150 + }, + { + "epoch": 0.07708561432144055, + "grad_norm": 2559.7451171875, + "learning_rate": 7.632e-06, + "loss": 5447.0227, + "step": 38160 + }, + { + "epoch": 0.07710581495412436, + "grad_norm": 1973.029052734375, + "learning_rate": 7.634e-06, + "loss": 4552.8445, + "step": 38170 + }, + { + "epoch": 0.07712601558680818, + "grad_norm": 92719.1171875, + "learning_rate": 7.636e-06, + "loss": 6880.6938, + "step": 38180 + }, + { + "epoch": 0.077146216219492, + "grad_norm": 16497.47265625, + "learning_rate": 7.638e-06, + "loss": 3231.7918, + "step": 38190 + }, + { + "epoch": 0.0771664168521758, + "grad_norm": 1226.76171875, + "learning_rate": 7.640000000000001e-06, + "loss": 9852.843, + "step": 38200 + }, + { + "epoch": 0.07718661748485962, + "grad_norm": 10241.56640625, + "learning_rate": 7.642e-06, + "loss": 6915.6578, + "step": 38210 + }, + { + "epoch": 0.07720681811754344, + "grad_norm": 26743.10546875, + "learning_rate": 7.644e-06, + "loss": 9445.5672, + "step": 38220 + }, + { + "epoch": 0.07722701875022726, + "grad_norm": 2774.2138671875, + "learning_rate": 7.646e-06, + "loss": 5523.1266, + "step": 38230 + }, + { + "epoch": 0.07724721938291107, + "grad_norm": 5015.3486328125, + "learning_rate": 7.648e-06, + "loss": 3384.7117, + "step": 38240 + }, + { + "epoch": 0.07726742001559489, + "grad_norm": 35998.2578125, + "learning_rate": 7.650000000000001e-06, + "loss": 10378.8758, + "step": 38250 + }, + { + "epoch": 0.07728762064827871, + "grad_norm": 644.7882080078125, + "learning_rate": 7.652e-06, + "loss": 13524.3969, + "step": 38260 + }, + { + "epoch": 0.07730782128096252, + "grad_norm": 21203.48046875, + "learning_rate": 7.654e-06, + "loss": 6195.077, + "step": 38270 + }, + { + "epoch": 0.07732802191364634, + "grad_norm": 4803.30029296875, + "learning_rate": 7.656000000000001e-06, + "loss": 6984.0445, + "step": 38280 + }, + { + "epoch": 0.07734822254633016, + "grad_norm": 83116.375, + "learning_rate": 7.658e-06, + "loss": 5576.4883, + "step": 38290 + }, + { + "epoch": 0.07736842317901396, + "grad_norm": 25620.65234375, + "learning_rate": 7.660000000000001e-06, + "loss": 11355.6875, + "step": 38300 + }, + { + "epoch": 0.07738862381169778, + "grad_norm": 9075.5029296875, + "learning_rate": 7.662e-06, + "loss": 2906.1605, + "step": 38310 + }, + { + "epoch": 0.0774088244443816, + "grad_norm": 14094.1396484375, + "learning_rate": 7.664e-06, + "loss": 4142.0188, + "step": 38320 + }, + { + "epoch": 0.07742902507706541, + "grad_norm": 32032.9375, + "learning_rate": 7.666e-06, + "loss": 7227.7063, + "step": 38330 + }, + { + "epoch": 0.07744922570974923, + "grad_norm": 86566.390625, + "learning_rate": 7.668000000000002e-06, + "loss": 4296.675, + "step": 38340 + }, + { + "epoch": 0.07746942634243305, + "grad_norm": 14167.599609375, + "learning_rate": 7.670000000000001e-06, + "loss": 9907.6891, + "step": 38350 + }, + { + "epoch": 0.07748962697511685, + "grad_norm": 63794.078125, + "learning_rate": 7.672e-06, + "loss": 5351.4844, + "step": 38360 + }, + { + "epoch": 0.07750982760780067, + "grad_norm": 91710.921875, + "learning_rate": 7.674e-06, + "loss": 10909.1266, + "step": 38370 + }, + { + "epoch": 0.0775300282404845, + "grad_norm": 20081.69140625, + "learning_rate": 7.676e-06, + "loss": 13408.3609, + "step": 38380 + }, + { + "epoch": 0.07755022887316831, + "grad_norm": 218768.078125, + "learning_rate": 7.678000000000002e-06, + "loss": 9164.1055, + "step": 38390 + }, + { + "epoch": 0.07757042950585212, + "grad_norm": 4629.4453125, + "learning_rate": 7.680000000000001e-06, + "loss": 14649.7859, + "step": 38400 + }, + { + "epoch": 0.07759063013853594, + "grad_norm": 25925.9375, + "learning_rate": 7.682e-06, + "loss": 7025.6945, + "step": 38410 + }, + { + "epoch": 0.07761083077121976, + "grad_norm": 17451.638671875, + "learning_rate": 7.684e-06, + "loss": 11557.8133, + "step": 38420 + }, + { + "epoch": 0.07763103140390357, + "grad_norm": 16027.74609375, + "learning_rate": 7.686e-06, + "loss": 12056.9688, + "step": 38430 + }, + { + "epoch": 0.07765123203658739, + "grad_norm": 520.9924926757812, + "learning_rate": 7.688000000000002e-06, + "loss": 16738.6031, + "step": 38440 + }, + { + "epoch": 0.0776714326692712, + "grad_norm": 25221.76171875, + "learning_rate": 7.690000000000001e-06, + "loss": 8333.7922, + "step": 38450 + }, + { + "epoch": 0.07769163330195501, + "grad_norm": 24795.97265625, + "learning_rate": 7.692e-06, + "loss": 4977.052, + "step": 38460 + }, + { + "epoch": 0.07771183393463883, + "grad_norm": 1046.6649169921875, + "learning_rate": 7.694e-06, + "loss": 12971.2, + "step": 38470 + }, + { + "epoch": 0.07773203456732265, + "grad_norm": 91785.046875, + "learning_rate": 7.696e-06, + "loss": 9862.3445, + "step": 38480 + }, + { + "epoch": 0.07775223520000646, + "grad_norm": 28791.83203125, + "learning_rate": 7.698000000000002e-06, + "loss": 10130.9734, + "step": 38490 + }, + { + "epoch": 0.07777243583269028, + "grad_norm": 1951.188232421875, + "learning_rate": 7.7e-06, + "loss": 3570.1434, + "step": 38500 + }, + { + "epoch": 0.0777926364653741, + "grad_norm": 57592.54296875, + "learning_rate": 7.702e-06, + "loss": 7399.0875, + "step": 38510 + }, + { + "epoch": 0.0778128370980579, + "grad_norm": 30366.982421875, + "learning_rate": 7.704000000000001e-06, + "loss": 6985.393, + "step": 38520 + }, + { + "epoch": 0.07783303773074172, + "grad_norm": 163111.328125, + "learning_rate": 7.706e-06, + "loss": 5608.5074, + "step": 38530 + }, + { + "epoch": 0.07785323836342554, + "grad_norm": 15756.298828125, + "learning_rate": 7.708000000000001e-06, + "loss": 9444.293, + "step": 38540 + }, + { + "epoch": 0.07787343899610936, + "grad_norm": 3529.503173828125, + "learning_rate": 7.71e-06, + "loss": 4453.9113, + "step": 38550 + }, + { + "epoch": 0.07789363962879317, + "grad_norm": 10268.291015625, + "learning_rate": 7.712e-06, + "loss": 3372.6496, + "step": 38560 + }, + { + "epoch": 0.07791384026147699, + "grad_norm": 4011.64501953125, + "learning_rate": 7.714000000000001e-06, + "loss": 4670.6664, + "step": 38570 + }, + { + "epoch": 0.07793404089416081, + "grad_norm": 5705.58056640625, + "learning_rate": 7.716e-06, + "loss": 5079.4477, + "step": 38580 + }, + { + "epoch": 0.07795424152684462, + "grad_norm": 12552.8837890625, + "learning_rate": 7.718000000000001e-06, + "loss": 6664.825, + "step": 38590 + }, + { + "epoch": 0.07797444215952844, + "grad_norm": 2024.084228515625, + "learning_rate": 7.72e-06, + "loss": 4839.8879, + "step": 38600 + }, + { + "epoch": 0.07799464279221226, + "grad_norm": 21685.3515625, + "learning_rate": 7.722e-06, + "loss": 4210.1871, + "step": 38610 + }, + { + "epoch": 0.07801484342489606, + "grad_norm": 5172.09326171875, + "learning_rate": 7.724000000000001e-06, + "loss": 4728.4016, + "step": 38620 + }, + { + "epoch": 0.07803504405757988, + "grad_norm": 221443.703125, + "learning_rate": 7.726e-06, + "loss": 14326.3531, + "step": 38630 + }, + { + "epoch": 0.0780552446902637, + "grad_norm": 26398.716796875, + "learning_rate": 7.728000000000001e-06, + "loss": 14049.4641, + "step": 38640 + }, + { + "epoch": 0.07807544532294751, + "grad_norm": 1739.33251953125, + "learning_rate": 7.73e-06, + "loss": 3210.0381, + "step": 38650 + }, + { + "epoch": 0.07809564595563133, + "grad_norm": 61653.84765625, + "learning_rate": 7.732e-06, + "loss": 12951.4992, + "step": 38660 + }, + { + "epoch": 0.07811584658831515, + "grad_norm": 91346.421875, + "learning_rate": 7.734e-06, + "loss": 8092.9414, + "step": 38670 + }, + { + "epoch": 0.07813604722099896, + "grad_norm": 25835.59765625, + "learning_rate": 7.736e-06, + "loss": 7336.9477, + "step": 38680 + }, + { + "epoch": 0.07815624785368278, + "grad_norm": 5531.7041015625, + "learning_rate": 7.738000000000001e-06, + "loss": 4749.0348, + "step": 38690 + }, + { + "epoch": 0.0781764484863666, + "grad_norm": 9279.5517578125, + "learning_rate": 7.74e-06, + "loss": 6814.7234, + "step": 38700 + }, + { + "epoch": 0.07819664911905042, + "grad_norm": 7816.9111328125, + "learning_rate": 7.742000000000001e-06, + "loss": 9659.2211, + "step": 38710 + }, + { + "epoch": 0.07821684975173422, + "grad_norm": 16634.224609375, + "learning_rate": 7.744e-06, + "loss": 14094.1344, + "step": 38720 + }, + { + "epoch": 0.07823705038441804, + "grad_norm": 8675.6181640625, + "learning_rate": 7.746e-06, + "loss": 14439.9672, + "step": 38730 + }, + { + "epoch": 0.07825725101710186, + "grad_norm": 66832.8671875, + "learning_rate": 7.748000000000001e-06, + "loss": 4580.0688, + "step": 38740 + }, + { + "epoch": 0.07827745164978567, + "grad_norm": 153097.640625, + "learning_rate": 7.75e-06, + "loss": 10482.4125, + "step": 38750 + }, + { + "epoch": 0.07829765228246949, + "grad_norm": 94263.2578125, + "learning_rate": 7.752000000000001e-06, + "loss": 6538.5305, + "step": 38760 + }, + { + "epoch": 0.07831785291515331, + "grad_norm": 12784.212890625, + "learning_rate": 7.754e-06, + "loss": 14734.6828, + "step": 38770 + }, + { + "epoch": 0.07833805354783711, + "grad_norm": 2062.43408203125, + "learning_rate": 7.756e-06, + "loss": 9295.6922, + "step": 38780 + }, + { + "epoch": 0.07835825418052093, + "grad_norm": 0.0, + "learning_rate": 7.758000000000001e-06, + "loss": 4798.1375, + "step": 38790 + }, + { + "epoch": 0.07837845481320475, + "grad_norm": 234296.5625, + "learning_rate": 7.76e-06, + "loss": 16123.0344, + "step": 38800 + }, + { + "epoch": 0.07839865544588856, + "grad_norm": 1795.0721435546875, + "learning_rate": 7.762000000000001e-06, + "loss": 7368.6359, + "step": 38810 + }, + { + "epoch": 0.07841885607857238, + "grad_norm": 70586.984375, + "learning_rate": 7.764e-06, + "loss": 8082.0547, + "step": 38820 + }, + { + "epoch": 0.0784390567112562, + "grad_norm": 117748.8671875, + "learning_rate": 7.766e-06, + "loss": 10606.5969, + "step": 38830 + }, + { + "epoch": 0.07845925734394, + "grad_norm": 8824.953125, + "learning_rate": 7.768e-06, + "loss": 5410.7121, + "step": 38840 + }, + { + "epoch": 0.07847945797662383, + "grad_norm": 14720.8798828125, + "learning_rate": 7.77e-06, + "loss": 5623.0512, + "step": 38850 + }, + { + "epoch": 0.07849965860930765, + "grad_norm": 1420.4217529296875, + "learning_rate": 7.772000000000001e-06, + "loss": 5764.8789, + "step": 38860 + }, + { + "epoch": 0.07851985924199147, + "grad_norm": 69183.578125, + "learning_rate": 7.774e-06, + "loss": 5363.6285, + "step": 38870 + }, + { + "epoch": 0.07854005987467527, + "grad_norm": 826.49609375, + "learning_rate": 7.776e-06, + "loss": 4954.0871, + "step": 38880 + }, + { + "epoch": 0.07856026050735909, + "grad_norm": 21731.013671875, + "learning_rate": 7.778e-06, + "loss": 12119.6305, + "step": 38890 + }, + { + "epoch": 0.07858046114004291, + "grad_norm": 0.0, + "learning_rate": 7.78e-06, + "loss": 7672.0609, + "step": 38900 + }, + { + "epoch": 0.07860066177272672, + "grad_norm": 72348.6875, + "learning_rate": 7.782000000000001e-06, + "loss": 16723.3516, + "step": 38910 + }, + { + "epoch": 0.07862086240541054, + "grad_norm": 361.9878845214844, + "learning_rate": 7.784e-06, + "loss": 5351.4957, + "step": 38920 + }, + { + "epoch": 0.07864106303809436, + "grad_norm": 24311.802734375, + "learning_rate": 7.786e-06, + "loss": 10102.4758, + "step": 38930 + }, + { + "epoch": 0.07866126367077816, + "grad_norm": 969.9327392578125, + "learning_rate": 7.788e-06, + "loss": 6442.0594, + "step": 38940 + }, + { + "epoch": 0.07868146430346198, + "grad_norm": 226552.28125, + "learning_rate": 7.790000000000002e-06, + "loss": 11063.4492, + "step": 38950 + }, + { + "epoch": 0.0787016649361458, + "grad_norm": 32612.28515625, + "learning_rate": 7.792000000000001e-06, + "loss": 5305.4934, + "step": 38960 + }, + { + "epoch": 0.07872186556882961, + "grad_norm": 3576.33544921875, + "learning_rate": 7.794e-06, + "loss": 18053.0984, + "step": 38970 + }, + { + "epoch": 0.07874206620151343, + "grad_norm": 1806.1787109375, + "learning_rate": 7.796e-06, + "loss": 13579.8563, + "step": 38980 + }, + { + "epoch": 0.07876226683419725, + "grad_norm": 135629.34375, + "learning_rate": 7.798e-06, + "loss": 12021.4586, + "step": 38990 + }, + { + "epoch": 0.07878246746688106, + "grad_norm": 17384.548828125, + "learning_rate": 7.800000000000002e-06, + "loss": 12044.2031, + "step": 39000 + }, + { + "epoch": 0.07880266809956488, + "grad_norm": 309068.125, + "learning_rate": 7.802000000000001e-06, + "loss": 11986.9945, + "step": 39010 + }, + { + "epoch": 0.0788228687322487, + "grad_norm": 27332.486328125, + "learning_rate": 7.804e-06, + "loss": 3146.9406, + "step": 39020 + }, + { + "epoch": 0.07884306936493252, + "grad_norm": 15027.3359375, + "learning_rate": 7.806e-06, + "loss": 4007.8047, + "step": 39030 + }, + { + "epoch": 0.07886326999761632, + "grad_norm": 35780.7578125, + "learning_rate": 7.808e-06, + "loss": 5444.7227, + "step": 39040 + }, + { + "epoch": 0.07888347063030014, + "grad_norm": 41794.96875, + "learning_rate": 7.810000000000001e-06, + "loss": 7398.2492, + "step": 39050 + }, + { + "epoch": 0.07890367126298396, + "grad_norm": 47523.79296875, + "learning_rate": 7.812e-06, + "loss": 10460.8711, + "step": 39060 + }, + { + "epoch": 0.07892387189566777, + "grad_norm": 42946.87109375, + "learning_rate": 7.814e-06, + "loss": 4656.6297, + "step": 39070 + }, + { + "epoch": 0.07894407252835159, + "grad_norm": 249677.828125, + "learning_rate": 7.816000000000001e-06, + "loss": 12166.6812, + "step": 39080 + }, + { + "epoch": 0.07896427316103541, + "grad_norm": 3847.938720703125, + "learning_rate": 7.818e-06, + "loss": 7373.2937, + "step": 39090 + }, + { + "epoch": 0.07898447379371921, + "grad_norm": 80184.390625, + "learning_rate": 7.820000000000001e-06, + "loss": 7933.7242, + "step": 39100 + }, + { + "epoch": 0.07900467442640303, + "grad_norm": 115.26930236816406, + "learning_rate": 7.822e-06, + "loss": 16272.8484, + "step": 39110 + }, + { + "epoch": 0.07902487505908685, + "grad_norm": 8736.1337890625, + "learning_rate": 7.824e-06, + "loss": 5362.8852, + "step": 39120 + }, + { + "epoch": 0.07904507569177066, + "grad_norm": 15467.76171875, + "learning_rate": 7.826000000000001e-06, + "loss": 11965.7156, + "step": 39130 + }, + { + "epoch": 0.07906527632445448, + "grad_norm": 2238.07177734375, + "learning_rate": 7.828000000000002e-06, + "loss": 7316.3648, + "step": 39140 + }, + { + "epoch": 0.0790854769571383, + "grad_norm": 14754.13671875, + "learning_rate": 7.830000000000001e-06, + "loss": 7128.1781, + "step": 39150 + }, + { + "epoch": 0.07910567758982211, + "grad_norm": 4136.6328125, + "learning_rate": 7.832e-06, + "loss": 1938.4742, + "step": 39160 + }, + { + "epoch": 0.07912587822250593, + "grad_norm": 1025.3453369140625, + "learning_rate": 7.834e-06, + "loss": 3641.475, + "step": 39170 + }, + { + "epoch": 0.07914607885518975, + "grad_norm": 48428.19140625, + "learning_rate": 7.836000000000001e-06, + "loss": 4156.6988, + "step": 39180 + }, + { + "epoch": 0.07916627948787357, + "grad_norm": 10245.232421875, + "learning_rate": 7.838000000000002e-06, + "loss": 5093.8402, + "step": 39190 + }, + { + "epoch": 0.07918648012055737, + "grad_norm": 23072.517578125, + "learning_rate": 7.840000000000001e-06, + "loss": 12349.1891, + "step": 39200 + }, + { + "epoch": 0.0792066807532412, + "grad_norm": 971.8624267578125, + "learning_rate": 7.842e-06, + "loss": 3235.6811, + "step": 39210 + }, + { + "epoch": 0.07922688138592501, + "grad_norm": 25367.890625, + "learning_rate": 7.844e-06, + "loss": 14956.4281, + "step": 39220 + }, + { + "epoch": 0.07924708201860882, + "grad_norm": 6486.31494140625, + "learning_rate": 7.846e-06, + "loss": 4490.8273, + "step": 39230 + }, + { + "epoch": 0.07926728265129264, + "grad_norm": 9603.9482421875, + "learning_rate": 7.848000000000002e-06, + "loss": 7192.0555, + "step": 39240 + }, + { + "epoch": 0.07928748328397646, + "grad_norm": 48166.0234375, + "learning_rate": 7.850000000000001e-06, + "loss": 9112.1203, + "step": 39250 + }, + { + "epoch": 0.07930768391666027, + "grad_norm": 20071.185546875, + "learning_rate": 7.852e-06, + "loss": 6002.3492, + "step": 39260 + }, + { + "epoch": 0.07932788454934409, + "grad_norm": 11183.6201171875, + "learning_rate": 7.854e-06, + "loss": 7156.3344, + "step": 39270 + }, + { + "epoch": 0.0793480851820279, + "grad_norm": 32118.0703125, + "learning_rate": 7.856e-06, + "loss": 10656.0594, + "step": 39280 + }, + { + "epoch": 0.07936828581471171, + "grad_norm": 44166.46875, + "learning_rate": 7.858000000000002e-06, + "loss": 5369.6539, + "step": 39290 + }, + { + "epoch": 0.07938848644739553, + "grad_norm": 21081.16796875, + "learning_rate": 7.860000000000001e-06, + "loss": 10256.2648, + "step": 39300 + }, + { + "epoch": 0.07940868708007935, + "grad_norm": 10182.9150390625, + "learning_rate": 7.862e-06, + "loss": 10033.1227, + "step": 39310 + }, + { + "epoch": 0.07942888771276316, + "grad_norm": 1470.249267578125, + "learning_rate": 7.864000000000001e-06, + "loss": 4550.4828, + "step": 39320 + }, + { + "epoch": 0.07944908834544698, + "grad_norm": 27540.55078125, + "learning_rate": 7.866e-06, + "loss": 6826.5344, + "step": 39330 + }, + { + "epoch": 0.0794692889781308, + "grad_norm": 36149.02734375, + "learning_rate": 7.868000000000002e-06, + "loss": 9029.5781, + "step": 39340 + }, + { + "epoch": 0.07948948961081462, + "grad_norm": 3568.762939453125, + "learning_rate": 7.870000000000001e-06, + "loss": 5724.157, + "step": 39350 + }, + { + "epoch": 0.07950969024349842, + "grad_norm": 5272.69384765625, + "learning_rate": 7.872e-06, + "loss": 7658.2867, + "step": 39360 + }, + { + "epoch": 0.07952989087618224, + "grad_norm": 424699.375, + "learning_rate": 7.874000000000001e-06, + "loss": 18252.0297, + "step": 39370 + }, + { + "epoch": 0.07955009150886606, + "grad_norm": 20806.73046875, + "learning_rate": 7.876e-06, + "loss": 7895.0359, + "step": 39380 + }, + { + "epoch": 0.07957029214154987, + "grad_norm": 5812.39111328125, + "learning_rate": 7.878e-06, + "loss": 6520.9648, + "step": 39390 + }, + { + "epoch": 0.07959049277423369, + "grad_norm": 1856.93994140625, + "learning_rate": 7.88e-06, + "loss": 4436.4223, + "step": 39400 + }, + { + "epoch": 0.07961069340691751, + "grad_norm": 2287.673828125, + "learning_rate": 7.882e-06, + "loss": 2875.5867, + "step": 39410 + }, + { + "epoch": 0.07963089403960132, + "grad_norm": 13493.3984375, + "learning_rate": 7.884000000000001e-06, + "loss": 11791.1008, + "step": 39420 + }, + { + "epoch": 0.07965109467228514, + "grad_norm": 27417.876953125, + "learning_rate": 7.886e-06, + "loss": 8008.8773, + "step": 39430 + }, + { + "epoch": 0.07967129530496896, + "grad_norm": 53117.76953125, + "learning_rate": 7.888e-06, + "loss": 5120.9574, + "step": 39440 + }, + { + "epoch": 0.07969149593765276, + "grad_norm": 6856.09912109375, + "learning_rate": 7.89e-06, + "loss": 22773.1281, + "step": 39450 + }, + { + "epoch": 0.07971169657033658, + "grad_norm": 61453.45703125, + "learning_rate": 7.892e-06, + "loss": 13026.2703, + "step": 39460 + }, + { + "epoch": 0.0797318972030204, + "grad_norm": 6462.0712890625, + "learning_rate": 7.894000000000001e-06, + "loss": 7083.657, + "step": 39470 + }, + { + "epoch": 0.07975209783570421, + "grad_norm": 5349.27490234375, + "learning_rate": 7.896e-06, + "loss": 7593.6477, + "step": 39480 + }, + { + "epoch": 0.07977229846838803, + "grad_norm": 4775.26416015625, + "learning_rate": 7.898e-06, + "loss": 6373.457, + "step": 39490 + }, + { + "epoch": 0.07979249910107185, + "grad_norm": 50843.53515625, + "learning_rate": 7.9e-06, + "loss": 3221.4289, + "step": 39500 + }, + { + "epoch": 0.07981269973375565, + "grad_norm": 16590.5234375, + "learning_rate": 7.902000000000002e-06, + "loss": 6641.2484, + "step": 39510 + }, + { + "epoch": 0.07983290036643947, + "grad_norm": 35050.47265625, + "learning_rate": 7.904000000000001e-06, + "loss": 10845.5977, + "step": 39520 + }, + { + "epoch": 0.0798531009991233, + "grad_norm": 1822.9227294921875, + "learning_rate": 7.906e-06, + "loss": 7904.9844, + "step": 39530 + }, + { + "epoch": 0.07987330163180711, + "grad_norm": 37353.57421875, + "learning_rate": 7.908e-06, + "loss": 8067.4883, + "step": 39540 + }, + { + "epoch": 0.07989350226449092, + "grad_norm": 8692.8759765625, + "learning_rate": 7.91e-06, + "loss": 7172.4055, + "step": 39550 + }, + { + "epoch": 0.07991370289717474, + "grad_norm": 11064.4775390625, + "learning_rate": 7.912000000000001e-06, + "loss": 5279.5918, + "step": 39560 + }, + { + "epoch": 0.07993390352985856, + "grad_norm": 10562.86328125, + "learning_rate": 7.914e-06, + "loss": 3503.2949, + "step": 39570 + }, + { + "epoch": 0.07995410416254237, + "grad_norm": 23425.70703125, + "learning_rate": 7.916e-06, + "loss": 4686.7547, + "step": 39580 + }, + { + "epoch": 0.07997430479522619, + "grad_norm": 7258.06689453125, + "learning_rate": 7.918e-06, + "loss": 6779.9375, + "step": 39590 + }, + { + "epoch": 0.07999450542791, + "grad_norm": 4635.4521484375, + "learning_rate": 7.92e-06, + "loss": 14427.4312, + "step": 39600 + }, + { + "epoch": 0.08001470606059381, + "grad_norm": 8265.740234375, + "learning_rate": 7.922000000000001e-06, + "loss": 9973.7086, + "step": 39610 + }, + { + "epoch": 0.08003490669327763, + "grad_norm": 16172.6416015625, + "learning_rate": 7.924e-06, + "loss": 10623.2484, + "step": 39620 + }, + { + "epoch": 0.08005510732596145, + "grad_norm": 840.1923828125, + "learning_rate": 7.926e-06, + "loss": 9912.6297, + "step": 39630 + }, + { + "epoch": 0.08007530795864526, + "grad_norm": 2480.493896484375, + "learning_rate": 7.928e-06, + "loss": 15150.5781, + "step": 39640 + }, + { + "epoch": 0.08009550859132908, + "grad_norm": 78794.6640625, + "learning_rate": 7.93e-06, + "loss": 7859.0437, + "step": 39650 + }, + { + "epoch": 0.0801157092240129, + "grad_norm": 212819.1875, + "learning_rate": 7.932000000000001e-06, + "loss": 17844.4781, + "step": 39660 + }, + { + "epoch": 0.0801359098566967, + "grad_norm": 9275.9423828125, + "learning_rate": 7.934e-06, + "loss": 7794.3336, + "step": 39670 + }, + { + "epoch": 0.08015611048938052, + "grad_norm": 10715.5087890625, + "learning_rate": 7.936e-06, + "loss": 11467.7039, + "step": 39680 + }, + { + "epoch": 0.08017631112206434, + "grad_norm": 1159.8009033203125, + "learning_rate": 7.938000000000001e-06, + "loss": 7910.8805, + "step": 39690 + }, + { + "epoch": 0.08019651175474816, + "grad_norm": 6904.748046875, + "learning_rate": 7.94e-06, + "loss": 6762.0234, + "step": 39700 + }, + { + "epoch": 0.08021671238743197, + "grad_norm": 6402.654296875, + "learning_rate": 7.942000000000001e-06, + "loss": 2423.9895, + "step": 39710 + }, + { + "epoch": 0.08023691302011579, + "grad_norm": 12372.7451171875, + "learning_rate": 7.944e-06, + "loss": 9259.1102, + "step": 39720 + }, + { + "epoch": 0.08025711365279961, + "grad_norm": 89204.4375, + "learning_rate": 7.946e-06, + "loss": 8363.6391, + "step": 39730 + }, + { + "epoch": 0.08027731428548342, + "grad_norm": 9137.5205078125, + "learning_rate": 7.948e-06, + "loss": 2324.202, + "step": 39740 + }, + { + "epoch": 0.08029751491816724, + "grad_norm": 11512.412109375, + "learning_rate": 7.950000000000002e-06, + "loss": 7455.2242, + "step": 39750 + }, + { + "epoch": 0.08031771555085106, + "grad_norm": 1066.3184814453125, + "learning_rate": 7.952000000000001e-06, + "loss": 4981.3871, + "step": 39760 + }, + { + "epoch": 0.08033791618353486, + "grad_norm": 3490.462646484375, + "learning_rate": 7.954e-06, + "loss": 6925.3781, + "step": 39770 + }, + { + "epoch": 0.08035811681621868, + "grad_norm": 5001.97021484375, + "learning_rate": 7.956e-06, + "loss": 11823.757, + "step": 39780 + }, + { + "epoch": 0.0803783174489025, + "grad_norm": 2519.555908203125, + "learning_rate": 7.958e-06, + "loss": 8447.1703, + "step": 39790 + }, + { + "epoch": 0.08039851808158631, + "grad_norm": 120925.078125, + "learning_rate": 7.960000000000002e-06, + "loss": 4831.9508, + "step": 39800 + }, + { + "epoch": 0.08041871871427013, + "grad_norm": 6694.7041015625, + "learning_rate": 7.962000000000001e-06, + "loss": 8032.9359, + "step": 39810 + }, + { + "epoch": 0.08043891934695395, + "grad_norm": 27789.1875, + "learning_rate": 7.964e-06, + "loss": 10730.4875, + "step": 39820 + }, + { + "epoch": 0.08045911997963776, + "grad_norm": 1674.1368408203125, + "learning_rate": 7.966e-06, + "loss": 2252.2666, + "step": 39830 + }, + { + "epoch": 0.08047932061232158, + "grad_norm": 15192.9921875, + "learning_rate": 7.968e-06, + "loss": 3249.5508, + "step": 39840 + }, + { + "epoch": 0.0804995212450054, + "grad_norm": 6048.7236328125, + "learning_rate": 7.970000000000002e-06, + "loss": 4109.6121, + "step": 39850 + }, + { + "epoch": 0.08051972187768922, + "grad_norm": 76045.71875, + "learning_rate": 7.972000000000001e-06, + "loss": 3513.9805, + "step": 39860 + }, + { + "epoch": 0.08053992251037302, + "grad_norm": 7345.72705078125, + "learning_rate": 7.974e-06, + "loss": 3144.8152, + "step": 39870 + }, + { + "epoch": 0.08056012314305684, + "grad_norm": 32987.421875, + "learning_rate": 7.976000000000001e-06, + "loss": 5960.3602, + "step": 39880 + }, + { + "epoch": 0.08058032377574066, + "grad_norm": 10156.55859375, + "learning_rate": 7.978e-06, + "loss": 10736.057, + "step": 39890 + }, + { + "epoch": 0.08060052440842447, + "grad_norm": 24059.533203125, + "learning_rate": 7.980000000000002e-06, + "loss": 8147.3453, + "step": 39900 + }, + { + "epoch": 0.08062072504110829, + "grad_norm": 3350.020263671875, + "learning_rate": 7.982e-06, + "loss": 7568.5711, + "step": 39910 + }, + { + "epoch": 0.08064092567379211, + "grad_norm": 94524.4765625, + "learning_rate": 7.984e-06, + "loss": 4751.4383, + "step": 39920 + }, + { + "epoch": 0.08066112630647591, + "grad_norm": 21472.30859375, + "learning_rate": 7.986000000000001e-06, + "loss": 13051.0266, + "step": 39930 + }, + { + "epoch": 0.08068132693915973, + "grad_norm": 7241.80908203125, + "learning_rate": 7.988e-06, + "loss": 8013.0195, + "step": 39940 + }, + { + "epoch": 0.08070152757184355, + "grad_norm": 617.0857543945312, + "learning_rate": 7.990000000000001e-06, + "loss": 19353.3875, + "step": 39950 + }, + { + "epoch": 0.08072172820452736, + "grad_norm": 121070.609375, + "learning_rate": 7.992e-06, + "loss": 14800.2375, + "step": 39960 + }, + { + "epoch": 0.08074192883721118, + "grad_norm": 109551.15625, + "learning_rate": 7.994e-06, + "loss": 6261.1711, + "step": 39970 + }, + { + "epoch": 0.080762129469895, + "grad_norm": 4198.646484375, + "learning_rate": 7.996000000000001e-06, + "loss": 4440.3973, + "step": 39980 + }, + { + "epoch": 0.0807823301025788, + "grad_norm": 139552.265625, + "learning_rate": 7.998e-06, + "loss": 8700.2586, + "step": 39990 + }, + { + "epoch": 0.08080253073526263, + "grad_norm": 50463.21484375, + "learning_rate": 8.000000000000001e-06, + "loss": 3406.3352, + "step": 40000 + }, + { + "epoch": 0.08082273136794645, + "grad_norm": 198400.421875, + "learning_rate": 8.002e-06, + "loss": 13932.0719, + "step": 40010 + }, + { + "epoch": 0.08084293200063027, + "grad_norm": 3389.3583984375, + "learning_rate": 8.004e-06, + "loss": 12058.0063, + "step": 40020 + }, + { + "epoch": 0.08086313263331407, + "grad_norm": 1874.8421630859375, + "learning_rate": 8.006000000000001e-06, + "loss": 3531.9094, + "step": 40030 + }, + { + "epoch": 0.08088333326599789, + "grad_norm": 6670.62841796875, + "learning_rate": 8.008e-06, + "loss": 7674.7781, + "step": 40040 + }, + { + "epoch": 0.08090353389868171, + "grad_norm": 26396.09375, + "learning_rate": 8.010000000000001e-06, + "loss": 3276.2258, + "step": 40050 + }, + { + "epoch": 0.08092373453136552, + "grad_norm": 1641.13818359375, + "learning_rate": 8.012e-06, + "loss": 5871.7188, + "step": 40060 + }, + { + "epoch": 0.08094393516404934, + "grad_norm": 3913.25390625, + "learning_rate": 8.014e-06, + "loss": 10002.1039, + "step": 40070 + }, + { + "epoch": 0.08096413579673316, + "grad_norm": 17457.3671875, + "learning_rate": 8.016e-06, + "loss": 5320.191, + "step": 40080 + }, + { + "epoch": 0.08098433642941696, + "grad_norm": 222769.203125, + "learning_rate": 8.018e-06, + "loss": 9811.7344, + "step": 40090 + }, + { + "epoch": 0.08100453706210078, + "grad_norm": 244.69468688964844, + "learning_rate": 8.020000000000001e-06, + "loss": 2965.475, + "step": 40100 + }, + { + "epoch": 0.0810247376947846, + "grad_norm": 51949.609375, + "learning_rate": 8.022e-06, + "loss": 8560.1195, + "step": 40110 + }, + { + "epoch": 0.08104493832746841, + "grad_norm": 14071.1787109375, + "learning_rate": 8.024000000000001e-06, + "loss": 6589.793, + "step": 40120 + }, + { + "epoch": 0.08106513896015223, + "grad_norm": 1308.237060546875, + "learning_rate": 8.026e-06, + "loss": 6922.2047, + "step": 40130 + }, + { + "epoch": 0.08108533959283605, + "grad_norm": 36805.61328125, + "learning_rate": 8.028e-06, + "loss": 16121.5016, + "step": 40140 + }, + { + "epoch": 0.08110554022551986, + "grad_norm": 11331.1845703125, + "learning_rate": 8.030000000000001e-06, + "loss": 12517.6867, + "step": 40150 + }, + { + "epoch": 0.08112574085820368, + "grad_norm": 48504.421875, + "learning_rate": 8.032e-06, + "loss": 8231.3391, + "step": 40160 + }, + { + "epoch": 0.0811459414908875, + "grad_norm": 19058.189453125, + "learning_rate": 8.034000000000001e-06, + "loss": 11318.0656, + "step": 40170 + }, + { + "epoch": 0.08116614212357132, + "grad_norm": 582.1078491210938, + "learning_rate": 8.036e-06, + "loss": 6447.1027, + "step": 40180 + }, + { + "epoch": 0.08118634275625512, + "grad_norm": 25558.0625, + "learning_rate": 8.038e-06, + "loss": 11609.4438, + "step": 40190 + }, + { + "epoch": 0.08120654338893894, + "grad_norm": 12635.6357421875, + "learning_rate": 8.040000000000001e-06, + "loss": 4526.7852, + "step": 40200 + }, + { + "epoch": 0.08122674402162276, + "grad_norm": 25416.9375, + "learning_rate": 8.042e-06, + "loss": 13535.3531, + "step": 40210 + }, + { + "epoch": 0.08124694465430657, + "grad_norm": 70479.453125, + "learning_rate": 8.044000000000001e-06, + "loss": 8055.8852, + "step": 40220 + }, + { + "epoch": 0.08126714528699039, + "grad_norm": 165233.625, + "learning_rate": 8.046e-06, + "loss": 12946.3969, + "step": 40230 + }, + { + "epoch": 0.08128734591967421, + "grad_norm": 19213.5, + "learning_rate": 8.048e-06, + "loss": 15753.8594, + "step": 40240 + }, + { + "epoch": 0.08130754655235801, + "grad_norm": 57206.0859375, + "learning_rate": 8.050000000000001e-06, + "loss": 17089.7531, + "step": 40250 + }, + { + "epoch": 0.08132774718504183, + "grad_norm": 1228.0928955078125, + "learning_rate": 8.052e-06, + "loss": 3976.2289, + "step": 40260 + }, + { + "epoch": 0.08134794781772565, + "grad_norm": 11848.275390625, + "learning_rate": 8.054000000000001e-06, + "loss": 8484.875, + "step": 40270 + }, + { + "epoch": 0.08136814845040946, + "grad_norm": 276266.1875, + "learning_rate": 8.056e-06, + "loss": 12426.9141, + "step": 40280 + }, + { + "epoch": 0.08138834908309328, + "grad_norm": 61559.86328125, + "learning_rate": 8.058e-06, + "loss": 11425.925, + "step": 40290 + }, + { + "epoch": 0.0814085497157771, + "grad_norm": 1291.7330322265625, + "learning_rate": 8.06e-06, + "loss": 5568.6199, + "step": 40300 + }, + { + "epoch": 0.08142875034846091, + "grad_norm": 5998.4697265625, + "learning_rate": 8.062000000000002e-06, + "loss": 7887.6211, + "step": 40310 + }, + { + "epoch": 0.08144895098114473, + "grad_norm": 10118.466796875, + "learning_rate": 8.064000000000001e-06, + "loss": 4644.3594, + "step": 40320 + }, + { + "epoch": 0.08146915161382855, + "grad_norm": 1915.4429931640625, + "learning_rate": 8.066e-06, + "loss": 6367.2359, + "step": 40330 + }, + { + "epoch": 0.08148935224651237, + "grad_norm": 42212.15234375, + "learning_rate": 8.068e-06, + "loss": 4104.6332, + "step": 40340 + }, + { + "epoch": 0.08150955287919617, + "grad_norm": 144573.578125, + "learning_rate": 8.07e-06, + "loss": 13091.3047, + "step": 40350 + }, + { + "epoch": 0.08152975351188, + "grad_norm": 4482.333984375, + "learning_rate": 8.072000000000002e-06, + "loss": 8352.6117, + "step": 40360 + }, + { + "epoch": 0.08154995414456381, + "grad_norm": 1080.2662353515625, + "learning_rate": 8.074000000000001e-06, + "loss": 8282.7953, + "step": 40370 + }, + { + "epoch": 0.08157015477724762, + "grad_norm": 97753.1640625, + "learning_rate": 8.076e-06, + "loss": 10584.6508, + "step": 40380 + }, + { + "epoch": 0.08159035540993144, + "grad_norm": 15257.6845703125, + "learning_rate": 8.078e-06, + "loss": 5093.4477, + "step": 40390 + }, + { + "epoch": 0.08161055604261526, + "grad_norm": 31820.259765625, + "learning_rate": 8.08e-06, + "loss": 6351.3605, + "step": 40400 + }, + { + "epoch": 0.08163075667529907, + "grad_norm": 27772.41015625, + "learning_rate": 8.082000000000002e-06, + "loss": 5235.5594, + "step": 40410 + }, + { + "epoch": 0.08165095730798289, + "grad_norm": 1329.22802734375, + "learning_rate": 8.084000000000001e-06, + "loss": 8911.1406, + "step": 40420 + }, + { + "epoch": 0.0816711579406667, + "grad_norm": 1274.9581298828125, + "learning_rate": 8.086e-06, + "loss": 1705.7658, + "step": 40430 + }, + { + "epoch": 0.08169135857335051, + "grad_norm": 20254.853515625, + "learning_rate": 8.088e-06, + "loss": 4269.5426, + "step": 40440 + }, + { + "epoch": 0.08171155920603433, + "grad_norm": 2080.0126953125, + "learning_rate": 8.09e-06, + "loss": 5273.509, + "step": 40450 + }, + { + "epoch": 0.08173175983871815, + "grad_norm": 185210.015625, + "learning_rate": 8.092000000000001e-06, + "loss": 7783.7781, + "step": 40460 + }, + { + "epoch": 0.08175196047140196, + "grad_norm": 0.0, + "learning_rate": 8.094e-06, + "loss": 3303.5586, + "step": 40470 + }, + { + "epoch": 0.08177216110408578, + "grad_norm": 41679.72265625, + "learning_rate": 8.096e-06, + "loss": 11615.9133, + "step": 40480 + }, + { + "epoch": 0.0817923617367696, + "grad_norm": 41106.48828125, + "learning_rate": 8.098000000000001e-06, + "loss": 8146.9812, + "step": 40490 + }, + { + "epoch": 0.08181256236945342, + "grad_norm": 30397.8828125, + "learning_rate": 8.1e-06, + "loss": 8991.0578, + "step": 40500 + }, + { + "epoch": 0.08183276300213722, + "grad_norm": 17588.310546875, + "learning_rate": 8.102000000000001e-06, + "loss": 5484.3359, + "step": 40510 + }, + { + "epoch": 0.08185296363482104, + "grad_norm": 47193.8515625, + "learning_rate": 8.104e-06, + "loss": 6891.1305, + "step": 40520 + }, + { + "epoch": 0.08187316426750486, + "grad_norm": 5683.59814453125, + "learning_rate": 8.106e-06, + "loss": 7957.1953, + "step": 40530 + }, + { + "epoch": 0.08189336490018867, + "grad_norm": 4634.17919921875, + "learning_rate": 8.108000000000001e-06, + "loss": 6651.2094, + "step": 40540 + }, + { + "epoch": 0.08191356553287249, + "grad_norm": 81087.453125, + "learning_rate": 8.110000000000002e-06, + "loss": 7722.2008, + "step": 40550 + }, + { + "epoch": 0.08193376616555631, + "grad_norm": 2660.91845703125, + "learning_rate": 8.112000000000001e-06, + "loss": 10131.1477, + "step": 40560 + }, + { + "epoch": 0.08195396679824012, + "grad_norm": 1882.829833984375, + "learning_rate": 8.114e-06, + "loss": 7819.1078, + "step": 40570 + }, + { + "epoch": 0.08197416743092394, + "grad_norm": 3168.822998046875, + "learning_rate": 8.116e-06, + "loss": 2534.802, + "step": 40580 + }, + { + "epoch": 0.08199436806360776, + "grad_norm": 99651.921875, + "learning_rate": 8.118000000000001e-06, + "loss": 6540.0562, + "step": 40590 + }, + { + "epoch": 0.08201456869629156, + "grad_norm": 6229.23486328125, + "learning_rate": 8.120000000000002e-06, + "loss": 4923.6848, + "step": 40600 + }, + { + "epoch": 0.08203476932897538, + "grad_norm": 94453.828125, + "learning_rate": 8.122000000000001e-06, + "loss": 11084.7305, + "step": 40610 + }, + { + "epoch": 0.0820549699616592, + "grad_norm": 77655.078125, + "learning_rate": 8.124e-06, + "loss": 10169.3242, + "step": 40620 + }, + { + "epoch": 0.08207517059434301, + "grad_norm": 4220.26611328125, + "learning_rate": 8.126e-06, + "loss": 3540.909, + "step": 40630 + }, + { + "epoch": 0.08209537122702683, + "grad_norm": 13621.0712890625, + "learning_rate": 8.128e-06, + "loss": 6303.1934, + "step": 40640 + }, + { + "epoch": 0.08211557185971065, + "grad_norm": 33185.1796875, + "learning_rate": 8.13e-06, + "loss": 8103.4086, + "step": 40650 + }, + { + "epoch": 0.08213577249239447, + "grad_norm": 31883.16796875, + "learning_rate": 8.132000000000001e-06, + "loss": 5901.8852, + "step": 40660 + }, + { + "epoch": 0.08215597312507827, + "grad_norm": 11449.9443359375, + "learning_rate": 8.134e-06, + "loss": 9256.4297, + "step": 40670 + }, + { + "epoch": 0.0821761737577621, + "grad_norm": 29135.91015625, + "learning_rate": 8.136000000000001e-06, + "loss": 7337.9438, + "step": 40680 + }, + { + "epoch": 0.08219637439044591, + "grad_norm": 14136.83203125, + "learning_rate": 8.138e-06, + "loss": 6730.8055, + "step": 40690 + }, + { + "epoch": 0.08221657502312972, + "grad_norm": 13265.0869140625, + "learning_rate": 8.14e-06, + "loss": 4310.3727, + "step": 40700 + }, + { + "epoch": 0.08223677565581354, + "grad_norm": 15514.4140625, + "learning_rate": 8.142000000000001e-06, + "loss": 5226.1828, + "step": 40710 + }, + { + "epoch": 0.08225697628849736, + "grad_norm": 4495.73193359375, + "learning_rate": 8.144e-06, + "loss": 6832.6266, + "step": 40720 + }, + { + "epoch": 0.08227717692118117, + "grad_norm": 1404.868408203125, + "learning_rate": 8.146000000000001e-06, + "loss": 7836.4141, + "step": 40730 + }, + { + "epoch": 0.08229737755386499, + "grad_norm": 3226.86572265625, + "learning_rate": 8.148e-06, + "loss": 8494.9594, + "step": 40740 + }, + { + "epoch": 0.0823175781865488, + "grad_norm": 55690.30078125, + "learning_rate": 8.15e-06, + "loss": 9383.1547, + "step": 40750 + }, + { + "epoch": 0.08233777881923261, + "grad_norm": 301680.78125, + "learning_rate": 8.152000000000001e-06, + "loss": 13518.7516, + "step": 40760 + }, + { + "epoch": 0.08235797945191643, + "grad_norm": 2112.427490234375, + "learning_rate": 8.154e-06, + "loss": 7242.3891, + "step": 40770 + }, + { + "epoch": 0.08237818008460025, + "grad_norm": 22923.16015625, + "learning_rate": 8.156000000000001e-06, + "loss": 8100.8172, + "step": 40780 + }, + { + "epoch": 0.08239838071728406, + "grad_norm": 74857.6171875, + "learning_rate": 8.158e-06, + "loss": 9677.0531, + "step": 40790 + }, + { + "epoch": 0.08241858134996788, + "grad_norm": 2077.906982421875, + "learning_rate": 8.16e-06, + "loss": 9760.7586, + "step": 40800 + }, + { + "epoch": 0.0824387819826517, + "grad_norm": 14872.99609375, + "learning_rate": 8.162e-06, + "loss": 5884.8477, + "step": 40810 + }, + { + "epoch": 0.08245898261533552, + "grad_norm": 26264.728515625, + "learning_rate": 8.164e-06, + "loss": 4220.2937, + "step": 40820 + }, + { + "epoch": 0.08247918324801932, + "grad_norm": 242636.8125, + "learning_rate": 8.166000000000001e-06, + "loss": 21058.2812, + "step": 40830 + }, + { + "epoch": 0.08249938388070314, + "grad_norm": 22350.21875, + "learning_rate": 8.168e-06, + "loss": 4613.8895, + "step": 40840 + }, + { + "epoch": 0.08251958451338696, + "grad_norm": 107402.1484375, + "learning_rate": 8.17e-06, + "loss": 5720.1836, + "step": 40850 + }, + { + "epoch": 0.08253978514607077, + "grad_norm": 5334.63330078125, + "learning_rate": 8.172e-06, + "loss": 6233.7301, + "step": 40860 + }, + { + "epoch": 0.08255998577875459, + "grad_norm": 7512.33984375, + "learning_rate": 8.174e-06, + "loss": 4921.4266, + "step": 40870 + }, + { + "epoch": 0.08258018641143841, + "grad_norm": 24535.8359375, + "learning_rate": 8.176000000000001e-06, + "loss": 9589.1375, + "step": 40880 + }, + { + "epoch": 0.08260038704412222, + "grad_norm": 66499.375, + "learning_rate": 8.178e-06, + "loss": 9480.3016, + "step": 40890 + }, + { + "epoch": 0.08262058767680604, + "grad_norm": 205601.734375, + "learning_rate": 8.18e-06, + "loss": 11940.1953, + "step": 40900 + }, + { + "epoch": 0.08264078830948986, + "grad_norm": 123722.3984375, + "learning_rate": 8.182e-06, + "loss": 6029.293, + "step": 40910 + }, + { + "epoch": 0.08266098894217366, + "grad_norm": 1824.897216796875, + "learning_rate": 8.184000000000002e-06, + "loss": 5084.9781, + "step": 40920 + }, + { + "epoch": 0.08268118957485748, + "grad_norm": 14161.8515625, + "learning_rate": 8.186000000000001e-06, + "loss": 13331.1234, + "step": 40930 + }, + { + "epoch": 0.0827013902075413, + "grad_norm": 55774.5078125, + "learning_rate": 8.188e-06, + "loss": 15325.6484, + "step": 40940 + }, + { + "epoch": 0.08272159084022511, + "grad_norm": 26874.474609375, + "learning_rate": 8.19e-06, + "loss": 7399.3547, + "step": 40950 + }, + { + "epoch": 0.08274179147290893, + "grad_norm": 7118.62744140625, + "learning_rate": 8.192e-06, + "loss": 6999.493, + "step": 40960 + }, + { + "epoch": 0.08276199210559275, + "grad_norm": 100531.984375, + "learning_rate": 8.194000000000002e-06, + "loss": 14011.65, + "step": 40970 + }, + { + "epoch": 0.08278219273827657, + "grad_norm": 160399.140625, + "learning_rate": 8.196e-06, + "loss": 12354.1617, + "step": 40980 + }, + { + "epoch": 0.08280239337096038, + "grad_norm": 5848.95751953125, + "learning_rate": 8.198e-06, + "loss": 3082.4672, + "step": 40990 + }, + { + "epoch": 0.0828225940036442, + "grad_norm": 8428.5556640625, + "learning_rate": 8.2e-06, + "loss": 13015.293, + "step": 41000 + }, + { + "epoch": 0.08284279463632802, + "grad_norm": 4287.513671875, + "learning_rate": 8.202e-06, + "loss": 7013.9977, + "step": 41010 + }, + { + "epoch": 0.08286299526901182, + "grad_norm": 7267.35693359375, + "learning_rate": 8.204000000000001e-06, + "loss": 1764.1967, + "step": 41020 + }, + { + "epoch": 0.08288319590169564, + "grad_norm": 48839.79296875, + "learning_rate": 8.206e-06, + "loss": 7416.1516, + "step": 41030 + }, + { + "epoch": 0.08290339653437946, + "grad_norm": 103499.2265625, + "learning_rate": 8.208e-06, + "loss": 4673.3094, + "step": 41040 + }, + { + "epoch": 0.08292359716706327, + "grad_norm": 38088.94140625, + "learning_rate": 8.210000000000001e-06, + "loss": 3913.682, + "step": 41050 + }, + { + "epoch": 0.08294379779974709, + "grad_norm": 20646.595703125, + "learning_rate": 8.212e-06, + "loss": 15146.4859, + "step": 41060 + }, + { + "epoch": 0.08296399843243091, + "grad_norm": 20211.49609375, + "learning_rate": 8.214000000000001e-06, + "loss": 6794.0445, + "step": 41070 + }, + { + "epoch": 0.08298419906511471, + "grad_norm": 9815.189453125, + "learning_rate": 8.216e-06, + "loss": 1912.9625, + "step": 41080 + }, + { + "epoch": 0.08300439969779853, + "grad_norm": 2281.5830078125, + "learning_rate": 8.218e-06, + "loss": 5258.448, + "step": 41090 + }, + { + "epoch": 0.08302460033048235, + "grad_norm": 112271.7265625, + "learning_rate": 8.220000000000001e-06, + "loss": 9048.7, + "step": 41100 + }, + { + "epoch": 0.08304480096316616, + "grad_norm": 36797.2421875, + "learning_rate": 8.222000000000002e-06, + "loss": 6316.8016, + "step": 41110 + }, + { + "epoch": 0.08306500159584998, + "grad_norm": 28801.236328125, + "learning_rate": 8.224000000000001e-06, + "loss": 3657.8195, + "step": 41120 + }, + { + "epoch": 0.0830852022285338, + "grad_norm": 76225.296875, + "learning_rate": 8.226e-06, + "loss": 5141.2758, + "step": 41130 + }, + { + "epoch": 0.08310540286121762, + "grad_norm": 7201.908203125, + "learning_rate": 8.228e-06, + "loss": 3611.9148, + "step": 41140 + }, + { + "epoch": 0.08312560349390143, + "grad_norm": 1219.21044921875, + "learning_rate": 8.23e-06, + "loss": 11574.3977, + "step": 41150 + }, + { + "epoch": 0.08314580412658525, + "grad_norm": 814.0338745117188, + "learning_rate": 8.232000000000002e-06, + "loss": 10017.6828, + "step": 41160 + }, + { + "epoch": 0.08316600475926907, + "grad_norm": 11060.9013671875, + "learning_rate": 8.234000000000001e-06, + "loss": 3199.4172, + "step": 41170 + }, + { + "epoch": 0.08318620539195287, + "grad_norm": 25903.88671875, + "learning_rate": 8.236e-06, + "loss": 15512.2375, + "step": 41180 + }, + { + "epoch": 0.08320640602463669, + "grad_norm": 5494.11669921875, + "learning_rate": 8.238e-06, + "loss": 14494.5094, + "step": 41190 + }, + { + "epoch": 0.08322660665732051, + "grad_norm": 14773.970703125, + "learning_rate": 8.24e-06, + "loss": 8335.5977, + "step": 41200 + }, + { + "epoch": 0.08324680729000432, + "grad_norm": 1082.69482421875, + "learning_rate": 8.242000000000002e-06, + "loss": 10625.1453, + "step": 41210 + }, + { + "epoch": 0.08326700792268814, + "grad_norm": 4312.66064453125, + "learning_rate": 8.244000000000001e-06, + "loss": 3599.4402, + "step": 41220 + }, + { + "epoch": 0.08328720855537196, + "grad_norm": 3712.744873046875, + "learning_rate": 8.246e-06, + "loss": 7054.4055, + "step": 41230 + }, + { + "epoch": 0.08330740918805576, + "grad_norm": 114726.5546875, + "learning_rate": 8.248e-06, + "loss": 5479.2152, + "step": 41240 + }, + { + "epoch": 0.08332760982073958, + "grad_norm": 1347.6270751953125, + "learning_rate": 8.25e-06, + "loss": 10970.2039, + "step": 41250 + }, + { + "epoch": 0.0833478104534234, + "grad_norm": 5829.4560546875, + "learning_rate": 8.252000000000002e-06, + "loss": 15760.2453, + "step": 41260 + }, + { + "epoch": 0.08336801108610721, + "grad_norm": 5060.927734375, + "learning_rate": 8.254000000000001e-06, + "loss": 7730.5898, + "step": 41270 + }, + { + "epoch": 0.08338821171879103, + "grad_norm": 16385.765625, + "learning_rate": 8.256e-06, + "loss": 8036.0609, + "step": 41280 + }, + { + "epoch": 0.08340841235147485, + "grad_norm": 4734.267578125, + "learning_rate": 8.258000000000001e-06, + "loss": 7002.5055, + "step": 41290 + }, + { + "epoch": 0.08342861298415867, + "grad_norm": 17410.02734375, + "learning_rate": 8.26e-06, + "loss": 5573.9242, + "step": 41300 + }, + { + "epoch": 0.08344881361684248, + "grad_norm": 13985.080078125, + "learning_rate": 8.262000000000002e-06, + "loss": 7824.8117, + "step": 41310 + }, + { + "epoch": 0.0834690142495263, + "grad_norm": 55945.12109375, + "learning_rate": 8.264e-06, + "loss": 17612.0109, + "step": 41320 + }, + { + "epoch": 0.08348921488221012, + "grad_norm": 100237.703125, + "learning_rate": 8.266e-06, + "loss": 12425.1938, + "step": 41330 + }, + { + "epoch": 0.08350941551489392, + "grad_norm": 78680.2421875, + "learning_rate": 8.268000000000001e-06, + "loss": 11090.5492, + "step": 41340 + }, + { + "epoch": 0.08352961614757774, + "grad_norm": 39930.0625, + "learning_rate": 8.27e-06, + "loss": 8777.8687, + "step": 41350 + }, + { + "epoch": 0.08354981678026156, + "grad_norm": 8343.9677734375, + "learning_rate": 8.272000000000001e-06, + "loss": 11805.3586, + "step": 41360 + }, + { + "epoch": 0.08357001741294537, + "grad_norm": 63257.31640625, + "learning_rate": 8.274e-06, + "loss": 5435.0145, + "step": 41370 + }, + { + "epoch": 0.08359021804562919, + "grad_norm": 116018.5, + "learning_rate": 8.276e-06, + "loss": 10138.4969, + "step": 41380 + }, + { + "epoch": 0.08361041867831301, + "grad_norm": 257709.28125, + "learning_rate": 8.278000000000001e-06, + "loss": 14712.2844, + "step": 41390 + }, + { + "epoch": 0.08363061931099681, + "grad_norm": 1248.3209228515625, + "learning_rate": 8.28e-06, + "loss": 10066.275, + "step": 41400 + }, + { + "epoch": 0.08365081994368063, + "grad_norm": 56860.54296875, + "learning_rate": 8.282000000000001e-06, + "loss": 13141.9406, + "step": 41410 + }, + { + "epoch": 0.08367102057636445, + "grad_norm": 369.15606689453125, + "learning_rate": 8.284e-06, + "loss": 3351.6078, + "step": 41420 + }, + { + "epoch": 0.08369122120904826, + "grad_norm": 116067.78125, + "learning_rate": 8.286e-06, + "loss": 6864.1227, + "step": 41430 + }, + { + "epoch": 0.08371142184173208, + "grad_norm": 7510.9677734375, + "learning_rate": 8.288000000000001e-06, + "loss": 5731.6566, + "step": 41440 + }, + { + "epoch": 0.0837316224744159, + "grad_norm": 20417.4609375, + "learning_rate": 8.29e-06, + "loss": 5320.3297, + "step": 41450 + }, + { + "epoch": 0.08375182310709972, + "grad_norm": 11668.5693359375, + "learning_rate": 8.292000000000001e-06, + "loss": 6707.7344, + "step": 41460 + }, + { + "epoch": 0.08377202373978353, + "grad_norm": 22317.2578125, + "learning_rate": 8.294e-06, + "loss": 9096.1117, + "step": 41470 + }, + { + "epoch": 0.08379222437246735, + "grad_norm": 42304.11328125, + "learning_rate": 8.296000000000002e-06, + "loss": 9606.8453, + "step": 41480 + }, + { + "epoch": 0.08381242500515117, + "grad_norm": 9555.82421875, + "learning_rate": 8.298000000000001e-06, + "loss": 2802.8943, + "step": 41490 + }, + { + "epoch": 0.08383262563783497, + "grad_norm": 236936.953125, + "learning_rate": 8.3e-06, + "loss": 7314.8891, + "step": 41500 + }, + { + "epoch": 0.0838528262705188, + "grad_norm": 56028.38671875, + "learning_rate": 8.302000000000001e-06, + "loss": 4152.777, + "step": 41510 + }, + { + "epoch": 0.08387302690320261, + "grad_norm": 112062.1171875, + "learning_rate": 8.304e-06, + "loss": 6520.5078, + "step": 41520 + }, + { + "epoch": 0.08389322753588642, + "grad_norm": 2481.0478515625, + "learning_rate": 8.306000000000001e-06, + "loss": 5270.9754, + "step": 41530 + }, + { + "epoch": 0.08391342816857024, + "grad_norm": 2536.166748046875, + "learning_rate": 8.308e-06, + "loss": 14075.2469, + "step": 41540 + }, + { + "epoch": 0.08393362880125406, + "grad_norm": 179814.625, + "learning_rate": 8.31e-06, + "loss": 5538.25, + "step": 41550 + }, + { + "epoch": 0.08395382943393787, + "grad_norm": 9087.1171875, + "learning_rate": 8.312000000000001e-06, + "loss": 7486.8555, + "step": 41560 + }, + { + "epoch": 0.08397403006662169, + "grad_norm": 90505.390625, + "learning_rate": 8.314e-06, + "loss": 7115.8164, + "step": 41570 + }, + { + "epoch": 0.0839942306993055, + "grad_norm": 29490.439453125, + "learning_rate": 8.316000000000001e-06, + "loss": 6338.8461, + "step": 41580 + }, + { + "epoch": 0.08401443133198931, + "grad_norm": 8396.1962890625, + "learning_rate": 8.318e-06, + "loss": 5669.9238, + "step": 41590 + }, + { + "epoch": 0.08403463196467313, + "grad_norm": 931.0748901367188, + "learning_rate": 8.32e-06, + "loss": 6092.6551, + "step": 41600 + }, + { + "epoch": 0.08405483259735695, + "grad_norm": 17669.689453125, + "learning_rate": 8.322000000000001e-06, + "loss": 10788.1562, + "step": 41610 + }, + { + "epoch": 0.08407503323004077, + "grad_norm": 109818.4140625, + "learning_rate": 8.324e-06, + "loss": 8523.875, + "step": 41620 + }, + { + "epoch": 0.08409523386272458, + "grad_norm": 2154.557373046875, + "learning_rate": 8.326000000000001e-06, + "loss": 3069.64, + "step": 41630 + }, + { + "epoch": 0.0841154344954084, + "grad_norm": 8143.3037109375, + "learning_rate": 8.328e-06, + "loss": 5419.5227, + "step": 41640 + }, + { + "epoch": 0.08413563512809222, + "grad_norm": 3998.93408203125, + "learning_rate": 8.33e-06, + "loss": 10531.0578, + "step": 41650 + }, + { + "epoch": 0.08415583576077602, + "grad_norm": 3506.487060546875, + "learning_rate": 8.332000000000001e-06, + "loss": 7433.2867, + "step": 41660 + }, + { + "epoch": 0.08417603639345984, + "grad_norm": 475.2569274902344, + "learning_rate": 8.334e-06, + "loss": 1908.1389, + "step": 41670 + }, + { + "epoch": 0.08419623702614366, + "grad_norm": 27469.650390625, + "learning_rate": 8.336000000000001e-06, + "loss": 10805.4133, + "step": 41680 + }, + { + "epoch": 0.08421643765882747, + "grad_norm": 87671.140625, + "learning_rate": 8.338e-06, + "loss": 4320.0445, + "step": 41690 + }, + { + "epoch": 0.08423663829151129, + "grad_norm": 3160.3095703125, + "learning_rate": 8.34e-06, + "loss": 2594.9328, + "step": 41700 + }, + { + "epoch": 0.08425683892419511, + "grad_norm": 1452.7987060546875, + "learning_rate": 8.342e-06, + "loss": 6387.3562, + "step": 41710 + }, + { + "epoch": 0.08427703955687892, + "grad_norm": 8222.1748046875, + "learning_rate": 8.344000000000002e-06, + "loss": 3764.209, + "step": 41720 + }, + { + "epoch": 0.08429724018956274, + "grad_norm": 564.2569580078125, + "learning_rate": 8.346000000000001e-06, + "loss": 3810.8324, + "step": 41730 + }, + { + "epoch": 0.08431744082224656, + "grad_norm": 14399.9697265625, + "learning_rate": 8.348e-06, + "loss": 3908.6234, + "step": 41740 + }, + { + "epoch": 0.08433764145493036, + "grad_norm": 2448.87158203125, + "learning_rate": 8.35e-06, + "loss": 15068.0359, + "step": 41750 + }, + { + "epoch": 0.08435784208761418, + "grad_norm": 42398.54296875, + "learning_rate": 8.352e-06, + "loss": 7184.9453, + "step": 41760 + }, + { + "epoch": 0.084378042720298, + "grad_norm": 28950.689453125, + "learning_rate": 8.354000000000002e-06, + "loss": 6740.318, + "step": 41770 + }, + { + "epoch": 0.08439824335298182, + "grad_norm": 47085.52734375, + "learning_rate": 8.356000000000001e-06, + "loss": 5860.8484, + "step": 41780 + }, + { + "epoch": 0.08441844398566563, + "grad_norm": 54108.6640625, + "learning_rate": 8.358e-06, + "loss": 6646.1625, + "step": 41790 + }, + { + "epoch": 0.08443864461834945, + "grad_norm": 163795.5625, + "learning_rate": 8.36e-06, + "loss": 7454.1359, + "step": 41800 + }, + { + "epoch": 0.08445884525103327, + "grad_norm": 3492.627197265625, + "learning_rate": 8.362e-06, + "loss": 6664.3211, + "step": 41810 + }, + { + "epoch": 0.08447904588371707, + "grad_norm": 20185.677734375, + "learning_rate": 8.364000000000002e-06, + "loss": 22852.5844, + "step": 41820 + }, + { + "epoch": 0.0844992465164009, + "grad_norm": 2030.0076904296875, + "learning_rate": 8.366000000000001e-06, + "loss": 12974.6875, + "step": 41830 + }, + { + "epoch": 0.08451944714908471, + "grad_norm": 9584.1787109375, + "learning_rate": 8.368e-06, + "loss": 4533.775, + "step": 41840 + }, + { + "epoch": 0.08453964778176852, + "grad_norm": 4517.90625, + "learning_rate": 8.370000000000001e-06, + "loss": 5883.1625, + "step": 41850 + }, + { + "epoch": 0.08455984841445234, + "grad_norm": 10755.6611328125, + "learning_rate": 8.372e-06, + "loss": 10413.6922, + "step": 41860 + }, + { + "epoch": 0.08458004904713616, + "grad_norm": 21950.08984375, + "learning_rate": 8.374000000000001e-06, + "loss": 9778.5484, + "step": 41870 + }, + { + "epoch": 0.08460024967981997, + "grad_norm": 3933.177490234375, + "learning_rate": 8.376e-06, + "loss": 7730.9859, + "step": 41880 + }, + { + "epoch": 0.08462045031250379, + "grad_norm": 12222.2763671875, + "learning_rate": 8.378e-06, + "loss": 8659.7414, + "step": 41890 + }, + { + "epoch": 0.0846406509451876, + "grad_norm": 10905.509765625, + "learning_rate": 8.380000000000001e-06, + "loss": 5915.8578, + "step": 41900 + }, + { + "epoch": 0.08466085157787141, + "grad_norm": 105395.125, + "learning_rate": 8.382e-06, + "loss": 9507.8828, + "step": 41910 + }, + { + "epoch": 0.08468105221055523, + "grad_norm": 3424.2529296875, + "learning_rate": 8.384000000000001e-06, + "loss": 7726.7664, + "step": 41920 + }, + { + "epoch": 0.08470125284323905, + "grad_norm": 33967.18359375, + "learning_rate": 8.386e-06, + "loss": 8855.0, + "step": 41930 + }, + { + "epoch": 0.08472145347592287, + "grad_norm": 7797.1220703125, + "learning_rate": 8.388e-06, + "loss": 3839.827, + "step": 41940 + }, + { + "epoch": 0.08474165410860668, + "grad_norm": 7541.84033203125, + "learning_rate": 8.390000000000001e-06, + "loss": 3090.4375, + "step": 41950 + }, + { + "epoch": 0.0847618547412905, + "grad_norm": 17574.537109375, + "learning_rate": 8.392e-06, + "loss": 5040.1543, + "step": 41960 + }, + { + "epoch": 0.08478205537397432, + "grad_norm": 1030.0333251953125, + "learning_rate": 8.394000000000001e-06, + "loss": 5935.3414, + "step": 41970 + }, + { + "epoch": 0.08480225600665812, + "grad_norm": 6994.45458984375, + "learning_rate": 8.396e-06, + "loss": 9699.6352, + "step": 41980 + }, + { + "epoch": 0.08482245663934194, + "grad_norm": 10546.6142578125, + "learning_rate": 8.398e-06, + "loss": 3870.4992, + "step": 41990 + }, + { + "epoch": 0.08484265727202576, + "grad_norm": 9743.3564453125, + "learning_rate": 8.400000000000001e-06, + "loss": 4205.3859, + "step": 42000 + }, + { + "epoch": 0.08486285790470957, + "grad_norm": 87897.9765625, + "learning_rate": 8.402e-06, + "loss": 4640.4523, + "step": 42010 + }, + { + "epoch": 0.08488305853739339, + "grad_norm": 2097.16796875, + "learning_rate": 8.404000000000001e-06, + "loss": 13778.1781, + "step": 42020 + }, + { + "epoch": 0.08490325917007721, + "grad_norm": 184030.875, + "learning_rate": 8.406e-06, + "loss": 13229.1594, + "step": 42030 + }, + { + "epoch": 0.08492345980276102, + "grad_norm": 155910.6875, + "learning_rate": 8.408e-06, + "loss": 21251.0531, + "step": 42040 + }, + { + "epoch": 0.08494366043544484, + "grad_norm": 5056.4873046875, + "learning_rate": 8.41e-06, + "loss": 5738.8969, + "step": 42050 + }, + { + "epoch": 0.08496386106812866, + "grad_norm": 559.0930786132812, + "learning_rate": 8.412e-06, + "loss": 3532.5789, + "step": 42060 + }, + { + "epoch": 0.08498406170081246, + "grad_norm": 8758.919921875, + "learning_rate": 8.414000000000001e-06, + "loss": 2842.2705, + "step": 42070 + }, + { + "epoch": 0.08500426233349628, + "grad_norm": 15465.9951171875, + "learning_rate": 8.416e-06, + "loss": 6270.0727, + "step": 42080 + }, + { + "epoch": 0.0850244629661801, + "grad_norm": 8027.24072265625, + "learning_rate": 8.418000000000001e-06, + "loss": 4812.3309, + "step": 42090 + }, + { + "epoch": 0.08504466359886392, + "grad_norm": 609.443603515625, + "learning_rate": 8.42e-06, + "loss": 10894.525, + "step": 42100 + }, + { + "epoch": 0.08506486423154773, + "grad_norm": 7076.048828125, + "learning_rate": 8.422e-06, + "loss": 5881.668, + "step": 42110 + }, + { + "epoch": 0.08508506486423155, + "grad_norm": 24785.140625, + "learning_rate": 8.424000000000001e-06, + "loss": 1699.6574, + "step": 42120 + }, + { + "epoch": 0.08510526549691537, + "grad_norm": 760.8279418945312, + "learning_rate": 8.426e-06, + "loss": 11730.9977, + "step": 42130 + }, + { + "epoch": 0.08512546612959918, + "grad_norm": 747.1117553710938, + "learning_rate": 8.428000000000001e-06, + "loss": 20974.7047, + "step": 42140 + }, + { + "epoch": 0.085145666762283, + "grad_norm": 41948.50390625, + "learning_rate": 8.43e-06, + "loss": 5694.0992, + "step": 42150 + }, + { + "epoch": 0.08516586739496682, + "grad_norm": 16357.6708984375, + "learning_rate": 8.432e-06, + "loss": 10420.9578, + "step": 42160 + }, + { + "epoch": 0.08518606802765062, + "grad_norm": 1879.9781494140625, + "learning_rate": 8.434000000000001e-06, + "loss": 7895.2289, + "step": 42170 + }, + { + "epoch": 0.08520626866033444, + "grad_norm": 39903.55078125, + "learning_rate": 8.436e-06, + "loss": 4979.2742, + "step": 42180 + }, + { + "epoch": 0.08522646929301826, + "grad_norm": 1339.744384765625, + "learning_rate": 8.438000000000001e-06, + "loss": 6245.391, + "step": 42190 + }, + { + "epoch": 0.08524666992570207, + "grad_norm": 1816.0333251953125, + "learning_rate": 8.44e-06, + "loss": 4067.9527, + "step": 42200 + }, + { + "epoch": 0.08526687055838589, + "grad_norm": 71905.3125, + "learning_rate": 8.442e-06, + "loss": 8999.5078, + "step": 42210 + }, + { + "epoch": 0.08528707119106971, + "grad_norm": 12694.8916015625, + "learning_rate": 8.444e-06, + "loss": 10203.0578, + "step": 42220 + }, + { + "epoch": 0.08530727182375351, + "grad_norm": 179630.40625, + "learning_rate": 8.446e-06, + "loss": 10750.3625, + "step": 42230 + }, + { + "epoch": 0.08532747245643733, + "grad_norm": 3834.916259765625, + "learning_rate": 8.448000000000001e-06, + "loss": 1932.7451, + "step": 42240 + }, + { + "epoch": 0.08534767308912115, + "grad_norm": 73394.078125, + "learning_rate": 8.45e-06, + "loss": 11361.5117, + "step": 42250 + }, + { + "epoch": 0.08536787372180497, + "grad_norm": 931.38818359375, + "learning_rate": 8.452e-06, + "loss": 3650.0078, + "step": 42260 + }, + { + "epoch": 0.08538807435448878, + "grad_norm": 6572.1572265625, + "learning_rate": 8.454e-06, + "loss": 13468.2078, + "step": 42270 + }, + { + "epoch": 0.0854082749871726, + "grad_norm": 15417.1220703125, + "learning_rate": 8.456000000000002e-06, + "loss": 12843.9617, + "step": 42280 + }, + { + "epoch": 0.08542847561985642, + "grad_norm": 2588.291748046875, + "learning_rate": 8.458000000000001e-06, + "loss": 6050.5621, + "step": 42290 + }, + { + "epoch": 0.08544867625254023, + "grad_norm": 44936.625, + "learning_rate": 8.46e-06, + "loss": 5221.3516, + "step": 42300 + }, + { + "epoch": 0.08546887688522405, + "grad_norm": 1781.3179931640625, + "learning_rate": 8.462e-06, + "loss": 8240.8266, + "step": 42310 + }, + { + "epoch": 0.08548907751790787, + "grad_norm": 24681.775390625, + "learning_rate": 8.464e-06, + "loss": 3954.1141, + "step": 42320 + }, + { + "epoch": 0.08550927815059167, + "grad_norm": 576.2008666992188, + "learning_rate": 8.466000000000002e-06, + "loss": 7188.2523, + "step": 42330 + }, + { + "epoch": 0.08552947878327549, + "grad_norm": 33491.93359375, + "learning_rate": 8.468000000000001e-06, + "loss": 3419.0141, + "step": 42340 + }, + { + "epoch": 0.08554967941595931, + "grad_norm": 3526.057373046875, + "learning_rate": 8.47e-06, + "loss": 5723.3562, + "step": 42350 + }, + { + "epoch": 0.08556988004864312, + "grad_norm": 743.8654174804688, + "learning_rate": 8.472e-06, + "loss": 1950.6512, + "step": 42360 + }, + { + "epoch": 0.08559008068132694, + "grad_norm": 15869.1298828125, + "learning_rate": 8.474e-06, + "loss": 9307.7625, + "step": 42370 + }, + { + "epoch": 0.08561028131401076, + "grad_norm": 29683.7265625, + "learning_rate": 8.476000000000002e-06, + "loss": 6335.1082, + "step": 42380 + }, + { + "epoch": 0.08563048194669456, + "grad_norm": 23074.740234375, + "learning_rate": 8.478e-06, + "loss": 11643.9516, + "step": 42390 + }, + { + "epoch": 0.08565068257937838, + "grad_norm": 114632.734375, + "learning_rate": 8.48e-06, + "loss": 7694.8813, + "step": 42400 + }, + { + "epoch": 0.0856708832120622, + "grad_norm": 21166.189453125, + "learning_rate": 8.482e-06, + "loss": 10006.7461, + "step": 42410 + }, + { + "epoch": 0.08569108384474602, + "grad_norm": 15509.728515625, + "learning_rate": 8.484e-06, + "loss": 7547.9719, + "step": 42420 + }, + { + "epoch": 0.08571128447742983, + "grad_norm": 2775.473388671875, + "learning_rate": 8.486000000000001e-06, + "loss": 5050.9586, + "step": 42430 + }, + { + "epoch": 0.08573148511011365, + "grad_norm": 292850.53125, + "learning_rate": 8.488e-06, + "loss": 13793.7687, + "step": 42440 + }, + { + "epoch": 0.08575168574279747, + "grad_norm": 45287.13671875, + "learning_rate": 8.49e-06, + "loss": 10064.95, + "step": 42450 + }, + { + "epoch": 0.08577188637548128, + "grad_norm": 75873.78125, + "learning_rate": 8.492000000000001e-06, + "loss": 6721.2344, + "step": 42460 + }, + { + "epoch": 0.0857920870081651, + "grad_norm": 24411.111328125, + "learning_rate": 8.494e-06, + "loss": 3590.3309, + "step": 42470 + }, + { + "epoch": 0.08581228764084892, + "grad_norm": 432.0838928222656, + "learning_rate": 8.496000000000001e-06, + "loss": 6882.2875, + "step": 42480 + }, + { + "epoch": 0.08583248827353272, + "grad_norm": 4706.6728515625, + "learning_rate": 8.498e-06, + "loss": 4429.4547, + "step": 42490 + }, + { + "epoch": 0.08585268890621654, + "grad_norm": 31477.52734375, + "learning_rate": 8.5e-06, + "loss": 8158.0484, + "step": 42500 + }, + { + "epoch": 0.08587288953890036, + "grad_norm": 4305.46923828125, + "learning_rate": 8.502000000000001e-06, + "loss": 7542.6008, + "step": 42510 + }, + { + "epoch": 0.08589309017158417, + "grad_norm": 12946.8310546875, + "learning_rate": 8.504000000000002e-06, + "loss": 6732.8016, + "step": 42520 + }, + { + "epoch": 0.08591329080426799, + "grad_norm": 7430.1025390625, + "learning_rate": 8.506000000000001e-06, + "loss": 3261.8887, + "step": 42530 + }, + { + "epoch": 0.08593349143695181, + "grad_norm": 150075.421875, + "learning_rate": 8.508e-06, + "loss": 8701.1898, + "step": 42540 + }, + { + "epoch": 0.08595369206963561, + "grad_norm": 9417.49609375, + "learning_rate": 8.51e-06, + "loss": 8934.2711, + "step": 42550 + }, + { + "epoch": 0.08597389270231943, + "grad_norm": 91239.53125, + "learning_rate": 8.512e-06, + "loss": 9881.0484, + "step": 42560 + }, + { + "epoch": 0.08599409333500325, + "grad_norm": 20459.77734375, + "learning_rate": 8.514000000000002e-06, + "loss": 17576.4797, + "step": 42570 + }, + { + "epoch": 0.08601429396768706, + "grad_norm": 5483.46630859375, + "learning_rate": 8.516000000000001e-06, + "loss": 14457.2109, + "step": 42580 + }, + { + "epoch": 0.08603449460037088, + "grad_norm": 0.0, + "learning_rate": 8.518e-06, + "loss": 5349.5102, + "step": 42590 + }, + { + "epoch": 0.0860546952330547, + "grad_norm": 4582.451171875, + "learning_rate": 8.52e-06, + "loss": 12027.6383, + "step": 42600 + }, + { + "epoch": 0.08607489586573852, + "grad_norm": 7730.94775390625, + "learning_rate": 8.522e-06, + "loss": 3084.7609, + "step": 42610 + }, + { + "epoch": 0.08609509649842233, + "grad_norm": 38280.34375, + "learning_rate": 8.524000000000002e-06, + "loss": 7778.3977, + "step": 42620 + }, + { + "epoch": 0.08611529713110615, + "grad_norm": 58602.37109375, + "learning_rate": 8.526000000000001e-06, + "loss": 4445.9816, + "step": 42630 + }, + { + "epoch": 0.08613549776378997, + "grad_norm": 14067.98828125, + "learning_rate": 8.528e-06, + "loss": 5560.3422, + "step": 42640 + }, + { + "epoch": 0.08615569839647377, + "grad_norm": 12596.4580078125, + "learning_rate": 8.530000000000001e-06, + "loss": 4817.659, + "step": 42650 + }, + { + "epoch": 0.0861758990291576, + "grad_norm": 23123.6640625, + "learning_rate": 8.532e-06, + "loss": 7589.7234, + "step": 42660 + }, + { + "epoch": 0.08619609966184141, + "grad_norm": 28133.767578125, + "learning_rate": 8.534000000000002e-06, + "loss": 4344.1961, + "step": 42670 + }, + { + "epoch": 0.08621630029452522, + "grad_norm": 17531.466796875, + "learning_rate": 8.536000000000001e-06, + "loss": 10397.8836, + "step": 42680 + }, + { + "epoch": 0.08623650092720904, + "grad_norm": 17015.896484375, + "learning_rate": 8.538e-06, + "loss": 3705.4406, + "step": 42690 + }, + { + "epoch": 0.08625670155989286, + "grad_norm": 8218.8994140625, + "learning_rate": 8.540000000000001e-06, + "loss": 6581.8523, + "step": 42700 + }, + { + "epoch": 0.08627690219257667, + "grad_norm": 1409.3759765625, + "learning_rate": 8.542e-06, + "loss": 14191.6234, + "step": 42710 + }, + { + "epoch": 0.08629710282526049, + "grad_norm": 93717.984375, + "learning_rate": 8.544000000000002e-06, + "loss": 7559.0445, + "step": 42720 + }, + { + "epoch": 0.0863173034579443, + "grad_norm": 17980.529296875, + "learning_rate": 8.546000000000001e-06, + "loss": 9186.7703, + "step": 42730 + }, + { + "epoch": 0.08633750409062811, + "grad_norm": 19608.068359375, + "learning_rate": 8.548e-06, + "loss": 2968.849, + "step": 42740 + }, + { + "epoch": 0.08635770472331193, + "grad_norm": 9654.609375, + "learning_rate": 8.550000000000001e-06, + "loss": 1926.9014, + "step": 42750 + }, + { + "epoch": 0.08637790535599575, + "grad_norm": 24028.353515625, + "learning_rate": 8.552e-06, + "loss": 3384.6781, + "step": 42760 + }, + { + "epoch": 0.08639810598867957, + "grad_norm": 23292.248046875, + "learning_rate": 8.554000000000001e-06, + "loss": 6553.9555, + "step": 42770 + }, + { + "epoch": 0.08641830662136338, + "grad_norm": 74895.9140625, + "learning_rate": 8.556e-06, + "loss": 9333.8797, + "step": 42780 + }, + { + "epoch": 0.0864385072540472, + "grad_norm": 6635.8369140625, + "learning_rate": 8.558e-06, + "loss": 2636.4043, + "step": 42790 + }, + { + "epoch": 0.08645870788673102, + "grad_norm": 79034.25, + "learning_rate": 8.560000000000001e-06, + "loss": 9239.343, + "step": 42800 + }, + { + "epoch": 0.08647890851941482, + "grad_norm": 14894.5947265625, + "learning_rate": 8.562e-06, + "loss": 6204.968, + "step": 42810 + }, + { + "epoch": 0.08649910915209864, + "grad_norm": 3300.013916015625, + "learning_rate": 8.564000000000001e-06, + "loss": 3259.5904, + "step": 42820 + }, + { + "epoch": 0.08651930978478246, + "grad_norm": 17110.267578125, + "learning_rate": 8.566e-06, + "loss": 12400.5516, + "step": 42830 + }, + { + "epoch": 0.08653951041746627, + "grad_norm": 6234.0595703125, + "learning_rate": 8.568e-06, + "loss": 6601.7586, + "step": 42840 + }, + { + "epoch": 0.08655971105015009, + "grad_norm": 58715.99609375, + "learning_rate": 8.570000000000001e-06, + "loss": 8675.3547, + "step": 42850 + }, + { + "epoch": 0.08657991168283391, + "grad_norm": 34523.375, + "learning_rate": 8.572e-06, + "loss": 3192.2941, + "step": 42860 + }, + { + "epoch": 0.08660011231551772, + "grad_norm": 36147.63671875, + "learning_rate": 8.574000000000001e-06, + "loss": 8154.7125, + "step": 42870 + }, + { + "epoch": 0.08662031294820154, + "grad_norm": 115677.8203125, + "learning_rate": 8.576e-06, + "loss": 10788.7617, + "step": 42880 + }, + { + "epoch": 0.08664051358088536, + "grad_norm": 4519.44775390625, + "learning_rate": 8.578000000000002e-06, + "loss": 4988.0031, + "step": 42890 + }, + { + "epoch": 0.08666071421356916, + "grad_norm": 7859.56201171875, + "learning_rate": 8.580000000000001e-06, + "loss": 12170.5805, + "step": 42900 + }, + { + "epoch": 0.08668091484625298, + "grad_norm": 4871.1923828125, + "learning_rate": 8.582e-06, + "loss": 3297.523, + "step": 42910 + }, + { + "epoch": 0.0867011154789368, + "grad_norm": 9115.0966796875, + "learning_rate": 8.584000000000001e-06, + "loss": 6736.7312, + "step": 42920 + }, + { + "epoch": 0.08672131611162062, + "grad_norm": 50490.43359375, + "learning_rate": 8.586e-06, + "loss": 7586.9844, + "step": 42930 + }, + { + "epoch": 0.08674151674430443, + "grad_norm": 127547.9453125, + "learning_rate": 8.588000000000001e-06, + "loss": 9934.3797, + "step": 42940 + }, + { + "epoch": 0.08676171737698825, + "grad_norm": 34989.1171875, + "learning_rate": 8.59e-06, + "loss": 3174.1262, + "step": 42950 + }, + { + "epoch": 0.08678191800967207, + "grad_norm": 692.2158813476562, + "learning_rate": 8.592e-06, + "loss": 2278.4861, + "step": 42960 + }, + { + "epoch": 0.08680211864235587, + "grad_norm": 1030.243896484375, + "learning_rate": 8.594000000000001e-06, + "loss": 7157.1586, + "step": 42970 + }, + { + "epoch": 0.0868223192750397, + "grad_norm": 2935.587890625, + "learning_rate": 8.596e-06, + "loss": 11101.8484, + "step": 42980 + }, + { + "epoch": 0.08684251990772351, + "grad_norm": 23925.638671875, + "learning_rate": 8.598000000000001e-06, + "loss": 3261.4328, + "step": 42990 + }, + { + "epoch": 0.08686272054040732, + "grad_norm": 3471.11181640625, + "learning_rate": 8.6e-06, + "loss": 7611.4203, + "step": 43000 + }, + { + "epoch": 0.08688292117309114, + "grad_norm": 2114.6162109375, + "learning_rate": 8.602e-06, + "loss": 2781.1283, + "step": 43010 + }, + { + "epoch": 0.08690312180577496, + "grad_norm": 1311.1876220703125, + "learning_rate": 8.604000000000001e-06, + "loss": 17404.6562, + "step": 43020 + }, + { + "epoch": 0.08692332243845877, + "grad_norm": 1892.941650390625, + "learning_rate": 8.606e-06, + "loss": 12440.4086, + "step": 43030 + }, + { + "epoch": 0.08694352307114259, + "grad_norm": 2438.401123046875, + "learning_rate": 8.608000000000001e-06, + "loss": 7283.8062, + "step": 43040 + }, + { + "epoch": 0.0869637237038264, + "grad_norm": 30439.4765625, + "learning_rate": 8.61e-06, + "loss": 6755.5086, + "step": 43050 + }, + { + "epoch": 0.08698392433651021, + "grad_norm": 960.8984985351562, + "learning_rate": 8.612e-06, + "loss": 11701.0594, + "step": 43060 + }, + { + "epoch": 0.08700412496919403, + "grad_norm": 2281.63330078125, + "learning_rate": 8.614000000000001e-06, + "loss": 3260.4299, + "step": 43070 + }, + { + "epoch": 0.08702432560187785, + "grad_norm": 122969.5859375, + "learning_rate": 8.616000000000002e-06, + "loss": 5814.3574, + "step": 43080 + }, + { + "epoch": 0.08704452623456167, + "grad_norm": 12492.1015625, + "learning_rate": 8.618000000000001e-06, + "loss": 6834.0719, + "step": 43090 + }, + { + "epoch": 0.08706472686724548, + "grad_norm": 5765.86474609375, + "learning_rate": 8.62e-06, + "loss": 9437.8266, + "step": 43100 + }, + { + "epoch": 0.0870849274999293, + "grad_norm": 2993.81591796875, + "learning_rate": 8.622e-06, + "loss": 9365.6117, + "step": 43110 + }, + { + "epoch": 0.08710512813261312, + "grad_norm": 98167.5078125, + "learning_rate": 8.624e-06, + "loss": 5324.718, + "step": 43120 + }, + { + "epoch": 0.08712532876529692, + "grad_norm": 478.24981689453125, + "learning_rate": 8.626000000000002e-06, + "loss": 4020.5125, + "step": 43130 + }, + { + "epoch": 0.08714552939798074, + "grad_norm": 882.7974853515625, + "learning_rate": 8.628000000000001e-06, + "loss": 4450.4637, + "step": 43140 + }, + { + "epoch": 0.08716573003066456, + "grad_norm": 62112.42578125, + "learning_rate": 8.63e-06, + "loss": 7608.8383, + "step": 43150 + }, + { + "epoch": 0.08718593066334837, + "grad_norm": 54.323543548583984, + "learning_rate": 8.632e-06, + "loss": 3426.6352, + "step": 43160 + }, + { + "epoch": 0.08720613129603219, + "grad_norm": 52656.578125, + "learning_rate": 8.634e-06, + "loss": 4242.4039, + "step": 43170 + }, + { + "epoch": 0.08722633192871601, + "grad_norm": 1821.691162109375, + "learning_rate": 8.636000000000002e-06, + "loss": 8475.0469, + "step": 43180 + }, + { + "epoch": 0.08724653256139982, + "grad_norm": 20563.171875, + "learning_rate": 8.638000000000001e-06, + "loss": 7806.3406, + "step": 43190 + }, + { + "epoch": 0.08726673319408364, + "grad_norm": 24413.19140625, + "learning_rate": 8.64e-06, + "loss": 5928.0293, + "step": 43200 + }, + { + "epoch": 0.08728693382676746, + "grad_norm": 117780.8515625, + "learning_rate": 8.642e-06, + "loss": 6261.2945, + "step": 43210 + }, + { + "epoch": 0.08730713445945126, + "grad_norm": 6032.95703125, + "learning_rate": 8.644e-06, + "loss": 5653.559, + "step": 43220 + }, + { + "epoch": 0.08732733509213508, + "grad_norm": 15985.51953125, + "learning_rate": 8.646000000000002e-06, + "loss": 3754.0008, + "step": 43230 + }, + { + "epoch": 0.0873475357248189, + "grad_norm": 23975.087890625, + "learning_rate": 8.648000000000001e-06, + "loss": 6159.7387, + "step": 43240 + }, + { + "epoch": 0.08736773635750272, + "grad_norm": 15911.3466796875, + "learning_rate": 8.65e-06, + "loss": 3317.441, + "step": 43250 + }, + { + "epoch": 0.08738793699018653, + "grad_norm": 2101.354248046875, + "learning_rate": 8.652000000000001e-06, + "loss": 12845.3297, + "step": 43260 + }, + { + "epoch": 0.08740813762287035, + "grad_norm": 9919.5400390625, + "learning_rate": 8.654e-06, + "loss": 5711.3117, + "step": 43270 + }, + { + "epoch": 0.08742833825555417, + "grad_norm": 13844.4482421875, + "learning_rate": 8.656000000000001e-06, + "loss": 5315.7254, + "step": 43280 + }, + { + "epoch": 0.08744853888823798, + "grad_norm": 1258.619873046875, + "learning_rate": 8.658e-06, + "loss": 10777.1789, + "step": 43290 + }, + { + "epoch": 0.0874687395209218, + "grad_norm": 8996.552734375, + "learning_rate": 8.66e-06, + "loss": 12684.7414, + "step": 43300 + }, + { + "epoch": 0.08748894015360562, + "grad_norm": 58936.28125, + "learning_rate": 8.662000000000001e-06, + "loss": 10557.0562, + "step": 43310 + }, + { + "epoch": 0.08750914078628942, + "grad_norm": 27101.56640625, + "learning_rate": 8.664e-06, + "loss": 3575.4758, + "step": 43320 + }, + { + "epoch": 0.08752934141897324, + "grad_norm": 1083.2188720703125, + "learning_rate": 8.666000000000001e-06, + "loss": 2879.4959, + "step": 43330 + }, + { + "epoch": 0.08754954205165706, + "grad_norm": 29778.4453125, + "learning_rate": 8.668e-06, + "loss": 7549.1906, + "step": 43340 + }, + { + "epoch": 0.08756974268434087, + "grad_norm": 5482.57421875, + "learning_rate": 8.67e-06, + "loss": 5145.9168, + "step": 43350 + }, + { + "epoch": 0.08758994331702469, + "grad_norm": 722.3234252929688, + "learning_rate": 8.672000000000001e-06, + "loss": 3330.0828, + "step": 43360 + }, + { + "epoch": 0.08761014394970851, + "grad_norm": 21928.259765625, + "learning_rate": 8.674e-06, + "loss": 7113.9047, + "step": 43370 + }, + { + "epoch": 0.08763034458239231, + "grad_norm": 10745.376953125, + "learning_rate": 8.676000000000001e-06, + "loss": 11027.0297, + "step": 43380 + }, + { + "epoch": 0.08765054521507613, + "grad_norm": 4205.30224609375, + "learning_rate": 8.678e-06, + "loss": 15032.5516, + "step": 43390 + }, + { + "epoch": 0.08767074584775995, + "grad_norm": 2871.8388671875, + "learning_rate": 8.68e-06, + "loss": 5960.4277, + "step": 43400 + }, + { + "epoch": 0.08769094648044377, + "grad_norm": 20159.904296875, + "learning_rate": 8.682000000000001e-06, + "loss": 4817.1203, + "step": 43410 + }, + { + "epoch": 0.08771114711312758, + "grad_norm": 88337.0703125, + "learning_rate": 8.684e-06, + "loss": 12107.4562, + "step": 43420 + }, + { + "epoch": 0.0877313477458114, + "grad_norm": 5558.98046875, + "learning_rate": 8.686000000000001e-06, + "loss": 3863.9621, + "step": 43430 + }, + { + "epoch": 0.08775154837849522, + "grad_norm": 27442.12890625, + "learning_rate": 8.688e-06, + "loss": 5834.3922, + "step": 43440 + }, + { + "epoch": 0.08777174901117903, + "grad_norm": 11417.419921875, + "learning_rate": 8.690000000000002e-06, + "loss": 8431.1734, + "step": 43450 + }, + { + "epoch": 0.08779194964386285, + "grad_norm": 940.79150390625, + "learning_rate": 8.692e-06, + "loss": 4544.8027, + "step": 43460 + }, + { + "epoch": 0.08781215027654667, + "grad_norm": 27283.9453125, + "learning_rate": 8.694e-06, + "loss": 1693.8945, + "step": 43470 + }, + { + "epoch": 0.08783235090923047, + "grad_norm": 86016.5859375, + "learning_rate": 8.696000000000001e-06, + "loss": 5637.873, + "step": 43480 + }, + { + "epoch": 0.08785255154191429, + "grad_norm": 17278.748046875, + "learning_rate": 8.698e-06, + "loss": 4307.5078, + "step": 43490 + }, + { + "epoch": 0.08787275217459811, + "grad_norm": 35007.88671875, + "learning_rate": 8.700000000000001e-06, + "loss": 3358.9887, + "step": 43500 + }, + { + "epoch": 0.08789295280728192, + "grad_norm": 16836.3046875, + "learning_rate": 8.702e-06, + "loss": 10563.4164, + "step": 43510 + }, + { + "epoch": 0.08791315343996574, + "grad_norm": 33972.57421875, + "learning_rate": 8.704e-06, + "loss": 6850.7, + "step": 43520 + }, + { + "epoch": 0.08793335407264956, + "grad_norm": 3041.985107421875, + "learning_rate": 8.706000000000001e-06, + "loss": 9318.6477, + "step": 43530 + }, + { + "epoch": 0.08795355470533336, + "grad_norm": 626.1815185546875, + "learning_rate": 8.708e-06, + "loss": 4322.4828, + "step": 43540 + }, + { + "epoch": 0.08797375533801718, + "grad_norm": 17289.228515625, + "learning_rate": 8.710000000000001e-06, + "loss": 15484.1219, + "step": 43550 + }, + { + "epoch": 0.087993955970701, + "grad_norm": 5306.55615234375, + "learning_rate": 8.712e-06, + "loss": 5262.3859, + "step": 43560 + }, + { + "epoch": 0.08801415660338482, + "grad_norm": 13060.87109375, + "learning_rate": 8.714e-06, + "loss": 10274.1047, + "step": 43570 + }, + { + "epoch": 0.08803435723606863, + "grad_norm": 559.179931640625, + "learning_rate": 8.716000000000001e-06, + "loss": 10729.3883, + "step": 43580 + }, + { + "epoch": 0.08805455786875245, + "grad_norm": 122303.5703125, + "learning_rate": 8.718e-06, + "loss": 14823.8062, + "step": 43590 + }, + { + "epoch": 0.08807475850143627, + "grad_norm": 25978.1953125, + "learning_rate": 8.720000000000001e-06, + "loss": 15861.8984, + "step": 43600 + }, + { + "epoch": 0.08809495913412008, + "grad_norm": 337272.90625, + "learning_rate": 8.722e-06, + "loss": 12158.3219, + "step": 43610 + }, + { + "epoch": 0.0881151597668039, + "grad_norm": 8536.861328125, + "learning_rate": 8.724e-06, + "loss": 11981.3273, + "step": 43620 + }, + { + "epoch": 0.08813536039948772, + "grad_norm": 182918.515625, + "learning_rate": 8.726e-06, + "loss": 10243.4344, + "step": 43630 + }, + { + "epoch": 0.08815556103217152, + "grad_norm": 489.6578674316406, + "learning_rate": 8.728e-06, + "loss": 7145.2797, + "step": 43640 + }, + { + "epoch": 0.08817576166485534, + "grad_norm": 85536.9609375, + "learning_rate": 8.730000000000001e-06, + "loss": 4849.2477, + "step": 43650 + }, + { + "epoch": 0.08819596229753916, + "grad_norm": 86142.078125, + "learning_rate": 8.732e-06, + "loss": 10752.1305, + "step": 43660 + }, + { + "epoch": 0.08821616293022297, + "grad_norm": 39510.8125, + "learning_rate": 8.734e-06, + "loss": 5337.6434, + "step": 43670 + }, + { + "epoch": 0.08823636356290679, + "grad_norm": 2563.265380859375, + "learning_rate": 8.736e-06, + "loss": 7153.9984, + "step": 43680 + }, + { + "epoch": 0.08825656419559061, + "grad_norm": 39236.51171875, + "learning_rate": 8.738000000000002e-06, + "loss": 2361.2977, + "step": 43690 + }, + { + "epoch": 0.08827676482827441, + "grad_norm": 6687.01611328125, + "learning_rate": 8.740000000000001e-06, + "loss": 3038.7982, + "step": 43700 + }, + { + "epoch": 0.08829696546095823, + "grad_norm": 41310.30078125, + "learning_rate": 8.742e-06, + "loss": 6679.3555, + "step": 43710 + }, + { + "epoch": 0.08831716609364205, + "grad_norm": 11741.427734375, + "learning_rate": 8.744e-06, + "loss": 2064.432, + "step": 43720 + }, + { + "epoch": 0.08833736672632587, + "grad_norm": 176.59854125976562, + "learning_rate": 8.746e-06, + "loss": 5874.6062, + "step": 43730 + }, + { + "epoch": 0.08835756735900968, + "grad_norm": 3382.465576171875, + "learning_rate": 8.748000000000002e-06, + "loss": 9191.4203, + "step": 43740 + }, + { + "epoch": 0.0883777679916935, + "grad_norm": 1147.739501953125, + "learning_rate": 8.750000000000001e-06, + "loss": 10855.2305, + "step": 43750 + }, + { + "epoch": 0.08839796862437732, + "grad_norm": 9366.2890625, + "learning_rate": 8.752e-06, + "loss": 6995.1062, + "step": 43760 + }, + { + "epoch": 0.08841816925706113, + "grad_norm": 1008.5233764648438, + "learning_rate": 8.754e-06, + "loss": 3612.3457, + "step": 43770 + }, + { + "epoch": 0.08843836988974495, + "grad_norm": 17842.919921875, + "learning_rate": 8.756e-06, + "loss": 4537.7273, + "step": 43780 + }, + { + "epoch": 0.08845857052242877, + "grad_norm": 34202.74609375, + "learning_rate": 8.758000000000002e-06, + "loss": 6226.1266, + "step": 43790 + }, + { + "epoch": 0.08847877115511257, + "grad_norm": 3179.661865234375, + "learning_rate": 8.76e-06, + "loss": 10457.5016, + "step": 43800 + }, + { + "epoch": 0.0884989717877964, + "grad_norm": 10855.5078125, + "learning_rate": 8.762e-06, + "loss": 9986.0797, + "step": 43810 + }, + { + "epoch": 0.08851917242048021, + "grad_norm": 25282.37890625, + "learning_rate": 8.764e-06, + "loss": 6030.3547, + "step": 43820 + }, + { + "epoch": 0.08853937305316402, + "grad_norm": 488.19287109375, + "learning_rate": 8.766e-06, + "loss": 4028.6535, + "step": 43830 + }, + { + "epoch": 0.08855957368584784, + "grad_norm": 35849.88671875, + "learning_rate": 8.768000000000001e-06, + "loss": 9491.7758, + "step": 43840 + }, + { + "epoch": 0.08857977431853166, + "grad_norm": 50581.41796875, + "learning_rate": 8.77e-06, + "loss": 8868.6727, + "step": 43850 + }, + { + "epoch": 0.08859997495121547, + "grad_norm": 105326.53125, + "learning_rate": 8.772e-06, + "loss": 6113.975, + "step": 43860 + }, + { + "epoch": 0.08862017558389929, + "grad_norm": 60405.046875, + "learning_rate": 8.774000000000001e-06, + "loss": 8634.5094, + "step": 43870 + }, + { + "epoch": 0.0886403762165831, + "grad_norm": 1804.5911865234375, + "learning_rate": 8.776e-06, + "loss": 3212.5701, + "step": 43880 + }, + { + "epoch": 0.08866057684926693, + "grad_norm": 47915.23046875, + "learning_rate": 8.778000000000001e-06, + "loss": 2576.8355, + "step": 43890 + }, + { + "epoch": 0.08868077748195073, + "grad_norm": 2680.22216796875, + "learning_rate": 8.78e-06, + "loss": 7019.8734, + "step": 43900 + }, + { + "epoch": 0.08870097811463455, + "grad_norm": 33635.20703125, + "learning_rate": 8.782e-06, + "loss": 9839.1523, + "step": 43910 + }, + { + "epoch": 0.08872117874731837, + "grad_norm": 16669.96875, + "learning_rate": 8.784000000000001e-06, + "loss": 10174.8813, + "step": 43920 + }, + { + "epoch": 0.08874137938000218, + "grad_norm": 3424.16162109375, + "learning_rate": 8.786000000000002e-06, + "loss": 10051.5039, + "step": 43930 + }, + { + "epoch": 0.088761580012686, + "grad_norm": 3760.71875, + "learning_rate": 8.788000000000001e-06, + "loss": 8237.5617, + "step": 43940 + }, + { + "epoch": 0.08878178064536982, + "grad_norm": 53303.87890625, + "learning_rate": 8.79e-06, + "loss": 10845.8391, + "step": 43950 + }, + { + "epoch": 0.08880198127805362, + "grad_norm": 7743.87646484375, + "learning_rate": 8.792e-06, + "loss": 5149.466, + "step": 43960 + }, + { + "epoch": 0.08882218191073744, + "grad_norm": 59569.0546875, + "learning_rate": 8.794e-06, + "loss": 5824.0297, + "step": 43970 + }, + { + "epoch": 0.08884238254342126, + "grad_norm": 212050.859375, + "learning_rate": 8.796000000000002e-06, + "loss": 10484.15, + "step": 43980 + }, + { + "epoch": 0.08886258317610507, + "grad_norm": 2923.400634765625, + "learning_rate": 8.798000000000001e-06, + "loss": 4713.7762, + "step": 43990 + }, + { + "epoch": 0.08888278380878889, + "grad_norm": 3165.862548828125, + "learning_rate": 8.8e-06, + "loss": 8525.3195, + "step": 44000 + }, + { + "epoch": 0.08890298444147271, + "grad_norm": 7599.05615234375, + "learning_rate": 8.802e-06, + "loss": 4336.3328, + "step": 44010 + }, + { + "epoch": 0.08892318507415652, + "grad_norm": 4750.70458984375, + "learning_rate": 8.804e-06, + "loss": 5062.5086, + "step": 44020 + }, + { + "epoch": 0.08894338570684034, + "grad_norm": 9423.9150390625, + "learning_rate": 8.806000000000002e-06, + "loss": 2065.4082, + "step": 44030 + }, + { + "epoch": 0.08896358633952416, + "grad_norm": 27266.9453125, + "learning_rate": 8.808000000000001e-06, + "loss": 16603.7734, + "step": 44040 + }, + { + "epoch": 0.08898378697220798, + "grad_norm": 8010.51953125, + "learning_rate": 8.81e-06, + "loss": 5645.8848, + "step": 44050 + }, + { + "epoch": 0.08900398760489178, + "grad_norm": 5886.6142578125, + "learning_rate": 8.812000000000001e-06, + "loss": 14145.7922, + "step": 44060 + }, + { + "epoch": 0.0890241882375756, + "grad_norm": 5887.01025390625, + "learning_rate": 8.814e-06, + "loss": 6500.7168, + "step": 44070 + }, + { + "epoch": 0.08904438887025942, + "grad_norm": 3745.574462890625, + "learning_rate": 8.816000000000002e-06, + "loss": 3971.1117, + "step": 44080 + }, + { + "epoch": 0.08906458950294323, + "grad_norm": 2813.719482421875, + "learning_rate": 8.818000000000001e-06, + "loss": 2961.5752, + "step": 44090 + }, + { + "epoch": 0.08908479013562705, + "grad_norm": 47705.91796875, + "learning_rate": 8.82e-06, + "loss": 4245.5789, + "step": 44100 + }, + { + "epoch": 0.08910499076831087, + "grad_norm": 39064.48828125, + "learning_rate": 8.822000000000001e-06, + "loss": 9064.857, + "step": 44110 + }, + { + "epoch": 0.08912519140099467, + "grad_norm": 15508.0107421875, + "learning_rate": 8.824e-06, + "loss": 7344.3906, + "step": 44120 + }, + { + "epoch": 0.0891453920336785, + "grad_norm": 2724.16455078125, + "learning_rate": 8.826000000000002e-06, + "loss": 5194.8012, + "step": 44130 + }, + { + "epoch": 0.08916559266636231, + "grad_norm": 23267.625, + "learning_rate": 8.828000000000001e-06, + "loss": 10830.0781, + "step": 44140 + }, + { + "epoch": 0.08918579329904612, + "grad_norm": 1783.4652099609375, + "learning_rate": 8.83e-06, + "loss": 4763.3934, + "step": 44150 + }, + { + "epoch": 0.08920599393172994, + "grad_norm": 322163.15625, + "learning_rate": 8.832000000000001e-06, + "loss": 18111.6813, + "step": 44160 + }, + { + "epoch": 0.08922619456441376, + "grad_norm": 79953.3046875, + "learning_rate": 8.834e-06, + "loss": 10568.2727, + "step": 44170 + }, + { + "epoch": 0.08924639519709757, + "grad_norm": 264923.5, + "learning_rate": 8.836000000000001e-06, + "loss": 9843.1719, + "step": 44180 + }, + { + "epoch": 0.08926659582978139, + "grad_norm": 81759.0078125, + "learning_rate": 8.838e-06, + "loss": 12870.9922, + "step": 44190 + }, + { + "epoch": 0.0892867964624652, + "grad_norm": 2817.69921875, + "learning_rate": 8.84e-06, + "loss": 6589.2594, + "step": 44200 + }, + { + "epoch": 0.08930699709514903, + "grad_norm": 20147.65625, + "learning_rate": 8.842000000000001e-06, + "loss": 5545.8836, + "step": 44210 + }, + { + "epoch": 0.08932719772783283, + "grad_norm": 18354.892578125, + "learning_rate": 8.844e-06, + "loss": 4177.3902, + "step": 44220 + }, + { + "epoch": 0.08934739836051665, + "grad_norm": 9926.8642578125, + "learning_rate": 8.846000000000001e-06, + "loss": 3102.4668, + "step": 44230 + }, + { + "epoch": 0.08936759899320047, + "grad_norm": 10900.041015625, + "learning_rate": 8.848e-06, + "loss": 6808.4445, + "step": 44240 + }, + { + "epoch": 0.08938779962588428, + "grad_norm": 4330.3076171875, + "learning_rate": 8.85e-06, + "loss": 11306.4961, + "step": 44250 + }, + { + "epoch": 0.0894080002585681, + "grad_norm": 63601.9296875, + "learning_rate": 8.852000000000001e-06, + "loss": 5587.8234, + "step": 44260 + }, + { + "epoch": 0.08942820089125192, + "grad_norm": 38524.12890625, + "learning_rate": 8.854e-06, + "loss": 14073.65, + "step": 44270 + }, + { + "epoch": 0.08944840152393572, + "grad_norm": 35240.50390625, + "learning_rate": 8.856000000000001e-06, + "loss": 2565.4086, + "step": 44280 + }, + { + "epoch": 0.08946860215661954, + "grad_norm": 102730.5390625, + "learning_rate": 8.858e-06, + "loss": 5927.6043, + "step": 44290 + }, + { + "epoch": 0.08948880278930336, + "grad_norm": 21058.0546875, + "learning_rate": 8.860000000000002e-06, + "loss": 7795.1328, + "step": 44300 + }, + { + "epoch": 0.08950900342198717, + "grad_norm": 1402.8397216796875, + "learning_rate": 8.862000000000001e-06, + "loss": 2262.7555, + "step": 44310 + }, + { + "epoch": 0.08952920405467099, + "grad_norm": 4071.8916015625, + "learning_rate": 8.864e-06, + "loss": 4676.7559, + "step": 44320 + }, + { + "epoch": 0.08954940468735481, + "grad_norm": 15761.798828125, + "learning_rate": 8.866000000000001e-06, + "loss": 5297.2879, + "step": 44330 + }, + { + "epoch": 0.08956960532003862, + "grad_norm": 15987.0419921875, + "learning_rate": 8.868e-06, + "loss": 4267.5984, + "step": 44340 + }, + { + "epoch": 0.08958980595272244, + "grad_norm": 2222.99609375, + "learning_rate": 8.870000000000001e-06, + "loss": 7991.0437, + "step": 44350 + }, + { + "epoch": 0.08961000658540626, + "grad_norm": 18607.3984375, + "learning_rate": 8.872e-06, + "loss": 3230.274, + "step": 44360 + }, + { + "epoch": 0.08963020721809008, + "grad_norm": 17186.421875, + "learning_rate": 8.874e-06, + "loss": 2952.4293, + "step": 44370 + }, + { + "epoch": 0.08965040785077388, + "grad_norm": 20476.029296875, + "learning_rate": 8.876e-06, + "loss": 17663.5891, + "step": 44380 + }, + { + "epoch": 0.0896706084834577, + "grad_norm": 8146.42578125, + "learning_rate": 8.878e-06, + "loss": 7276.5984, + "step": 44390 + }, + { + "epoch": 0.08969080911614152, + "grad_norm": 11436.9296875, + "learning_rate": 8.880000000000001e-06, + "loss": 12283.1734, + "step": 44400 + }, + { + "epoch": 0.08971100974882533, + "grad_norm": 17938.5234375, + "learning_rate": 8.882e-06, + "loss": 4724.7629, + "step": 44410 + }, + { + "epoch": 0.08973121038150915, + "grad_norm": 14950.9033203125, + "learning_rate": 8.884e-06, + "loss": 10119.9547, + "step": 44420 + }, + { + "epoch": 0.08975141101419297, + "grad_norm": 34840.25390625, + "learning_rate": 8.886000000000001e-06, + "loss": 6388.3344, + "step": 44430 + }, + { + "epoch": 0.08977161164687678, + "grad_norm": 1823.0430908203125, + "learning_rate": 8.888e-06, + "loss": 10000.3555, + "step": 44440 + }, + { + "epoch": 0.0897918122795606, + "grad_norm": 13385.505859375, + "learning_rate": 8.890000000000001e-06, + "loss": 7638.8969, + "step": 44450 + }, + { + "epoch": 0.08981201291224442, + "grad_norm": 6697.8759765625, + "learning_rate": 8.892e-06, + "loss": 3066.2594, + "step": 44460 + }, + { + "epoch": 0.08983221354492822, + "grad_norm": 902.4928588867188, + "learning_rate": 8.894e-06, + "loss": 8760.1281, + "step": 44470 + }, + { + "epoch": 0.08985241417761204, + "grad_norm": 874.2611694335938, + "learning_rate": 8.896000000000001e-06, + "loss": 7769.7055, + "step": 44480 + }, + { + "epoch": 0.08987261481029586, + "grad_norm": 150183.515625, + "learning_rate": 8.898000000000002e-06, + "loss": 8759.8719, + "step": 44490 + }, + { + "epoch": 0.08989281544297967, + "grad_norm": 7031.51953125, + "learning_rate": 8.900000000000001e-06, + "loss": 15445.0344, + "step": 44500 + }, + { + "epoch": 0.08991301607566349, + "grad_norm": 1440.5185546875, + "learning_rate": 8.902e-06, + "loss": 4515.2633, + "step": 44510 + }, + { + "epoch": 0.08993321670834731, + "grad_norm": 11672.1416015625, + "learning_rate": 8.904e-06, + "loss": 5253.1859, + "step": 44520 + }, + { + "epoch": 0.08995341734103113, + "grad_norm": 90976.546875, + "learning_rate": 8.906e-06, + "loss": 4359.5422, + "step": 44530 + }, + { + "epoch": 0.08997361797371493, + "grad_norm": 533.4171142578125, + "learning_rate": 8.908000000000002e-06, + "loss": 10991.0367, + "step": 44540 + }, + { + "epoch": 0.08999381860639875, + "grad_norm": 47591.78125, + "learning_rate": 8.910000000000001e-06, + "loss": 5396.4961, + "step": 44550 + }, + { + "epoch": 0.09001401923908257, + "grad_norm": 28444.70703125, + "learning_rate": 8.912e-06, + "loss": 1892.9906, + "step": 44560 + }, + { + "epoch": 0.09003421987176638, + "grad_norm": 617.1309814453125, + "learning_rate": 8.914e-06, + "loss": 4039.5656, + "step": 44570 + }, + { + "epoch": 0.0900544205044502, + "grad_norm": 2023.4478759765625, + "learning_rate": 8.916e-06, + "loss": 6474.5312, + "step": 44580 + }, + { + "epoch": 0.09007462113713402, + "grad_norm": 7078.8671875, + "learning_rate": 8.918000000000002e-06, + "loss": 12504.5328, + "step": 44590 + }, + { + "epoch": 0.09009482176981783, + "grad_norm": 6588.9365234375, + "learning_rate": 8.920000000000001e-06, + "loss": 9035.2672, + "step": 44600 + }, + { + "epoch": 0.09011502240250165, + "grad_norm": 165300.03125, + "learning_rate": 8.922e-06, + "loss": 7339.4609, + "step": 44610 + }, + { + "epoch": 0.09013522303518547, + "grad_norm": 41930.08203125, + "learning_rate": 8.924e-06, + "loss": 3113.4674, + "step": 44620 + }, + { + "epoch": 0.09015542366786927, + "grad_norm": 8051.87451171875, + "learning_rate": 8.926e-06, + "loss": 7051.9984, + "step": 44630 + }, + { + "epoch": 0.09017562430055309, + "grad_norm": 2331.20654296875, + "learning_rate": 8.928000000000002e-06, + "loss": 6659.5117, + "step": 44640 + }, + { + "epoch": 0.09019582493323691, + "grad_norm": 121024.71875, + "learning_rate": 8.930000000000001e-06, + "loss": 7909.6281, + "step": 44650 + }, + { + "epoch": 0.09021602556592072, + "grad_norm": 2325.47607421875, + "learning_rate": 8.932e-06, + "loss": 5299.1898, + "step": 44660 + }, + { + "epoch": 0.09023622619860454, + "grad_norm": 38952.015625, + "learning_rate": 8.934000000000001e-06, + "loss": 4304.7707, + "step": 44670 + }, + { + "epoch": 0.09025642683128836, + "grad_norm": 121658.171875, + "learning_rate": 8.936e-06, + "loss": 8402.4937, + "step": 44680 + }, + { + "epoch": 0.09027662746397218, + "grad_norm": 19330.974609375, + "learning_rate": 8.938000000000001e-06, + "loss": 6145.2703, + "step": 44690 + }, + { + "epoch": 0.09029682809665598, + "grad_norm": 5099.98779296875, + "learning_rate": 8.94e-06, + "loss": 3469.3875, + "step": 44700 + }, + { + "epoch": 0.0903170287293398, + "grad_norm": 1686.4632568359375, + "learning_rate": 8.942e-06, + "loss": 3014.258, + "step": 44710 + }, + { + "epoch": 0.09033722936202362, + "grad_norm": 17702.076171875, + "learning_rate": 8.944000000000001e-06, + "loss": 2730.6607, + "step": 44720 + }, + { + "epoch": 0.09035742999470743, + "grad_norm": 50964.578125, + "learning_rate": 8.946e-06, + "loss": 9548.8008, + "step": 44730 + }, + { + "epoch": 0.09037763062739125, + "grad_norm": 52185.03515625, + "learning_rate": 8.948000000000001e-06, + "loss": 4951.6305, + "step": 44740 + }, + { + "epoch": 0.09039783126007507, + "grad_norm": 5775.76171875, + "learning_rate": 8.95e-06, + "loss": 7181.2836, + "step": 44750 + }, + { + "epoch": 0.09041803189275888, + "grad_norm": 43785.859375, + "learning_rate": 8.952e-06, + "loss": 4365.5816, + "step": 44760 + }, + { + "epoch": 0.0904382325254427, + "grad_norm": 13587.2392578125, + "learning_rate": 8.954000000000001e-06, + "loss": 6233.6703, + "step": 44770 + }, + { + "epoch": 0.09045843315812652, + "grad_norm": 42287.62890625, + "learning_rate": 8.956e-06, + "loss": 8590.2328, + "step": 44780 + }, + { + "epoch": 0.09047863379081032, + "grad_norm": 1808.6695556640625, + "learning_rate": 8.958000000000001e-06, + "loss": 5967.9062, + "step": 44790 + }, + { + "epoch": 0.09049883442349414, + "grad_norm": 24483.56640625, + "learning_rate": 8.96e-06, + "loss": 6766.5898, + "step": 44800 + }, + { + "epoch": 0.09051903505617796, + "grad_norm": 17262.177734375, + "learning_rate": 8.962e-06, + "loss": 6737.2273, + "step": 44810 + }, + { + "epoch": 0.09053923568886177, + "grad_norm": 5425.4853515625, + "learning_rate": 8.964000000000001e-06, + "loss": 7337.7508, + "step": 44820 + }, + { + "epoch": 0.09055943632154559, + "grad_norm": 17105.26171875, + "learning_rate": 8.966e-06, + "loss": 16029.5734, + "step": 44830 + }, + { + "epoch": 0.09057963695422941, + "grad_norm": 314.35626220703125, + "learning_rate": 8.968000000000001e-06, + "loss": 6404.9273, + "step": 44840 + }, + { + "epoch": 0.09059983758691323, + "grad_norm": 29561.28125, + "learning_rate": 8.97e-06, + "loss": 6885.3664, + "step": 44850 + }, + { + "epoch": 0.09062003821959703, + "grad_norm": 12014.833984375, + "learning_rate": 8.972000000000002e-06, + "loss": 3900.1234, + "step": 44860 + }, + { + "epoch": 0.09064023885228085, + "grad_norm": 15907.294921875, + "learning_rate": 8.974e-06, + "loss": 3714.3258, + "step": 44870 + }, + { + "epoch": 0.09066043948496467, + "grad_norm": 0.0, + "learning_rate": 8.976e-06, + "loss": 3095.0273, + "step": 44880 + }, + { + "epoch": 0.09068064011764848, + "grad_norm": 146493.96875, + "learning_rate": 8.978000000000001e-06, + "loss": 5912.0297, + "step": 44890 + }, + { + "epoch": 0.0907008407503323, + "grad_norm": 6042.15771484375, + "learning_rate": 8.98e-06, + "loss": 10625.6742, + "step": 44900 + }, + { + "epoch": 0.09072104138301612, + "grad_norm": 15618.9453125, + "learning_rate": 8.982000000000001e-06, + "loss": 8473.5906, + "step": 44910 + }, + { + "epoch": 0.09074124201569993, + "grad_norm": 133320.953125, + "learning_rate": 8.984e-06, + "loss": 13231.4266, + "step": 44920 + }, + { + "epoch": 0.09076144264838375, + "grad_norm": 2092.87060546875, + "learning_rate": 8.986e-06, + "loss": 9075.7922, + "step": 44930 + }, + { + "epoch": 0.09078164328106757, + "grad_norm": 11268.2978515625, + "learning_rate": 8.988000000000001e-06, + "loss": 5974.3457, + "step": 44940 + }, + { + "epoch": 0.09080184391375137, + "grad_norm": 36942.1171875, + "learning_rate": 8.99e-06, + "loss": 13008.5477, + "step": 44950 + }, + { + "epoch": 0.0908220445464352, + "grad_norm": 1680.7752685546875, + "learning_rate": 8.992000000000001e-06, + "loss": 4176.4059, + "step": 44960 + }, + { + "epoch": 0.09084224517911901, + "grad_norm": 43563.6875, + "learning_rate": 8.994e-06, + "loss": 8431.275, + "step": 44970 + }, + { + "epoch": 0.09086244581180282, + "grad_norm": 10064.7314453125, + "learning_rate": 8.996e-06, + "loss": 7490.1969, + "step": 44980 + }, + { + "epoch": 0.09088264644448664, + "grad_norm": 7663.24853515625, + "learning_rate": 8.998000000000001e-06, + "loss": 6218.2703, + "step": 44990 + }, + { + "epoch": 0.09090284707717046, + "grad_norm": 2283.617919921875, + "learning_rate": 9e-06, + "loss": 2628.2178, + "step": 45000 + }, + { + "epoch": 0.09092304770985428, + "grad_norm": 45976.13671875, + "learning_rate": 9.002000000000001e-06, + "loss": 3713.1641, + "step": 45010 + }, + { + "epoch": 0.09094324834253809, + "grad_norm": 24650.30859375, + "learning_rate": 9.004e-06, + "loss": 7162.1109, + "step": 45020 + }, + { + "epoch": 0.0909634489752219, + "grad_norm": 3176.364501953125, + "learning_rate": 9.006e-06, + "loss": 2845.4563, + "step": 45030 + }, + { + "epoch": 0.09098364960790573, + "grad_norm": 2519.592529296875, + "learning_rate": 9.008e-06, + "loss": 4099.698, + "step": 45040 + }, + { + "epoch": 0.09100385024058953, + "grad_norm": 8492.9423828125, + "learning_rate": 9.01e-06, + "loss": 9208.6891, + "step": 45050 + }, + { + "epoch": 0.09102405087327335, + "grad_norm": 307.83514404296875, + "learning_rate": 9.012000000000001e-06, + "loss": 3187.7328, + "step": 45060 + }, + { + "epoch": 0.09104425150595717, + "grad_norm": 69759.7265625, + "learning_rate": 9.014e-06, + "loss": 9360.0672, + "step": 45070 + }, + { + "epoch": 0.09106445213864098, + "grad_norm": 194851.96875, + "learning_rate": 9.016e-06, + "loss": 9314.6695, + "step": 45080 + }, + { + "epoch": 0.0910846527713248, + "grad_norm": 10450.5498046875, + "learning_rate": 9.018e-06, + "loss": 4673.1934, + "step": 45090 + }, + { + "epoch": 0.09110485340400862, + "grad_norm": 23762.75390625, + "learning_rate": 9.020000000000002e-06, + "loss": 3342.9184, + "step": 45100 + }, + { + "epoch": 0.09112505403669242, + "grad_norm": 212963.234375, + "learning_rate": 9.022000000000001e-06, + "loss": 7952.7312, + "step": 45110 + }, + { + "epoch": 0.09114525466937624, + "grad_norm": 38524.87109375, + "learning_rate": 9.024e-06, + "loss": 5604.0719, + "step": 45120 + }, + { + "epoch": 0.09116545530206006, + "grad_norm": 11886.5634765625, + "learning_rate": 9.026e-06, + "loss": 8736.4359, + "step": 45130 + }, + { + "epoch": 0.09118565593474387, + "grad_norm": 12573.654296875, + "learning_rate": 9.028e-06, + "loss": 4839.2652, + "step": 45140 + }, + { + "epoch": 0.09120585656742769, + "grad_norm": 1477.5821533203125, + "learning_rate": 9.030000000000002e-06, + "loss": 8361.95, + "step": 45150 + }, + { + "epoch": 0.09122605720011151, + "grad_norm": 1251.0693359375, + "learning_rate": 9.032000000000001e-06, + "loss": 5228.8305, + "step": 45160 + }, + { + "epoch": 0.09124625783279533, + "grad_norm": 24380.09375, + "learning_rate": 9.034e-06, + "loss": 5879.4234, + "step": 45170 + }, + { + "epoch": 0.09126645846547914, + "grad_norm": 2014.7879638671875, + "learning_rate": 9.036e-06, + "loss": 3093.9766, + "step": 45180 + }, + { + "epoch": 0.09128665909816296, + "grad_norm": 256453.984375, + "learning_rate": 9.038e-06, + "loss": 12677.1656, + "step": 45190 + }, + { + "epoch": 0.09130685973084678, + "grad_norm": 5344.60498046875, + "learning_rate": 9.040000000000002e-06, + "loss": 2981.8299, + "step": 45200 + }, + { + "epoch": 0.09132706036353058, + "grad_norm": 81292.1796875, + "learning_rate": 9.042e-06, + "loss": 3773.6687, + "step": 45210 + }, + { + "epoch": 0.0913472609962144, + "grad_norm": 2995.000732421875, + "learning_rate": 9.044e-06, + "loss": 6166.5066, + "step": 45220 + }, + { + "epoch": 0.09136746162889822, + "grad_norm": 2004.9326171875, + "learning_rate": 9.046000000000001e-06, + "loss": 3593.4773, + "step": 45230 + }, + { + "epoch": 0.09138766226158203, + "grad_norm": 21988.18359375, + "learning_rate": 9.048e-06, + "loss": 7405.3391, + "step": 45240 + }, + { + "epoch": 0.09140786289426585, + "grad_norm": 369.7669677734375, + "learning_rate": 9.050000000000001e-06, + "loss": 13698.2141, + "step": 45250 + }, + { + "epoch": 0.09142806352694967, + "grad_norm": 1243.689208984375, + "learning_rate": 9.052e-06, + "loss": 5927.1449, + "step": 45260 + }, + { + "epoch": 0.09144826415963347, + "grad_norm": 13871.978515625, + "learning_rate": 9.054e-06, + "loss": 5810.3297, + "step": 45270 + }, + { + "epoch": 0.0914684647923173, + "grad_norm": 185.1154022216797, + "learning_rate": 9.056000000000001e-06, + "loss": 1840.3484, + "step": 45280 + }, + { + "epoch": 0.09148866542500111, + "grad_norm": 2715.781982421875, + "learning_rate": 9.058000000000002e-06, + "loss": 7410.3687, + "step": 45290 + }, + { + "epoch": 0.09150886605768492, + "grad_norm": 12139.1982421875, + "learning_rate": 9.060000000000001e-06, + "loss": 2663.2486, + "step": 45300 + }, + { + "epoch": 0.09152906669036874, + "grad_norm": 65403.54296875, + "learning_rate": 9.062e-06, + "loss": 9852.95, + "step": 45310 + }, + { + "epoch": 0.09154926732305256, + "grad_norm": 259147.234375, + "learning_rate": 9.064e-06, + "loss": 8786.7789, + "step": 45320 + }, + { + "epoch": 0.09156946795573638, + "grad_norm": 3887.17724609375, + "learning_rate": 9.066000000000001e-06, + "loss": 6760.0023, + "step": 45330 + }, + { + "epoch": 0.09158966858842019, + "grad_norm": 2139.015869140625, + "learning_rate": 9.068000000000002e-06, + "loss": 6629.293, + "step": 45340 + }, + { + "epoch": 0.091609869221104, + "grad_norm": 51249.48046875, + "learning_rate": 9.070000000000001e-06, + "loss": 8673.5859, + "step": 45350 + }, + { + "epoch": 0.09163006985378783, + "grad_norm": 22458.09765625, + "learning_rate": 9.072e-06, + "loss": 5059.2367, + "step": 45360 + }, + { + "epoch": 0.09165027048647163, + "grad_norm": 1069.78515625, + "learning_rate": 9.074e-06, + "loss": 4965.7598, + "step": 45370 + }, + { + "epoch": 0.09167047111915545, + "grad_norm": 5201.2197265625, + "learning_rate": 9.076000000000001e-06, + "loss": 4958.8719, + "step": 45380 + }, + { + "epoch": 0.09169067175183927, + "grad_norm": 1508.6400146484375, + "learning_rate": 9.078000000000002e-06, + "loss": 11359.5758, + "step": 45390 + }, + { + "epoch": 0.09171087238452308, + "grad_norm": 54289.91015625, + "learning_rate": 9.080000000000001e-06, + "loss": 6661.8602, + "step": 45400 + }, + { + "epoch": 0.0917310730172069, + "grad_norm": 35893.80859375, + "learning_rate": 9.082e-06, + "loss": 7087.2539, + "step": 45410 + }, + { + "epoch": 0.09175127364989072, + "grad_norm": 114027.0625, + "learning_rate": 9.084e-06, + "loss": 7135.45, + "step": 45420 + }, + { + "epoch": 0.09177147428257452, + "grad_norm": 2027.4510498046875, + "learning_rate": 9.086e-06, + "loss": 1882.9342, + "step": 45430 + }, + { + "epoch": 0.09179167491525834, + "grad_norm": 3394.994873046875, + "learning_rate": 9.088000000000002e-06, + "loss": 6164.6016, + "step": 45440 + }, + { + "epoch": 0.09181187554794216, + "grad_norm": 24439.33203125, + "learning_rate": 9.090000000000001e-06, + "loss": 10926.343, + "step": 45450 + }, + { + "epoch": 0.09183207618062597, + "grad_norm": 177395.0625, + "learning_rate": 9.092e-06, + "loss": 9148.5625, + "step": 45460 + }, + { + "epoch": 0.09185227681330979, + "grad_norm": 15087.3095703125, + "learning_rate": 9.094000000000001e-06, + "loss": 5772.2055, + "step": 45470 + }, + { + "epoch": 0.09187247744599361, + "grad_norm": 2095.62255859375, + "learning_rate": 9.096e-06, + "loss": 3755.8699, + "step": 45480 + }, + { + "epoch": 0.09189267807867743, + "grad_norm": 448.9129638671875, + "learning_rate": 9.098000000000002e-06, + "loss": 2594.3586, + "step": 45490 + }, + { + "epoch": 0.09191287871136124, + "grad_norm": 3046.087646484375, + "learning_rate": 9.100000000000001e-06, + "loss": 4410.6641, + "step": 45500 + }, + { + "epoch": 0.09193307934404506, + "grad_norm": 79700.34375, + "learning_rate": 9.102e-06, + "loss": 6636.8164, + "step": 45510 + }, + { + "epoch": 0.09195327997672888, + "grad_norm": 88157.703125, + "learning_rate": 9.104000000000001e-06, + "loss": 7356.5711, + "step": 45520 + }, + { + "epoch": 0.09197348060941268, + "grad_norm": 23094.861328125, + "learning_rate": 9.106e-06, + "loss": 5875.7207, + "step": 45530 + }, + { + "epoch": 0.0919936812420965, + "grad_norm": 1934.6251220703125, + "learning_rate": 9.108000000000002e-06, + "loss": 4954.4645, + "step": 45540 + }, + { + "epoch": 0.09201388187478032, + "grad_norm": 16462.630859375, + "learning_rate": 9.110000000000001e-06, + "loss": 3274.5391, + "step": 45550 + }, + { + "epoch": 0.09203408250746413, + "grad_norm": 192045.78125, + "learning_rate": 9.112e-06, + "loss": 8839.1375, + "step": 45560 + }, + { + "epoch": 0.09205428314014795, + "grad_norm": 4044.241943359375, + "learning_rate": 9.114000000000001e-06, + "loss": 2314.3668, + "step": 45570 + }, + { + "epoch": 0.09207448377283177, + "grad_norm": 24937.076171875, + "learning_rate": 9.116e-06, + "loss": 2900.9447, + "step": 45580 + }, + { + "epoch": 0.09209468440551558, + "grad_norm": 56201.36328125, + "learning_rate": 9.118000000000001e-06, + "loss": 6304.8727, + "step": 45590 + }, + { + "epoch": 0.0921148850381994, + "grad_norm": 15185.01171875, + "learning_rate": 9.12e-06, + "loss": 5909.8586, + "step": 45600 + }, + { + "epoch": 0.09213508567088322, + "grad_norm": 5175.0205078125, + "learning_rate": 9.122e-06, + "loss": 12448.8672, + "step": 45610 + }, + { + "epoch": 0.09215528630356702, + "grad_norm": 33842.92578125, + "learning_rate": 9.124000000000001e-06, + "loss": 8155.6305, + "step": 45620 + }, + { + "epoch": 0.09217548693625084, + "grad_norm": 2162.885986328125, + "learning_rate": 9.126e-06, + "loss": 3629.2465, + "step": 45630 + }, + { + "epoch": 0.09219568756893466, + "grad_norm": 10125.0146484375, + "learning_rate": 9.128e-06, + "loss": 4950.7305, + "step": 45640 + }, + { + "epoch": 0.09221588820161847, + "grad_norm": 17154.53515625, + "learning_rate": 9.13e-06, + "loss": 6038.0195, + "step": 45650 + }, + { + "epoch": 0.09223608883430229, + "grad_norm": 27215.51171875, + "learning_rate": 9.132000000000002e-06, + "loss": 4933.643, + "step": 45660 + }, + { + "epoch": 0.09225628946698611, + "grad_norm": 336120.03125, + "learning_rate": 9.134000000000001e-06, + "loss": 15923.9344, + "step": 45670 + }, + { + "epoch": 0.09227649009966993, + "grad_norm": 12176.833984375, + "learning_rate": 9.136e-06, + "loss": 2942.5279, + "step": 45680 + }, + { + "epoch": 0.09229669073235373, + "grad_norm": 44145.20703125, + "learning_rate": 9.138e-06, + "loss": 4927.9656, + "step": 45690 + }, + { + "epoch": 0.09231689136503755, + "grad_norm": 1163.8741455078125, + "learning_rate": 9.14e-06, + "loss": 6931.807, + "step": 45700 + }, + { + "epoch": 0.09233709199772137, + "grad_norm": 15656.3994140625, + "learning_rate": 9.142000000000002e-06, + "loss": 4629.0125, + "step": 45710 + }, + { + "epoch": 0.09235729263040518, + "grad_norm": 24044.76171875, + "learning_rate": 9.144000000000001e-06, + "loss": 11898.4172, + "step": 45720 + }, + { + "epoch": 0.092377493263089, + "grad_norm": 40236.24609375, + "learning_rate": 9.146e-06, + "loss": 2598.0156, + "step": 45730 + }, + { + "epoch": 0.09239769389577282, + "grad_norm": 2166.1494140625, + "learning_rate": 9.148e-06, + "loss": 8958.4367, + "step": 45740 + }, + { + "epoch": 0.09241789452845663, + "grad_norm": 132500.203125, + "learning_rate": 9.15e-06, + "loss": 7624.0406, + "step": 45750 + }, + { + "epoch": 0.09243809516114045, + "grad_norm": 11879.7802734375, + "learning_rate": 9.152000000000001e-06, + "loss": 6264.3676, + "step": 45760 + }, + { + "epoch": 0.09245829579382427, + "grad_norm": 8251.63671875, + "learning_rate": 9.154e-06, + "loss": 4841.4734, + "step": 45770 + }, + { + "epoch": 0.09247849642650807, + "grad_norm": 1542.156005859375, + "learning_rate": 9.156e-06, + "loss": 8263.4234, + "step": 45780 + }, + { + "epoch": 0.09249869705919189, + "grad_norm": 2877.17919921875, + "learning_rate": 9.158e-06, + "loss": 8294.6688, + "step": 45790 + }, + { + "epoch": 0.09251889769187571, + "grad_norm": 1114.4339599609375, + "learning_rate": 9.16e-06, + "loss": 3669.7016, + "step": 45800 + }, + { + "epoch": 0.09253909832455952, + "grad_norm": 24515.18359375, + "learning_rate": 9.162000000000001e-06, + "loss": 8291.525, + "step": 45810 + }, + { + "epoch": 0.09255929895724334, + "grad_norm": 3293.1376953125, + "learning_rate": 9.164e-06, + "loss": 3856.7508, + "step": 45820 + }, + { + "epoch": 0.09257949958992716, + "grad_norm": 667.2909545898438, + "learning_rate": 9.166e-06, + "loss": 9088.1656, + "step": 45830 + }, + { + "epoch": 0.09259970022261098, + "grad_norm": 4996.2822265625, + "learning_rate": 9.168000000000001e-06, + "loss": 19672.9484, + "step": 45840 + }, + { + "epoch": 0.09261990085529478, + "grad_norm": 66330.203125, + "learning_rate": 9.17e-06, + "loss": 9288.3344, + "step": 45850 + }, + { + "epoch": 0.0926401014879786, + "grad_norm": 4278.74609375, + "learning_rate": 9.172000000000001e-06, + "loss": 5522.152, + "step": 45860 + }, + { + "epoch": 0.09266030212066242, + "grad_norm": 23178.546875, + "learning_rate": 9.174e-06, + "loss": 4588.5188, + "step": 45870 + }, + { + "epoch": 0.09268050275334623, + "grad_norm": 1420.1890869140625, + "learning_rate": 9.176e-06, + "loss": 2330.9771, + "step": 45880 + }, + { + "epoch": 0.09270070338603005, + "grad_norm": 21353.662109375, + "learning_rate": 9.178000000000001e-06, + "loss": 3142.3041, + "step": 45890 + }, + { + "epoch": 0.09272090401871387, + "grad_norm": 5567.28857421875, + "learning_rate": 9.180000000000002e-06, + "loss": 4210.7746, + "step": 45900 + }, + { + "epoch": 0.09274110465139768, + "grad_norm": 26970.966796875, + "learning_rate": 9.182000000000001e-06, + "loss": 3815.0465, + "step": 45910 + }, + { + "epoch": 0.0927613052840815, + "grad_norm": 1122.7415771484375, + "learning_rate": 9.184e-06, + "loss": 9768.9727, + "step": 45920 + }, + { + "epoch": 0.09278150591676532, + "grad_norm": 15002.224609375, + "learning_rate": 9.186e-06, + "loss": 6370.3625, + "step": 45930 + }, + { + "epoch": 0.09280170654944912, + "grad_norm": 40305.68359375, + "learning_rate": 9.188e-06, + "loss": 8617.5703, + "step": 45940 + }, + { + "epoch": 0.09282190718213294, + "grad_norm": 25306.2109375, + "learning_rate": 9.190000000000002e-06, + "loss": 3519.8766, + "step": 45950 + }, + { + "epoch": 0.09284210781481676, + "grad_norm": 109719.1875, + "learning_rate": 9.192000000000001e-06, + "loss": 4920.0656, + "step": 45960 + }, + { + "epoch": 0.09286230844750057, + "grad_norm": 1875.274169921875, + "learning_rate": 9.194e-06, + "loss": 3168.8094, + "step": 45970 + }, + { + "epoch": 0.09288250908018439, + "grad_norm": 27304.611328125, + "learning_rate": 9.196e-06, + "loss": 4557.0426, + "step": 45980 + }, + { + "epoch": 0.09290270971286821, + "grad_norm": 22298.22265625, + "learning_rate": 9.198e-06, + "loss": 8618.7742, + "step": 45990 + }, + { + "epoch": 0.09292291034555203, + "grad_norm": 42454.4765625, + "learning_rate": 9.200000000000002e-06, + "loss": 3526.0203, + "step": 46000 + }, + { + "epoch": 0.09294311097823584, + "grad_norm": 2885.59228515625, + "learning_rate": 9.202000000000001e-06, + "loss": 3725.7063, + "step": 46010 + }, + { + "epoch": 0.09296331161091966, + "grad_norm": 63408.359375, + "learning_rate": 9.204e-06, + "loss": 3036.5568, + "step": 46020 + }, + { + "epoch": 0.09298351224360348, + "grad_norm": 38217.625, + "learning_rate": 9.206000000000001e-06, + "loss": 4300.2801, + "step": 46030 + }, + { + "epoch": 0.09300371287628728, + "grad_norm": 14557.4345703125, + "learning_rate": 9.208e-06, + "loss": 2545.9309, + "step": 46040 + }, + { + "epoch": 0.0930239135089711, + "grad_norm": 2085.029296875, + "learning_rate": 9.210000000000002e-06, + "loss": 3587.7176, + "step": 46050 + }, + { + "epoch": 0.09304411414165492, + "grad_norm": 1130.28076171875, + "learning_rate": 9.212000000000001e-06, + "loss": 6832.0555, + "step": 46060 + }, + { + "epoch": 0.09306431477433873, + "grad_norm": 9786.7392578125, + "learning_rate": 9.214e-06, + "loss": 8517.9328, + "step": 46070 + }, + { + "epoch": 0.09308451540702255, + "grad_norm": 18741.796875, + "learning_rate": 9.216000000000001e-06, + "loss": 5865.4359, + "step": 46080 + }, + { + "epoch": 0.09310471603970637, + "grad_norm": 9791.3125, + "learning_rate": 9.218e-06, + "loss": 12482.6367, + "step": 46090 + }, + { + "epoch": 0.09312491667239017, + "grad_norm": 4059.78466796875, + "learning_rate": 9.220000000000002e-06, + "loss": 8661.182, + "step": 46100 + }, + { + "epoch": 0.093145117305074, + "grad_norm": 11239.232421875, + "learning_rate": 9.222e-06, + "loss": 2214.2977, + "step": 46110 + }, + { + "epoch": 0.09316531793775781, + "grad_norm": 4221.32763671875, + "learning_rate": 9.224e-06, + "loss": 5906.3324, + "step": 46120 + }, + { + "epoch": 0.09318551857044162, + "grad_norm": 8046.103515625, + "learning_rate": 9.226000000000001e-06, + "loss": 4754.8355, + "step": 46130 + }, + { + "epoch": 0.09320571920312544, + "grad_norm": 66785.4140625, + "learning_rate": 9.228e-06, + "loss": 4404.4559, + "step": 46140 + }, + { + "epoch": 0.09322591983580926, + "grad_norm": 1944.8724365234375, + "learning_rate": 9.230000000000001e-06, + "loss": 3934.3535, + "step": 46150 + }, + { + "epoch": 0.09324612046849308, + "grad_norm": 13058.998046875, + "learning_rate": 9.232e-06, + "loss": 6967.8727, + "step": 46160 + }, + { + "epoch": 0.09326632110117689, + "grad_norm": 34309.1640625, + "learning_rate": 9.234e-06, + "loss": 10879.1039, + "step": 46170 + }, + { + "epoch": 0.0932865217338607, + "grad_norm": 38579.73828125, + "learning_rate": 9.236000000000001e-06, + "loss": 7511.4102, + "step": 46180 + }, + { + "epoch": 0.09330672236654453, + "grad_norm": 82633.0078125, + "learning_rate": 9.238e-06, + "loss": 16073.1562, + "step": 46190 + }, + { + "epoch": 0.09332692299922833, + "grad_norm": 1225.5526123046875, + "learning_rate": 9.240000000000001e-06, + "loss": 3568.5609, + "step": 46200 + }, + { + "epoch": 0.09334712363191215, + "grad_norm": 8439.3447265625, + "learning_rate": 9.242e-06, + "loss": 9070.1898, + "step": 46210 + }, + { + "epoch": 0.09336732426459597, + "grad_norm": 3103.69287109375, + "learning_rate": 9.244e-06, + "loss": 5445.327, + "step": 46220 + }, + { + "epoch": 0.09338752489727978, + "grad_norm": 38891.81640625, + "learning_rate": 9.246000000000001e-06, + "loss": 3028.3324, + "step": 46230 + }, + { + "epoch": 0.0934077255299636, + "grad_norm": 940.0552368164062, + "learning_rate": 9.248e-06, + "loss": 2554.1268, + "step": 46240 + }, + { + "epoch": 0.09342792616264742, + "grad_norm": 84.91472625732422, + "learning_rate": 9.250000000000001e-06, + "loss": 9402.2516, + "step": 46250 + }, + { + "epoch": 0.09344812679533122, + "grad_norm": 74760.96875, + "learning_rate": 9.252e-06, + "loss": 9638.9969, + "step": 46260 + }, + { + "epoch": 0.09346832742801504, + "grad_norm": 4029.760009765625, + "learning_rate": 9.254000000000002e-06, + "loss": 6205.4062, + "step": 46270 + }, + { + "epoch": 0.09348852806069886, + "grad_norm": 9118.6845703125, + "learning_rate": 9.256e-06, + "loss": 3455.2547, + "step": 46280 + }, + { + "epoch": 0.09350872869338267, + "grad_norm": 49259.08203125, + "learning_rate": 9.258e-06, + "loss": 7907.2547, + "step": 46290 + }, + { + "epoch": 0.09352892932606649, + "grad_norm": 164155.515625, + "learning_rate": 9.260000000000001e-06, + "loss": 12061.5773, + "step": 46300 + }, + { + "epoch": 0.09354912995875031, + "grad_norm": 511.370849609375, + "learning_rate": 9.262e-06, + "loss": 9842.2328, + "step": 46310 + }, + { + "epoch": 0.09356933059143413, + "grad_norm": 36486.47265625, + "learning_rate": 9.264000000000001e-06, + "loss": 8878.0055, + "step": 46320 + }, + { + "epoch": 0.09358953122411794, + "grad_norm": 4981.1826171875, + "learning_rate": 9.266e-06, + "loss": 4999.8668, + "step": 46330 + }, + { + "epoch": 0.09360973185680176, + "grad_norm": 6093.4287109375, + "learning_rate": 9.268e-06, + "loss": 3362.5219, + "step": 46340 + }, + { + "epoch": 0.09362993248948558, + "grad_norm": 9625.982421875, + "learning_rate": 9.270000000000001e-06, + "loss": 6270.6754, + "step": 46350 + }, + { + "epoch": 0.09365013312216938, + "grad_norm": 1284.3067626953125, + "learning_rate": 9.272e-06, + "loss": 10280.7945, + "step": 46360 + }, + { + "epoch": 0.0936703337548532, + "grad_norm": 265.51226806640625, + "learning_rate": 9.274000000000001e-06, + "loss": 4451.5723, + "step": 46370 + }, + { + "epoch": 0.09369053438753702, + "grad_norm": 14686.130859375, + "learning_rate": 9.276e-06, + "loss": 8953.7961, + "step": 46380 + }, + { + "epoch": 0.09371073502022083, + "grad_norm": 25410.13671875, + "learning_rate": 9.278e-06, + "loss": 5124.3336, + "step": 46390 + }, + { + "epoch": 0.09373093565290465, + "grad_norm": 34688.359375, + "learning_rate": 9.280000000000001e-06, + "loss": 9312.3078, + "step": 46400 + }, + { + "epoch": 0.09375113628558847, + "grad_norm": 12255.60546875, + "learning_rate": 9.282e-06, + "loss": 6374.7563, + "step": 46410 + }, + { + "epoch": 0.09377133691827227, + "grad_norm": 104113.9296875, + "learning_rate": 9.284000000000001e-06, + "loss": 9862.1703, + "step": 46420 + }, + { + "epoch": 0.0937915375509561, + "grad_norm": 217900.953125, + "learning_rate": 9.286e-06, + "loss": 9762.1328, + "step": 46430 + }, + { + "epoch": 0.09381173818363991, + "grad_norm": 25723.904296875, + "learning_rate": 9.288e-06, + "loss": 5959.9883, + "step": 46440 + }, + { + "epoch": 0.09383193881632372, + "grad_norm": 69097.140625, + "learning_rate": 9.29e-06, + "loss": 8981.9281, + "step": 46450 + }, + { + "epoch": 0.09385213944900754, + "grad_norm": 10674.5322265625, + "learning_rate": 9.292000000000002e-06, + "loss": 7991.0844, + "step": 46460 + }, + { + "epoch": 0.09387234008169136, + "grad_norm": 83.86109924316406, + "learning_rate": 9.294000000000001e-06, + "loss": 4329.459, + "step": 46470 + }, + { + "epoch": 0.09389254071437518, + "grad_norm": 2718.66162109375, + "learning_rate": 9.296e-06, + "loss": 1034.4901, + "step": 46480 + }, + { + "epoch": 0.09391274134705899, + "grad_norm": 644.408447265625, + "learning_rate": 9.298e-06, + "loss": 2026.9023, + "step": 46490 + }, + { + "epoch": 0.0939329419797428, + "grad_norm": 23457.66015625, + "learning_rate": 9.3e-06, + "loss": 4264.5586, + "step": 46500 + }, + { + "epoch": 0.09395314261242663, + "grad_norm": 914.0183715820312, + "learning_rate": 9.302000000000002e-06, + "loss": 5207.7859, + "step": 46510 + }, + { + "epoch": 0.09397334324511043, + "grad_norm": 5940.56884765625, + "learning_rate": 9.304000000000001e-06, + "loss": 5475.4688, + "step": 46520 + }, + { + "epoch": 0.09399354387779425, + "grad_norm": 4553.59423828125, + "learning_rate": 9.306e-06, + "loss": 1151.8041, + "step": 46530 + }, + { + "epoch": 0.09401374451047807, + "grad_norm": 258.2912292480469, + "learning_rate": 9.308e-06, + "loss": 2264.1494, + "step": 46540 + }, + { + "epoch": 0.09403394514316188, + "grad_norm": 64627.09375, + "learning_rate": 9.31e-06, + "loss": 13929.7406, + "step": 46550 + }, + { + "epoch": 0.0940541457758457, + "grad_norm": 5150.03662109375, + "learning_rate": 9.312000000000002e-06, + "loss": 3827.2664, + "step": 46560 + }, + { + "epoch": 0.09407434640852952, + "grad_norm": 7894.232421875, + "learning_rate": 9.314000000000001e-06, + "loss": 2531.8518, + "step": 46570 + }, + { + "epoch": 0.09409454704121333, + "grad_norm": 1229.4598388671875, + "learning_rate": 9.316e-06, + "loss": 1738.4283, + "step": 46580 + }, + { + "epoch": 0.09411474767389715, + "grad_norm": 14101.2431640625, + "learning_rate": 9.318e-06, + "loss": 8714.5508, + "step": 46590 + }, + { + "epoch": 0.09413494830658097, + "grad_norm": 17774.587890625, + "learning_rate": 9.32e-06, + "loss": 1935.0945, + "step": 46600 + }, + { + "epoch": 0.09415514893926477, + "grad_norm": 12123.982421875, + "learning_rate": 9.322000000000002e-06, + "loss": 3151.7094, + "step": 46610 + }, + { + "epoch": 0.09417534957194859, + "grad_norm": 9709.556640625, + "learning_rate": 9.324000000000001e-06, + "loss": 4611.8598, + "step": 46620 + }, + { + "epoch": 0.09419555020463241, + "grad_norm": 20162.607421875, + "learning_rate": 9.326e-06, + "loss": 3507.332, + "step": 46630 + }, + { + "epoch": 0.09421575083731623, + "grad_norm": 48208.2890625, + "learning_rate": 9.328000000000001e-06, + "loss": 4749.8652, + "step": 46640 + }, + { + "epoch": 0.09423595147000004, + "grad_norm": 32503.6015625, + "learning_rate": 9.33e-06, + "loss": 14031.4328, + "step": 46650 + }, + { + "epoch": 0.09425615210268386, + "grad_norm": 4080.77587890625, + "learning_rate": 9.332000000000001e-06, + "loss": 3413.0656, + "step": 46660 + }, + { + "epoch": 0.09427635273536768, + "grad_norm": 98219.625, + "learning_rate": 9.334e-06, + "loss": 8663.4672, + "step": 46670 + }, + { + "epoch": 0.09429655336805148, + "grad_norm": 17570.828125, + "learning_rate": 9.336e-06, + "loss": 3510.2805, + "step": 46680 + }, + { + "epoch": 0.0943167540007353, + "grad_norm": 7439.98974609375, + "learning_rate": 9.338000000000001e-06, + "loss": 2928.5629, + "step": 46690 + }, + { + "epoch": 0.09433695463341912, + "grad_norm": 25933.634765625, + "learning_rate": 9.340000000000002e-06, + "loss": 2367.9191, + "step": 46700 + }, + { + "epoch": 0.09435715526610293, + "grad_norm": 194101.09375, + "learning_rate": 9.342000000000001e-06, + "loss": 8084.3891, + "step": 46710 + }, + { + "epoch": 0.09437735589878675, + "grad_norm": 53103.71484375, + "learning_rate": 9.344e-06, + "loss": 3100.3139, + "step": 46720 + }, + { + "epoch": 0.09439755653147057, + "grad_norm": 415.0269775390625, + "learning_rate": 9.346e-06, + "loss": 3247.7117, + "step": 46730 + }, + { + "epoch": 0.09441775716415438, + "grad_norm": 1854.7586669921875, + "learning_rate": 9.348000000000001e-06, + "loss": 1495.726, + "step": 46740 + }, + { + "epoch": 0.0944379577968382, + "grad_norm": 311.8507385253906, + "learning_rate": 9.350000000000002e-06, + "loss": 5506.3805, + "step": 46750 + }, + { + "epoch": 0.09445815842952202, + "grad_norm": 16338.6640625, + "learning_rate": 9.352000000000001e-06, + "loss": 6441.2105, + "step": 46760 + }, + { + "epoch": 0.09447835906220582, + "grad_norm": 10321.171875, + "learning_rate": 9.354e-06, + "loss": 3490.9875, + "step": 46770 + }, + { + "epoch": 0.09449855969488964, + "grad_norm": 735.6342163085938, + "learning_rate": 9.356e-06, + "loss": 2759.2555, + "step": 46780 + }, + { + "epoch": 0.09451876032757346, + "grad_norm": 8917.943359375, + "learning_rate": 9.358000000000001e-06, + "loss": 4562.3246, + "step": 46790 + }, + { + "epoch": 0.09453896096025728, + "grad_norm": 66946.609375, + "learning_rate": 9.360000000000002e-06, + "loss": 8386.0984, + "step": 46800 + }, + { + "epoch": 0.09455916159294109, + "grad_norm": 10159.080078125, + "learning_rate": 9.362000000000001e-06, + "loss": 9007.2789, + "step": 46810 + }, + { + "epoch": 0.09457936222562491, + "grad_norm": 15049.443359375, + "learning_rate": 9.364e-06, + "loss": 5117.3043, + "step": 46820 + }, + { + "epoch": 0.09459956285830873, + "grad_norm": 2521.685791015625, + "learning_rate": 9.366000000000001e-06, + "loss": 4702.5836, + "step": 46830 + }, + { + "epoch": 0.09461976349099253, + "grad_norm": 5852.55859375, + "learning_rate": 9.368e-06, + "loss": 3817.5547, + "step": 46840 + }, + { + "epoch": 0.09463996412367635, + "grad_norm": 12986.7099609375, + "learning_rate": 9.370000000000002e-06, + "loss": 3558.2465, + "step": 46850 + }, + { + "epoch": 0.09466016475636017, + "grad_norm": 1340.9345703125, + "learning_rate": 9.372000000000001e-06, + "loss": 4421.5695, + "step": 46860 + }, + { + "epoch": 0.09468036538904398, + "grad_norm": 121608.5078125, + "learning_rate": 9.374e-06, + "loss": 5966.2773, + "step": 46870 + }, + { + "epoch": 0.0947005660217278, + "grad_norm": 14761.32421875, + "learning_rate": 9.376000000000001e-06, + "loss": 5113.3508, + "step": 46880 + }, + { + "epoch": 0.09472076665441162, + "grad_norm": 49544.2109375, + "learning_rate": 9.378e-06, + "loss": 3286.6906, + "step": 46890 + }, + { + "epoch": 0.09474096728709543, + "grad_norm": 2084.42822265625, + "learning_rate": 9.38e-06, + "loss": 12649.8797, + "step": 46900 + }, + { + "epoch": 0.09476116791977925, + "grad_norm": 15925.80078125, + "learning_rate": 9.382000000000001e-06, + "loss": 2080.118, + "step": 46910 + }, + { + "epoch": 0.09478136855246307, + "grad_norm": 7628.59912109375, + "learning_rate": 9.384e-06, + "loss": 1418.2197, + "step": 46920 + }, + { + "epoch": 0.09480156918514687, + "grad_norm": 16361.4541015625, + "learning_rate": 9.386000000000001e-06, + "loss": 5125.4301, + "step": 46930 + }, + { + "epoch": 0.09482176981783069, + "grad_norm": 16435.08203125, + "learning_rate": 9.388e-06, + "loss": 7992.8922, + "step": 46940 + }, + { + "epoch": 0.09484197045051451, + "grad_norm": 1749.7650146484375, + "learning_rate": 9.39e-06, + "loss": 1934.1242, + "step": 46950 + }, + { + "epoch": 0.09486217108319833, + "grad_norm": 2487.18701171875, + "learning_rate": 9.392000000000001e-06, + "loss": 3328.8379, + "step": 46960 + }, + { + "epoch": 0.09488237171588214, + "grad_norm": 204289.296875, + "learning_rate": 9.394e-06, + "loss": 6793.5992, + "step": 46970 + }, + { + "epoch": 0.09490257234856596, + "grad_norm": 203122.734375, + "learning_rate": 9.396000000000001e-06, + "loss": 12966.4547, + "step": 46980 + }, + { + "epoch": 0.09492277298124978, + "grad_norm": 25091.40625, + "learning_rate": 9.398e-06, + "loss": 6343.2387, + "step": 46990 + }, + { + "epoch": 0.09494297361393358, + "grad_norm": 98504.2109375, + "learning_rate": 9.4e-06, + "loss": 11029.3117, + "step": 47000 + }, + { + "epoch": 0.0949631742466174, + "grad_norm": 32749.228515625, + "learning_rate": 9.402e-06, + "loss": 3066.7611, + "step": 47010 + }, + { + "epoch": 0.09498337487930122, + "grad_norm": 6009.70068359375, + "learning_rate": 9.404e-06, + "loss": 2166.6316, + "step": 47020 + }, + { + "epoch": 0.09500357551198503, + "grad_norm": 77504.625, + "learning_rate": 9.406000000000001e-06, + "loss": 7715.3703, + "step": 47030 + }, + { + "epoch": 0.09502377614466885, + "grad_norm": 2134.14111328125, + "learning_rate": 9.408e-06, + "loss": 5578.3789, + "step": 47040 + }, + { + "epoch": 0.09504397677735267, + "grad_norm": 30237.390625, + "learning_rate": 9.41e-06, + "loss": 10153.0, + "step": 47050 + }, + { + "epoch": 0.09506417741003648, + "grad_norm": 292.236328125, + "learning_rate": 9.412e-06, + "loss": 3779.7629, + "step": 47060 + }, + { + "epoch": 0.0950843780427203, + "grad_norm": 40110.0, + "learning_rate": 9.414000000000002e-06, + "loss": 6635.8953, + "step": 47070 + }, + { + "epoch": 0.09510457867540412, + "grad_norm": 3829.6435546875, + "learning_rate": 9.416000000000001e-06, + "loss": 2315.365, + "step": 47080 + }, + { + "epoch": 0.09512477930808792, + "grad_norm": 3715.175537109375, + "learning_rate": 9.418e-06, + "loss": 4051.1758, + "step": 47090 + }, + { + "epoch": 0.09514497994077174, + "grad_norm": 33079.67578125, + "learning_rate": 9.42e-06, + "loss": 2316.802, + "step": 47100 + }, + { + "epoch": 0.09516518057345556, + "grad_norm": 68306.8984375, + "learning_rate": 9.422e-06, + "loss": 10724.9195, + "step": 47110 + }, + { + "epoch": 0.09518538120613938, + "grad_norm": 23591.0546875, + "learning_rate": 9.424000000000002e-06, + "loss": 3454.2422, + "step": 47120 + }, + { + "epoch": 0.09520558183882319, + "grad_norm": 4547.474609375, + "learning_rate": 9.426000000000001e-06, + "loss": 4483.2848, + "step": 47130 + }, + { + "epoch": 0.09522578247150701, + "grad_norm": 5645.38525390625, + "learning_rate": 9.428e-06, + "loss": 8069.168, + "step": 47140 + }, + { + "epoch": 0.09524598310419083, + "grad_norm": 4792.5556640625, + "learning_rate": 9.43e-06, + "loss": 3330.9336, + "step": 47150 + }, + { + "epoch": 0.09526618373687464, + "grad_norm": 2036.1781005859375, + "learning_rate": 9.432e-06, + "loss": 2033.4641, + "step": 47160 + }, + { + "epoch": 0.09528638436955846, + "grad_norm": 20107.673828125, + "learning_rate": 9.434000000000001e-06, + "loss": 1136.0939, + "step": 47170 + }, + { + "epoch": 0.09530658500224228, + "grad_norm": 940.0614013671875, + "learning_rate": 9.436e-06, + "loss": 2734.2383, + "step": 47180 + }, + { + "epoch": 0.09532678563492608, + "grad_norm": 3218.762451171875, + "learning_rate": 9.438e-06, + "loss": 7951.7578, + "step": 47190 + }, + { + "epoch": 0.0953469862676099, + "grad_norm": 2993.165771484375, + "learning_rate": 9.440000000000001e-06, + "loss": 4516.1059, + "step": 47200 + }, + { + "epoch": 0.09536718690029372, + "grad_norm": 8240.9765625, + "learning_rate": 9.442e-06, + "loss": 4972.227, + "step": 47210 + }, + { + "epoch": 0.09538738753297753, + "grad_norm": 9429.712890625, + "learning_rate": 9.444000000000001e-06, + "loss": 3081.1863, + "step": 47220 + }, + { + "epoch": 0.09540758816566135, + "grad_norm": 46983.12109375, + "learning_rate": 9.446e-06, + "loss": 5770.7176, + "step": 47230 + }, + { + "epoch": 0.09542778879834517, + "grad_norm": 44515.109375, + "learning_rate": 9.448e-06, + "loss": 11691.0266, + "step": 47240 + }, + { + "epoch": 0.09544798943102897, + "grad_norm": 23918.205078125, + "learning_rate": 9.450000000000001e-06, + "loss": 12352.1219, + "step": 47250 + }, + { + "epoch": 0.0954681900637128, + "grad_norm": 8759.10546875, + "learning_rate": 9.452000000000002e-06, + "loss": 9468.7219, + "step": 47260 + }, + { + "epoch": 0.09548839069639661, + "grad_norm": 25946.177734375, + "learning_rate": 9.454000000000001e-06, + "loss": 6491.6938, + "step": 47270 + }, + { + "epoch": 0.09550859132908043, + "grad_norm": 6641.51123046875, + "learning_rate": 9.456e-06, + "loss": 6204.4387, + "step": 47280 + }, + { + "epoch": 0.09552879196176424, + "grad_norm": 73697.1796875, + "learning_rate": 9.458e-06, + "loss": 16863.9719, + "step": 47290 + }, + { + "epoch": 0.09554899259444806, + "grad_norm": 10206.9453125, + "learning_rate": 9.460000000000001e-06, + "loss": 11295.9977, + "step": 47300 + }, + { + "epoch": 0.09556919322713188, + "grad_norm": 1228.3505859375, + "learning_rate": 9.462000000000002e-06, + "loss": 6390.0637, + "step": 47310 + }, + { + "epoch": 0.09558939385981569, + "grad_norm": 2728.415283203125, + "learning_rate": 9.464000000000001e-06, + "loss": 4516.6395, + "step": 47320 + }, + { + "epoch": 0.0956095944924995, + "grad_norm": 16846.513671875, + "learning_rate": 9.466e-06, + "loss": 5023.416, + "step": 47330 + }, + { + "epoch": 0.09562979512518333, + "grad_norm": 36077.0, + "learning_rate": 9.468e-06, + "loss": 9104.7648, + "step": 47340 + }, + { + "epoch": 0.09564999575786713, + "grad_norm": 10577.4248046875, + "learning_rate": 9.47e-06, + "loss": 9920.0305, + "step": 47350 + }, + { + "epoch": 0.09567019639055095, + "grad_norm": 664.6073608398438, + "learning_rate": 9.472000000000002e-06, + "loss": 7150.1812, + "step": 47360 + }, + { + "epoch": 0.09569039702323477, + "grad_norm": 61756.63671875, + "learning_rate": 9.474000000000001e-06, + "loss": 6609.5437, + "step": 47370 + }, + { + "epoch": 0.09571059765591858, + "grad_norm": 50364.87109375, + "learning_rate": 9.476e-06, + "loss": 11505.993, + "step": 47380 + }, + { + "epoch": 0.0957307982886024, + "grad_norm": 6315.095703125, + "learning_rate": 9.478e-06, + "loss": 2740.2295, + "step": 47390 + }, + { + "epoch": 0.09575099892128622, + "grad_norm": 8221.9716796875, + "learning_rate": 9.48e-06, + "loss": 7283.35, + "step": 47400 + }, + { + "epoch": 0.09577119955397002, + "grad_norm": 19968.375, + "learning_rate": 9.482000000000002e-06, + "loss": 7442.1648, + "step": 47410 + }, + { + "epoch": 0.09579140018665384, + "grad_norm": 5585.2353515625, + "learning_rate": 9.484000000000001e-06, + "loss": 2699.4287, + "step": 47420 + }, + { + "epoch": 0.09581160081933766, + "grad_norm": 4435.9375, + "learning_rate": 9.486e-06, + "loss": 10388.9961, + "step": 47430 + }, + { + "epoch": 0.09583180145202148, + "grad_norm": 49169.921875, + "learning_rate": 9.488000000000001e-06, + "loss": 6717.7648, + "step": 47440 + }, + { + "epoch": 0.09585200208470529, + "grad_norm": 50405.1328125, + "learning_rate": 9.49e-06, + "loss": 8107.7094, + "step": 47450 + }, + { + "epoch": 0.09587220271738911, + "grad_norm": 411.3293762207031, + "learning_rate": 9.492000000000002e-06, + "loss": 4217.127, + "step": 47460 + }, + { + "epoch": 0.09589240335007293, + "grad_norm": 16523.638671875, + "learning_rate": 9.494000000000001e-06, + "loss": 4579.4945, + "step": 47470 + }, + { + "epoch": 0.09591260398275674, + "grad_norm": 96516.2734375, + "learning_rate": 9.496e-06, + "loss": 6757.6344, + "step": 47480 + }, + { + "epoch": 0.09593280461544056, + "grad_norm": 98359.2109375, + "learning_rate": 9.498000000000001e-06, + "loss": 7163.993, + "step": 47490 + }, + { + "epoch": 0.09595300524812438, + "grad_norm": 2577.853515625, + "learning_rate": 9.5e-06, + "loss": 4237.825, + "step": 47500 + }, + { + "epoch": 0.09597320588080818, + "grad_norm": 9542.0517578125, + "learning_rate": 9.502000000000002e-06, + "loss": 3454.8594, + "step": 47510 + }, + { + "epoch": 0.095993406513492, + "grad_norm": 8879.5146484375, + "learning_rate": 9.504e-06, + "loss": 4651.9281, + "step": 47520 + }, + { + "epoch": 0.09601360714617582, + "grad_norm": 3509.4443359375, + "learning_rate": 9.506e-06, + "loss": 12266.3797, + "step": 47530 + }, + { + "epoch": 0.09603380777885963, + "grad_norm": 6651.50439453125, + "learning_rate": 9.508000000000001e-06, + "loss": 6134.0578, + "step": 47540 + }, + { + "epoch": 0.09605400841154345, + "grad_norm": 16111.640625, + "learning_rate": 9.51e-06, + "loss": 3982.8402, + "step": 47550 + }, + { + "epoch": 0.09607420904422727, + "grad_norm": 25727.71484375, + "learning_rate": 9.512000000000001e-06, + "loss": 6860.4383, + "step": 47560 + }, + { + "epoch": 0.09609440967691107, + "grad_norm": 69216.0703125, + "learning_rate": 9.514e-06, + "loss": 13682.8141, + "step": 47570 + }, + { + "epoch": 0.0961146103095949, + "grad_norm": 24299.638671875, + "learning_rate": 9.516e-06, + "loss": 4662.05, + "step": 47580 + }, + { + "epoch": 0.09613481094227871, + "grad_norm": 4676.32373046875, + "learning_rate": 9.518000000000001e-06, + "loss": 4760.3676, + "step": 47590 + }, + { + "epoch": 0.09615501157496253, + "grad_norm": 12706.15625, + "learning_rate": 9.52e-06, + "loss": 5287.8672, + "step": 47600 + }, + { + "epoch": 0.09617521220764634, + "grad_norm": 71997.5, + "learning_rate": 9.522000000000001e-06, + "loss": 3623.116, + "step": 47610 + }, + { + "epoch": 0.09619541284033016, + "grad_norm": 67700.6640625, + "learning_rate": 9.524e-06, + "loss": 4410.275, + "step": 47620 + }, + { + "epoch": 0.09621561347301398, + "grad_norm": 5315.2158203125, + "learning_rate": 9.526000000000002e-06, + "loss": 6401.4727, + "step": 47630 + }, + { + "epoch": 0.09623581410569779, + "grad_norm": 1375.597900390625, + "learning_rate": 9.528000000000001e-06, + "loss": 8637.7031, + "step": 47640 + }, + { + "epoch": 0.0962560147383816, + "grad_norm": 70517.359375, + "learning_rate": 9.53e-06, + "loss": 7415.5531, + "step": 47650 + }, + { + "epoch": 0.09627621537106543, + "grad_norm": 8431.51953125, + "learning_rate": 9.532000000000001e-06, + "loss": 6234.1832, + "step": 47660 + }, + { + "epoch": 0.09629641600374923, + "grad_norm": 337.2398376464844, + "learning_rate": 9.534e-06, + "loss": 5457.3805, + "step": 47670 + }, + { + "epoch": 0.09631661663643305, + "grad_norm": 3473.8828125, + "learning_rate": 9.536000000000002e-06, + "loss": 7708.1203, + "step": 47680 + }, + { + "epoch": 0.09633681726911687, + "grad_norm": 1580.6319580078125, + "learning_rate": 9.538e-06, + "loss": 1501.1697, + "step": 47690 + }, + { + "epoch": 0.09635701790180068, + "grad_norm": 64382.34765625, + "learning_rate": 9.54e-06, + "loss": 5400.5281, + "step": 47700 + }, + { + "epoch": 0.0963772185344845, + "grad_norm": 8359.2255859375, + "learning_rate": 9.542000000000001e-06, + "loss": 4106.793, + "step": 47710 + }, + { + "epoch": 0.09639741916716832, + "grad_norm": 17185.4375, + "learning_rate": 9.544e-06, + "loss": 5722.0875, + "step": 47720 + }, + { + "epoch": 0.09641761979985213, + "grad_norm": 1718.479736328125, + "learning_rate": 9.546000000000001e-06, + "loss": 3107.1086, + "step": 47730 + }, + { + "epoch": 0.09643782043253595, + "grad_norm": 6674.9267578125, + "learning_rate": 9.548e-06, + "loss": 3371.6508, + "step": 47740 + }, + { + "epoch": 0.09645802106521977, + "grad_norm": 180.99594116210938, + "learning_rate": 9.55e-06, + "loss": 5236.7711, + "step": 47750 + }, + { + "epoch": 0.09647822169790359, + "grad_norm": 877.25732421875, + "learning_rate": 9.552000000000001e-06, + "loss": 2142.725, + "step": 47760 + }, + { + "epoch": 0.09649842233058739, + "grad_norm": 71860.6015625, + "learning_rate": 9.554e-06, + "loss": 8043.593, + "step": 47770 + }, + { + "epoch": 0.09651862296327121, + "grad_norm": 8577.7392578125, + "learning_rate": 9.556000000000001e-06, + "loss": 4044.4707, + "step": 47780 + }, + { + "epoch": 0.09653882359595503, + "grad_norm": 145491.015625, + "learning_rate": 9.558e-06, + "loss": 11099.3773, + "step": 47790 + }, + { + "epoch": 0.09655902422863884, + "grad_norm": 5416.5830078125, + "learning_rate": 9.56e-06, + "loss": 11254.2641, + "step": 47800 + }, + { + "epoch": 0.09657922486132266, + "grad_norm": 4096.50732421875, + "learning_rate": 9.562000000000001e-06, + "loss": 10742.8695, + "step": 47810 + }, + { + "epoch": 0.09659942549400648, + "grad_norm": 7864.40478515625, + "learning_rate": 9.564e-06, + "loss": 8514.9008, + "step": 47820 + }, + { + "epoch": 0.09661962612669028, + "grad_norm": 13343.1904296875, + "learning_rate": 9.566000000000001e-06, + "loss": 3932.5551, + "step": 47830 + }, + { + "epoch": 0.0966398267593741, + "grad_norm": 31615.05078125, + "learning_rate": 9.568e-06, + "loss": 3842.2656, + "step": 47840 + }, + { + "epoch": 0.09666002739205792, + "grad_norm": 141849.34375, + "learning_rate": 9.57e-06, + "loss": 11805.2273, + "step": 47850 + }, + { + "epoch": 0.09668022802474173, + "grad_norm": 11413.2783203125, + "learning_rate": 9.572000000000001e-06, + "loss": 9315.9828, + "step": 47860 + }, + { + "epoch": 0.09670042865742555, + "grad_norm": 28708.41015625, + "learning_rate": 9.574000000000002e-06, + "loss": 4677.1148, + "step": 47870 + }, + { + "epoch": 0.09672062929010937, + "grad_norm": 26475.681640625, + "learning_rate": 9.576000000000001e-06, + "loss": 10930.8211, + "step": 47880 + }, + { + "epoch": 0.09674082992279318, + "grad_norm": 11038.482421875, + "learning_rate": 9.578e-06, + "loss": 3333.7879, + "step": 47890 + }, + { + "epoch": 0.096761030555477, + "grad_norm": 5486.94140625, + "learning_rate": 9.58e-06, + "loss": 3118.8812, + "step": 47900 + }, + { + "epoch": 0.09678123118816082, + "grad_norm": 65763.7890625, + "learning_rate": 9.582e-06, + "loss": 7035.4219, + "step": 47910 + }, + { + "epoch": 0.09680143182084464, + "grad_norm": 10144.52734375, + "learning_rate": 9.584000000000002e-06, + "loss": 2075.0854, + "step": 47920 + }, + { + "epoch": 0.09682163245352844, + "grad_norm": 10503.4130859375, + "learning_rate": 9.586000000000001e-06, + "loss": 4782.7566, + "step": 47930 + }, + { + "epoch": 0.09684183308621226, + "grad_norm": 6360.97705078125, + "learning_rate": 9.588e-06, + "loss": 5232.7066, + "step": 47940 + }, + { + "epoch": 0.09686203371889608, + "grad_norm": 19298.16015625, + "learning_rate": 9.59e-06, + "loss": 5403.5223, + "step": 47950 + }, + { + "epoch": 0.09688223435157989, + "grad_norm": 2491.8076171875, + "learning_rate": 9.592e-06, + "loss": 1350.8729, + "step": 47960 + }, + { + "epoch": 0.09690243498426371, + "grad_norm": 2936.600830078125, + "learning_rate": 9.594000000000002e-06, + "loss": 5235.9066, + "step": 47970 + }, + { + "epoch": 0.09692263561694753, + "grad_norm": 33862.48828125, + "learning_rate": 9.596000000000001e-06, + "loss": 3899.4129, + "step": 47980 + }, + { + "epoch": 0.09694283624963133, + "grad_norm": 2514.659423828125, + "learning_rate": 9.598e-06, + "loss": 6785.6234, + "step": 47990 + }, + { + "epoch": 0.09696303688231515, + "grad_norm": 23165.9375, + "learning_rate": 9.600000000000001e-06, + "loss": 7124.8438, + "step": 48000 + }, + { + "epoch": 0.09698323751499897, + "grad_norm": 975.541015625, + "learning_rate": 9.602e-06, + "loss": 8480.8281, + "step": 48010 + }, + { + "epoch": 0.09700343814768278, + "grad_norm": 2287.907958984375, + "learning_rate": 9.604000000000002e-06, + "loss": 5334.8059, + "step": 48020 + }, + { + "epoch": 0.0970236387803666, + "grad_norm": 7279.2255859375, + "learning_rate": 9.606000000000001e-06, + "loss": 8860.5555, + "step": 48030 + }, + { + "epoch": 0.09704383941305042, + "grad_norm": 1519.546630859375, + "learning_rate": 9.608e-06, + "loss": 2761.1773, + "step": 48040 + }, + { + "epoch": 0.09706404004573423, + "grad_norm": 9271.9501953125, + "learning_rate": 9.610000000000001e-06, + "loss": 6380.0785, + "step": 48050 + }, + { + "epoch": 0.09708424067841805, + "grad_norm": 17695.109375, + "learning_rate": 9.612000000000002e-06, + "loss": 3789.9965, + "step": 48060 + }, + { + "epoch": 0.09710444131110187, + "grad_norm": 114.56668853759766, + "learning_rate": 9.614000000000001e-06, + "loss": 9060.2211, + "step": 48070 + }, + { + "epoch": 0.09712464194378569, + "grad_norm": 12649.779296875, + "learning_rate": 9.616e-06, + "loss": 7706.7703, + "step": 48080 + }, + { + "epoch": 0.09714484257646949, + "grad_norm": 2436.09912109375, + "learning_rate": 9.618e-06, + "loss": 5789.7715, + "step": 48090 + }, + { + "epoch": 0.09716504320915331, + "grad_norm": 7112.75244140625, + "learning_rate": 9.620000000000001e-06, + "loss": 4952.0375, + "step": 48100 + }, + { + "epoch": 0.09718524384183713, + "grad_norm": 21177.8359375, + "learning_rate": 9.622000000000002e-06, + "loss": 7131.2828, + "step": 48110 + }, + { + "epoch": 0.09720544447452094, + "grad_norm": 9176.126953125, + "learning_rate": 9.624000000000001e-06, + "loss": 2865.0541, + "step": 48120 + }, + { + "epoch": 0.09722564510720476, + "grad_norm": 18319.17578125, + "learning_rate": 9.626e-06, + "loss": 1887.0475, + "step": 48130 + }, + { + "epoch": 0.09724584573988858, + "grad_norm": 37758.8515625, + "learning_rate": 9.628e-06, + "loss": 2268.7758, + "step": 48140 + }, + { + "epoch": 0.09726604637257238, + "grad_norm": 185565.046875, + "learning_rate": 9.630000000000001e-06, + "loss": 12925.6375, + "step": 48150 + }, + { + "epoch": 0.0972862470052562, + "grad_norm": 55377.02734375, + "learning_rate": 9.632e-06, + "loss": 6936.6547, + "step": 48160 + }, + { + "epoch": 0.09730644763794002, + "grad_norm": 8978.5263671875, + "learning_rate": 9.634000000000001e-06, + "loss": 2803.69, + "step": 48170 + }, + { + "epoch": 0.09732664827062383, + "grad_norm": 5143.880859375, + "learning_rate": 9.636e-06, + "loss": 3451.3621, + "step": 48180 + }, + { + "epoch": 0.09734684890330765, + "grad_norm": 3256.5361328125, + "learning_rate": 9.638e-06, + "loss": 5818.2098, + "step": 48190 + }, + { + "epoch": 0.09736704953599147, + "grad_norm": 2552.91162109375, + "learning_rate": 9.640000000000001e-06, + "loss": 10364.2422, + "step": 48200 + }, + { + "epoch": 0.09738725016867528, + "grad_norm": 1166.16162109375, + "learning_rate": 9.642e-06, + "loss": 5807.8527, + "step": 48210 + }, + { + "epoch": 0.0974074508013591, + "grad_norm": 1209.7908935546875, + "learning_rate": 9.644000000000001e-06, + "loss": 2651.267, + "step": 48220 + }, + { + "epoch": 0.09742765143404292, + "grad_norm": 54273.60546875, + "learning_rate": 9.646e-06, + "loss": 3696.5184, + "step": 48230 + }, + { + "epoch": 0.09744785206672674, + "grad_norm": 8252.306640625, + "learning_rate": 9.648000000000001e-06, + "loss": 7622.4492, + "step": 48240 + }, + { + "epoch": 0.09746805269941054, + "grad_norm": 8494.697265625, + "learning_rate": 9.65e-06, + "loss": 3848.4488, + "step": 48250 + }, + { + "epoch": 0.09748825333209436, + "grad_norm": 38940.6796875, + "learning_rate": 9.652e-06, + "loss": 4031.8402, + "step": 48260 + }, + { + "epoch": 0.09750845396477818, + "grad_norm": 46446.17578125, + "learning_rate": 9.654000000000001e-06, + "loss": 4999.4211, + "step": 48270 + }, + { + "epoch": 0.09752865459746199, + "grad_norm": 6009.11572265625, + "learning_rate": 9.656e-06, + "loss": 8389.1758, + "step": 48280 + }, + { + "epoch": 0.09754885523014581, + "grad_norm": 183341.171875, + "learning_rate": 9.658000000000001e-06, + "loss": 9869.3547, + "step": 48290 + }, + { + "epoch": 0.09756905586282963, + "grad_norm": 46513.296875, + "learning_rate": 9.66e-06, + "loss": 4158.7203, + "step": 48300 + }, + { + "epoch": 0.09758925649551344, + "grad_norm": 757.3812866210938, + "learning_rate": 9.662e-06, + "loss": 3639.475, + "step": 48310 + }, + { + "epoch": 0.09760945712819726, + "grad_norm": 5447.048828125, + "learning_rate": 9.664000000000001e-06, + "loss": 3065.5846, + "step": 48320 + }, + { + "epoch": 0.09762965776088108, + "grad_norm": 641.079345703125, + "learning_rate": 9.666e-06, + "loss": 2695.4473, + "step": 48330 + }, + { + "epoch": 0.09764985839356488, + "grad_norm": 48122.578125, + "learning_rate": 9.668000000000001e-06, + "loss": 5252.377, + "step": 48340 + }, + { + "epoch": 0.0976700590262487, + "grad_norm": 19764.240234375, + "learning_rate": 9.67e-06, + "loss": 4435.098, + "step": 48350 + }, + { + "epoch": 0.09769025965893252, + "grad_norm": 55186.6015625, + "learning_rate": 9.672e-06, + "loss": 7580.6258, + "step": 48360 + }, + { + "epoch": 0.09771046029161633, + "grad_norm": 6087.28076171875, + "learning_rate": 9.674000000000001e-06, + "loss": 2255.6201, + "step": 48370 + }, + { + "epoch": 0.09773066092430015, + "grad_norm": 4582.48828125, + "learning_rate": 9.676e-06, + "loss": 5276.1742, + "step": 48380 + }, + { + "epoch": 0.09775086155698397, + "grad_norm": 426.3363037109375, + "learning_rate": 9.678000000000001e-06, + "loss": 5788.4805, + "step": 48390 + }, + { + "epoch": 0.09777106218966779, + "grad_norm": 62356.25390625, + "learning_rate": 9.68e-06, + "loss": 4848.4902, + "step": 48400 + }, + { + "epoch": 0.0977912628223516, + "grad_norm": 6097.31201171875, + "learning_rate": 9.682e-06, + "loss": 10195.0797, + "step": 48410 + }, + { + "epoch": 0.09781146345503541, + "grad_norm": 8453.240234375, + "learning_rate": 9.684e-06, + "loss": 4076.4582, + "step": 48420 + }, + { + "epoch": 0.09783166408771923, + "grad_norm": 140824.890625, + "learning_rate": 9.686000000000002e-06, + "loss": 8201.9211, + "step": 48430 + }, + { + "epoch": 0.09785186472040304, + "grad_norm": 5974.01220703125, + "learning_rate": 9.688000000000001e-06, + "loss": 6194.0523, + "step": 48440 + }, + { + "epoch": 0.09787206535308686, + "grad_norm": 31836.119140625, + "learning_rate": 9.69e-06, + "loss": 9450.1063, + "step": 48450 + }, + { + "epoch": 0.09789226598577068, + "grad_norm": 21745.8046875, + "learning_rate": 9.692e-06, + "loss": 8009.0516, + "step": 48460 + }, + { + "epoch": 0.09791246661845449, + "grad_norm": 2209.5751953125, + "learning_rate": 9.694e-06, + "loss": 2661.7705, + "step": 48470 + }, + { + "epoch": 0.0979326672511383, + "grad_norm": 21796.888671875, + "learning_rate": 9.696000000000002e-06, + "loss": 2774.3555, + "step": 48480 + }, + { + "epoch": 0.09795286788382213, + "grad_norm": 1900.7177734375, + "learning_rate": 9.698000000000001e-06, + "loss": 4140.3027, + "step": 48490 + }, + { + "epoch": 0.09797306851650593, + "grad_norm": 5018.60595703125, + "learning_rate": 9.7e-06, + "loss": 10207.5773, + "step": 48500 + }, + { + "epoch": 0.09799326914918975, + "grad_norm": 63012.11328125, + "learning_rate": 9.702e-06, + "loss": 11424.4523, + "step": 48510 + }, + { + "epoch": 0.09801346978187357, + "grad_norm": 8863.3662109375, + "learning_rate": 9.704e-06, + "loss": 7381.6617, + "step": 48520 + }, + { + "epoch": 0.09803367041455738, + "grad_norm": 33422.8828125, + "learning_rate": 9.706000000000002e-06, + "loss": 3891.825, + "step": 48530 + }, + { + "epoch": 0.0980538710472412, + "grad_norm": 2959.888916015625, + "learning_rate": 9.708000000000001e-06, + "loss": 3603.5176, + "step": 48540 + }, + { + "epoch": 0.09807407167992502, + "grad_norm": 6804.93017578125, + "learning_rate": 9.71e-06, + "loss": 7303.0711, + "step": 48550 + }, + { + "epoch": 0.09809427231260884, + "grad_norm": 121482.7265625, + "learning_rate": 9.712e-06, + "loss": 4997.425, + "step": 48560 + }, + { + "epoch": 0.09811447294529264, + "grad_norm": 24590.498046875, + "learning_rate": 9.714e-06, + "loss": 3769.3047, + "step": 48570 + }, + { + "epoch": 0.09813467357797646, + "grad_norm": 32310.005859375, + "learning_rate": 9.716000000000002e-06, + "loss": 2853.565, + "step": 48580 + }, + { + "epoch": 0.09815487421066028, + "grad_norm": 2351.95556640625, + "learning_rate": 9.718e-06, + "loss": 3449.8504, + "step": 48590 + }, + { + "epoch": 0.09817507484334409, + "grad_norm": 4673.2734375, + "learning_rate": 9.72e-06, + "loss": 2670.7152, + "step": 48600 + }, + { + "epoch": 0.09819527547602791, + "grad_norm": 15767.248046875, + "learning_rate": 9.722000000000001e-06, + "loss": 3871.3617, + "step": 48610 + }, + { + "epoch": 0.09821547610871173, + "grad_norm": 1129.1502685546875, + "learning_rate": 9.724e-06, + "loss": 7931.6789, + "step": 48620 + }, + { + "epoch": 0.09823567674139554, + "grad_norm": 12814.6025390625, + "learning_rate": 9.726000000000001e-06, + "loss": 3667.0672, + "step": 48630 + }, + { + "epoch": 0.09825587737407936, + "grad_norm": 306.6357116699219, + "learning_rate": 9.728e-06, + "loss": 4989.0934, + "step": 48640 + }, + { + "epoch": 0.09827607800676318, + "grad_norm": 28598.255859375, + "learning_rate": 9.73e-06, + "loss": 2195.016, + "step": 48650 + }, + { + "epoch": 0.09829627863944698, + "grad_norm": 17897.26953125, + "learning_rate": 9.732000000000001e-06, + "loss": 4184.3809, + "step": 48660 + }, + { + "epoch": 0.0983164792721308, + "grad_norm": 36919.08203125, + "learning_rate": 9.734000000000002e-06, + "loss": 6396.9777, + "step": 48670 + }, + { + "epoch": 0.09833667990481462, + "grad_norm": 1895.88330078125, + "learning_rate": 9.736000000000001e-06, + "loss": 2410.0209, + "step": 48680 + }, + { + "epoch": 0.09835688053749843, + "grad_norm": 17981.904296875, + "learning_rate": 9.738e-06, + "loss": 3916.5453, + "step": 48690 + }, + { + "epoch": 0.09837708117018225, + "grad_norm": 7976.65478515625, + "learning_rate": 9.74e-06, + "loss": 3542.0242, + "step": 48700 + }, + { + "epoch": 0.09839728180286607, + "grad_norm": 35211.55859375, + "learning_rate": 9.742000000000001e-06, + "loss": 10735.4227, + "step": 48710 + }, + { + "epoch": 0.09841748243554987, + "grad_norm": 4973.69580078125, + "learning_rate": 9.744000000000002e-06, + "loss": 3550.6262, + "step": 48720 + }, + { + "epoch": 0.0984376830682337, + "grad_norm": 18118.923828125, + "learning_rate": 9.746000000000001e-06, + "loss": 3156.1574, + "step": 48730 + }, + { + "epoch": 0.09845788370091751, + "grad_norm": 2123.89453125, + "learning_rate": 9.748e-06, + "loss": 7353.718, + "step": 48740 + }, + { + "epoch": 0.09847808433360133, + "grad_norm": 8954.62890625, + "learning_rate": 9.75e-06, + "loss": 6184.5, + "step": 48750 + }, + { + "epoch": 0.09849828496628514, + "grad_norm": 5610.41357421875, + "learning_rate": 9.752e-06, + "loss": 8275.6023, + "step": 48760 + }, + { + "epoch": 0.09851848559896896, + "grad_norm": 35639.56640625, + "learning_rate": 9.754000000000002e-06, + "loss": 5822.0844, + "step": 48770 + }, + { + "epoch": 0.09853868623165278, + "grad_norm": 116395.2109375, + "learning_rate": 9.756000000000001e-06, + "loss": 8189.5406, + "step": 48780 + }, + { + "epoch": 0.09855888686433659, + "grad_norm": 20870.033203125, + "learning_rate": 9.758e-06, + "loss": 6715.5664, + "step": 48790 + }, + { + "epoch": 0.0985790874970204, + "grad_norm": 85375.9140625, + "learning_rate": 9.760000000000001e-06, + "loss": 7740.45, + "step": 48800 + }, + { + "epoch": 0.09859928812970423, + "grad_norm": 1128.639404296875, + "learning_rate": 9.762e-06, + "loss": 4108.0836, + "step": 48810 + }, + { + "epoch": 0.09861948876238803, + "grad_norm": 155006.765625, + "learning_rate": 9.764000000000002e-06, + "loss": 14390.0047, + "step": 48820 + }, + { + "epoch": 0.09863968939507185, + "grad_norm": 4693.8251953125, + "learning_rate": 9.766000000000001e-06, + "loss": 3131.3258, + "step": 48830 + }, + { + "epoch": 0.09865989002775567, + "grad_norm": 12435.8203125, + "learning_rate": 9.768e-06, + "loss": 9852.3062, + "step": 48840 + }, + { + "epoch": 0.09868009066043948, + "grad_norm": 1802.5673828125, + "learning_rate": 9.770000000000001e-06, + "loss": 2398.251, + "step": 48850 + }, + { + "epoch": 0.0987002912931233, + "grad_norm": 16516.994140625, + "learning_rate": 9.772e-06, + "loss": 8740.3797, + "step": 48860 + }, + { + "epoch": 0.09872049192580712, + "grad_norm": 90984.703125, + "learning_rate": 9.774000000000002e-06, + "loss": 11659.8297, + "step": 48870 + }, + { + "epoch": 0.09874069255849093, + "grad_norm": 7832.1806640625, + "learning_rate": 9.776000000000001e-06, + "loss": 5284.9324, + "step": 48880 + }, + { + "epoch": 0.09876089319117475, + "grad_norm": 94081.5234375, + "learning_rate": 9.778e-06, + "loss": 5717.1387, + "step": 48890 + }, + { + "epoch": 0.09878109382385857, + "grad_norm": 60490.51171875, + "learning_rate": 9.780000000000001e-06, + "loss": 6199.748, + "step": 48900 + }, + { + "epoch": 0.09880129445654239, + "grad_norm": 10602.74609375, + "learning_rate": 9.782e-06, + "loss": 5522.0316, + "step": 48910 + }, + { + "epoch": 0.09882149508922619, + "grad_norm": 6777.263671875, + "learning_rate": 9.784000000000002e-06, + "loss": 8795.9594, + "step": 48920 + }, + { + "epoch": 0.09884169572191001, + "grad_norm": 1118.733154296875, + "learning_rate": 9.786e-06, + "loss": 7097.2492, + "step": 48930 + }, + { + "epoch": 0.09886189635459383, + "grad_norm": 127239.2109375, + "learning_rate": 9.788e-06, + "loss": 8402.5266, + "step": 48940 + }, + { + "epoch": 0.09888209698727764, + "grad_norm": 1768.691650390625, + "learning_rate": 9.790000000000001e-06, + "loss": 3735.1758, + "step": 48950 + }, + { + "epoch": 0.09890229761996146, + "grad_norm": 3654.69677734375, + "learning_rate": 9.792e-06, + "loss": 2407.39, + "step": 48960 + }, + { + "epoch": 0.09892249825264528, + "grad_norm": 53015.171875, + "learning_rate": 9.794000000000001e-06, + "loss": 3046.3871, + "step": 48970 + }, + { + "epoch": 0.09894269888532908, + "grad_norm": 52200.125, + "learning_rate": 9.796e-06, + "loss": 8369.8797, + "step": 48980 + }, + { + "epoch": 0.0989628995180129, + "grad_norm": 117224.4453125, + "learning_rate": 9.798e-06, + "loss": 10396.3602, + "step": 48990 + }, + { + "epoch": 0.09898310015069672, + "grad_norm": 1302.4456787109375, + "learning_rate": 9.800000000000001e-06, + "loss": 6082.1363, + "step": 49000 + }, + { + "epoch": 0.09900330078338053, + "grad_norm": 25923.306640625, + "learning_rate": 9.802e-06, + "loss": 6593.1828, + "step": 49010 + }, + { + "epoch": 0.09902350141606435, + "grad_norm": 11574.8525390625, + "learning_rate": 9.804000000000001e-06, + "loss": 4057.9117, + "step": 49020 + }, + { + "epoch": 0.09904370204874817, + "grad_norm": 51849.03515625, + "learning_rate": 9.806e-06, + "loss": 4770.3305, + "step": 49030 + }, + { + "epoch": 0.09906390268143198, + "grad_norm": 1375.9981689453125, + "learning_rate": 9.808000000000002e-06, + "loss": 1824.1838, + "step": 49040 + }, + { + "epoch": 0.0990841033141158, + "grad_norm": 54950.078125, + "learning_rate": 9.810000000000001e-06, + "loss": 4914.0945, + "step": 49050 + }, + { + "epoch": 0.09910430394679962, + "grad_norm": 3110.13623046875, + "learning_rate": 9.812e-06, + "loss": 1678.6131, + "step": 49060 + }, + { + "epoch": 0.09912450457948344, + "grad_norm": 21979.50390625, + "learning_rate": 9.814000000000001e-06, + "loss": 4423.459, + "step": 49070 + }, + { + "epoch": 0.09914470521216724, + "grad_norm": 7389.8447265625, + "learning_rate": 9.816e-06, + "loss": 5771.7016, + "step": 49080 + }, + { + "epoch": 0.09916490584485106, + "grad_norm": 939.8839721679688, + "learning_rate": 9.818000000000002e-06, + "loss": 3841.4402, + "step": 49090 + }, + { + "epoch": 0.09918510647753488, + "grad_norm": 89431.1171875, + "learning_rate": 9.820000000000001e-06, + "loss": 5504.0066, + "step": 49100 + }, + { + "epoch": 0.09920530711021869, + "grad_norm": 88665.6015625, + "learning_rate": 9.822e-06, + "loss": 4826.1742, + "step": 49110 + }, + { + "epoch": 0.09922550774290251, + "grad_norm": 16110.173828125, + "learning_rate": 9.824000000000001e-06, + "loss": 7795.6859, + "step": 49120 + }, + { + "epoch": 0.09924570837558633, + "grad_norm": 344606.96875, + "learning_rate": 9.826e-06, + "loss": 16724.9469, + "step": 49130 + }, + { + "epoch": 0.09926590900827013, + "grad_norm": 22500.541015625, + "learning_rate": 9.828000000000001e-06, + "loss": 8601.2563, + "step": 49140 + }, + { + "epoch": 0.09928610964095395, + "grad_norm": 2066.411865234375, + "learning_rate": 9.83e-06, + "loss": 4055.0523, + "step": 49150 + }, + { + "epoch": 0.09930631027363777, + "grad_norm": 1864.3558349609375, + "learning_rate": 9.832e-06, + "loss": 2731.735, + "step": 49160 + }, + { + "epoch": 0.09932651090632158, + "grad_norm": 2639.367919921875, + "learning_rate": 9.834000000000001e-06, + "loss": 15230.2297, + "step": 49170 + }, + { + "epoch": 0.0993467115390054, + "grad_norm": 11635.5341796875, + "learning_rate": 9.836e-06, + "loss": 4457.4555, + "step": 49180 + }, + { + "epoch": 0.09936691217168922, + "grad_norm": 4715.90380859375, + "learning_rate": 9.838000000000001e-06, + "loss": 6933.0586, + "step": 49190 + }, + { + "epoch": 0.09938711280437303, + "grad_norm": 177501.203125, + "learning_rate": 9.84e-06, + "loss": 8545.8148, + "step": 49200 + }, + { + "epoch": 0.09940731343705685, + "grad_norm": 6796.5556640625, + "learning_rate": 9.842e-06, + "loss": 2671.5594, + "step": 49210 + }, + { + "epoch": 0.09942751406974067, + "grad_norm": 3698.833251953125, + "learning_rate": 9.844000000000001e-06, + "loss": 6729.4805, + "step": 49220 + }, + { + "epoch": 0.09944771470242449, + "grad_norm": 9257.6611328125, + "learning_rate": 9.846000000000002e-06, + "loss": 2998.542, + "step": 49230 + }, + { + "epoch": 0.09946791533510829, + "grad_norm": 6544.94091796875, + "learning_rate": 9.848000000000001e-06, + "loss": 4181.3484, + "step": 49240 + }, + { + "epoch": 0.09948811596779211, + "grad_norm": 24232.146484375, + "learning_rate": 9.85e-06, + "loss": 3972.9117, + "step": 49250 + }, + { + "epoch": 0.09950831660047593, + "grad_norm": 19407.041015625, + "learning_rate": 9.852e-06, + "loss": 5096.1945, + "step": 49260 + }, + { + "epoch": 0.09952851723315974, + "grad_norm": 14484.447265625, + "learning_rate": 9.854000000000001e-06, + "loss": 3944.8422, + "step": 49270 + }, + { + "epoch": 0.09954871786584356, + "grad_norm": 7685.78125, + "learning_rate": 9.856000000000002e-06, + "loss": 4907.1531, + "step": 49280 + }, + { + "epoch": 0.09956891849852738, + "grad_norm": 5996.35888671875, + "learning_rate": 9.858000000000001e-06, + "loss": 1683.3547, + "step": 49290 + }, + { + "epoch": 0.09958911913121118, + "grad_norm": 38690.65234375, + "learning_rate": 9.86e-06, + "loss": 2354.3691, + "step": 49300 + }, + { + "epoch": 0.099609319763895, + "grad_norm": 50395.94140625, + "learning_rate": 9.862e-06, + "loss": 10450.2188, + "step": 49310 + }, + { + "epoch": 0.09962952039657882, + "grad_norm": 11530.986328125, + "learning_rate": 9.864e-06, + "loss": 6085.0422, + "step": 49320 + }, + { + "epoch": 0.09964972102926263, + "grad_norm": 10723.7431640625, + "learning_rate": 9.866000000000002e-06, + "loss": 6446.0617, + "step": 49330 + }, + { + "epoch": 0.09966992166194645, + "grad_norm": 1729.6575927734375, + "learning_rate": 9.868000000000001e-06, + "loss": 9200.1797, + "step": 49340 + }, + { + "epoch": 0.09969012229463027, + "grad_norm": 4284.7548828125, + "learning_rate": 9.87e-06, + "loss": 6519.7344, + "step": 49350 + }, + { + "epoch": 0.09971032292731408, + "grad_norm": 99519.03125, + "learning_rate": 9.872e-06, + "loss": 6091.409, + "step": 49360 + }, + { + "epoch": 0.0997305235599979, + "grad_norm": 70480.6484375, + "learning_rate": 9.874e-06, + "loss": 12748.282, + "step": 49370 + }, + { + "epoch": 0.09975072419268172, + "grad_norm": 2803.62841796875, + "learning_rate": 9.876000000000002e-06, + "loss": 4191.9715, + "step": 49380 + }, + { + "epoch": 0.09977092482536554, + "grad_norm": 30694.654296875, + "learning_rate": 9.878000000000001e-06, + "loss": 3545.8734, + "step": 49390 + }, + { + "epoch": 0.09979112545804934, + "grad_norm": 3622.715576171875, + "learning_rate": 9.88e-06, + "loss": 1908.6236, + "step": 49400 + }, + { + "epoch": 0.09981132609073316, + "grad_norm": 2803.430419921875, + "learning_rate": 9.882000000000001e-06, + "loss": 5233.5035, + "step": 49410 + }, + { + "epoch": 0.09983152672341698, + "grad_norm": 831.8093872070312, + "learning_rate": 9.884e-06, + "loss": 5747.0566, + "step": 49420 + }, + { + "epoch": 0.09985172735610079, + "grad_norm": 145353.390625, + "learning_rate": 9.886000000000002e-06, + "loss": 4955.2992, + "step": 49430 + }, + { + "epoch": 0.09987192798878461, + "grad_norm": 10759.146484375, + "learning_rate": 9.888000000000001e-06, + "loss": 4300.1621, + "step": 49440 + }, + { + "epoch": 0.09989212862146843, + "grad_norm": 8808.974609375, + "learning_rate": 9.89e-06, + "loss": 5367.4098, + "step": 49450 + }, + { + "epoch": 0.09991232925415224, + "grad_norm": 7534.4921875, + "learning_rate": 9.892000000000001e-06, + "loss": 6116.2508, + "step": 49460 + }, + { + "epoch": 0.09993252988683606, + "grad_norm": 25557.865234375, + "learning_rate": 9.894e-06, + "loss": 2565.1395, + "step": 49470 + }, + { + "epoch": 0.09995273051951988, + "grad_norm": 48011.96875, + "learning_rate": 9.896000000000001e-06, + "loss": 5910.3211, + "step": 49480 + }, + { + "epoch": 0.09997293115220368, + "grad_norm": 181724.59375, + "learning_rate": 9.898e-06, + "loss": 8557.9281, + "step": 49490 + }, + { + "epoch": 0.0999931317848875, + "grad_norm": 68355.125, + "learning_rate": 9.9e-06, + "loss": 8572.9625, + "step": 49500 + }, + { + "epoch": 0.10001333241757132, + "grad_norm": 92779.265625, + "learning_rate": 9.902000000000001e-06, + "loss": 6053.3211, + "step": 49510 + }, + { + "epoch": 0.10003353305025513, + "grad_norm": 2900.74072265625, + "learning_rate": 9.904e-06, + "loss": 1931.9453, + "step": 49520 + }, + { + "epoch": 0.10005373368293895, + "grad_norm": 1766.36572265625, + "learning_rate": 9.906000000000001e-06, + "loss": 2135.333, + "step": 49530 + }, + { + "epoch": 0.10007393431562277, + "grad_norm": 9457.4609375, + "learning_rate": 9.908e-06, + "loss": 2938.1898, + "step": 49540 + }, + { + "epoch": 0.10009413494830659, + "grad_norm": 2853.80029296875, + "learning_rate": 9.91e-06, + "loss": 2254.1107, + "step": 49550 + }, + { + "epoch": 0.1001143355809904, + "grad_norm": 90385.84375, + "learning_rate": 9.912000000000001e-06, + "loss": 4561.5668, + "step": 49560 + }, + { + "epoch": 0.10013453621367421, + "grad_norm": 1007.329833984375, + "learning_rate": 9.914e-06, + "loss": 5251.4918, + "step": 49570 + }, + { + "epoch": 0.10015473684635803, + "grad_norm": 86097.671875, + "learning_rate": 9.916000000000001e-06, + "loss": 5124.9477, + "step": 49580 + }, + { + "epoch": 0.10017493747904184, + "grad_norm": 22593.087890625, + "learning_rate": 9.918e-06, + "loss": 4059.7781, + "step": 49590 + }, + { + "epoch": 0.10019513811172566, + "grad_norm": 134287.609375, + "learning_rate": 9.920000000000002e-06, + "loss": 8164.5344, + "step": 49600 + }, + { + "epoch": 0.10021533874440948, + "grad_norm": 39685.51953125, + "learning_rate": 9.922000000000001e-06, + "loss": 6232.1629, + "step": 49610 + }, + { + "epoch": 0.10023553937709329, + "grad_norm": 18985.390625, + "learning_rate": 9.924e-06, + "loss": 4739.2602, + "step": 49620 + }, + { + "epoch": 0.1002557400097771, + "grad_norm": 5841.09228515625, + "learning_rate": 9.926000000000001e-06, + "loss": 8540.5594, + "step": 49630 + }, + { + "epoch": 0.10027594064246093, + "grad_norm": 10768.8759765625, + "learning_rate": 9.928e-06, + "loss": 7600.5195, + "step": 49640 + }, + { + "epoch": 0.10029614127514473, + "grad_norm": 998.419921875, + "learning_rate": 9.930000000000001e-06, + "loss": 6420.4504, + "step": 49650 + }, + { + "epoch": 0.10031634190782855, + "grad_norm": 39977.6796875, + "learning_rate": 9.932e-06, + "loss": 6713.7531, + "step": 49660 + }, + { + "epoch": 0.10033654254051237, + "grad_norm": 3051.070068359375, + "learning_rate": 9.934e-06, + "loss": 3367.4191, + "step": 49670 + }, + { + "epoch": 0.10035674317319618, + "grad_norm": 18075.779296875, + "learning_rate": 9.936000000000001e-06, + "loss": 6595.3195, + "step": 49680 + }, + { + "epoch": 0.10037694380588, + "grad_norm": 5077.3583984375, + "learning_rate": 9.938e-06, + "loss": 3784.2543, + "step": 49690 + }, + { + "epoch": 0.10039714443856382, + "grad_norm": 576.5385131835938, + "learning_rate": 9.940000000000001e-06, + "loss": 4496.1453, + "step": 49700 + }, + { + "epoch": 0.10041734507124764, + "grad_norm": 908.5853271484375, + "learning_rate": 9.942e-06, + "loss": 11949.4297, + "step": 49710 + }, + { + "epoch": 0.10043754570393144, + "grad_norm": 57359.80859375, + "learning_rate": 9.944e-06, + "loss": 3978.1984, + "step": 49720 + }, + { + "epoch": 0.10045774633661526, + "grad_norm": 4231.49462890625, + "learning_rate": 9.946000000000001e-06, + "loss": 6874.9484, + "step": 49730 + }, + { + "epoch": 0.10047794696929908, + "grad_norm": 1408.244384765625, + "learning_rate": 9.948e-06, + "loss": 7209.1633, + "step": 49740 + }, + { + "epoch": 0.10049814760198289, + "grad_norm": 6586.02099609375, + "learning_rate": 9.950000000000001e-06, + "loss": 5438.2414, + "step": 49750 + }, + { + "epoch": 0.10051834823466671, + "grad_norm": 14128.705078125, + "learning_rate": 9.952e-06, + "loss": 4090.932, + "step": 49760 + }, + { + "epoch": 0.10053854886735053, + "grad_norm": 77007.640625, + "learning_rate": 9.954e-06, + "loss": 13912.8266, + "step": 49770 + }, + { + "epoch": 0.10055874950003434, + "grad_norm": 108817.6953125, + "learning_rate": 9.956000000000001e-06, + "loss": 7137.7359, + "step": 49780 + }, + { + "epoch": 0.10057895013271816, + "grad_norm": 10865.4453125, + "learning_rate": 9.958e-06, + "loss": 7751.2758, + "step": 49790 + }, + { + "epoch": 0.10059915076540198, + "grad_norm": 10223.5908203125, + "learning_rate": 9.960000000000001e-06, + "loss": 4812.3687, + "step": 49800 + }, + { + "epoch": 0.10061935139808578, + "grad_norm": 22472.59375, + "learning_rate": 9.962e-06, + "loss": 3196.0234, + "step": 49810 + }, + { + "epoch": 0.1006395520307696, + "grad_norm": 1648.5826416015625, + "learning_rate": 9.964e-06, + "loss": 5749.9074, + "step": 49820 + }, + { + "epoch": 0.10065975266345342, + "grad_norm": 982.3861694335938, + "learning_rate": 9.966e-06, + "loss": 4298.6285, + "step": 49830 + }, + { + "epoch": 0.10067995329613723, + "grad_norm": 11498.470703125, + "learning_rate": 9.968000000000002e-06, + "loss": 14050.9375, + "step": 49840 + }, + { + "epoch": 0.10070015392882105, + "grad_norm": 39481.0234375, + "learning_rate": 9.970000000000001e-06, + "loss": 5423.8613, + "step": 49850 + }, + { + "epoch": 0.10072035456150487, + "grad_norm": 15216.578125, + "learning_rate": 9.972e-06, + "loss": 8149.0805, + "step": 49860 + }, + { + "epoch": 0.10074055519418869, + "grad_norm": 7333.43798828125, + "learning_rate": 9.974e-06, + "loss": 5927.1535, + "step": 49870 + }, + { + "epoch": 0.1007607558268725, + "grad_norm": 20015.3203125, + "learning_rate": 9.976e-06, + "loss": 4218.7277, + "step": 49880 + }, + { + "epoch": 0.10078095645955631, + "grad_norm": 3629.369384765625, + "learning_rate": 9.978000000000002e-06, + "loss": 6076.8102, + "step": 49890 + }, + { + "epoch": 0.10080115709224013, + "grad_norm": 71221.453125, + "learning_rate": 9.980000000000001e-06, + "loss": 4616.9426, + "step": 49900 + }, + { + "epoch": 0.10082135772492394, + "grad_norm": 15662.0673828125, + "learning_rate": 9.982e-06, + "loss": 8484.482, + "step": 49910 + }, + { + "epoch": 0.10084155835760776, + "grad_norm": 3809.091552734375, + "learning_rate": 9.984e-06, + "loss": 4362.3508, + "step": 49920 + }, + { + "epoch": 0.10086175899029158, + "grad_norm": 2438.992431640625, + "learning_rate": 9.986e-06, + "loss": 5736.7898, + "step": 49930 + }, + { + "epoch": 0.10088195962297539, + "grad_norm": 19202.21875, + "learning_rate": 9.988000000000002e-06, + "loss": 7706.1922, + "step": 49940 + }, + { + "epoch": 0.1009021602556592, + "grad_norm": 667.6085815429688, + "learning_rate": 9.990000000000001e-06, + "loss": 6486.4219, + "step": 49950 + }, + { + "epoch": 0.10092236088834303, + "grad_norm": 14091.8212890625, + "learning_rate": 9.992e-06, + "loss": 5965.7922, + "step": 49960 + }, + { + "epoch": 0.10094256152102683, + "grad_norm": 1124.5538330078125, + "learning_rate": 9.994000000000001e-06, + "loss": 3655.8379, + "step": 49970 + }, + { + "epoch": 0.10096276215371065, + "grad_norm": 7974.8017578125, + "learning_rate": 9.996e-06, + "loss": 5097.3086, + "step": 49980 + }, + { + "epoch": 0.10098296278639447, + "grad_norm": 812.642822265625, + "learning_rate": 9.998000000000002e-06, + "loss": 4122.9168, + "step": 49990 + }, + { + "epoch": 0.10100316341907828, + "grad_norm": 1784.027099609375, + "learning_rate": 1e-05, + "loss": 5906.5648, + "step": 50000 + }, + { + "epoch": 0.1010233640517621, + "grad_norm": 1786.267333984375, + "learning_rate": 9.999999987815305e-06, + "loss": 6350.6617, + "step": 50010 + }, + { + "epoch": 0.10104356468444592, + "grad_norm": 64936.83984375, + "learning_rate": 9.999999951261215e-06, + "loss": 4625.3809, + "step": 50020 + }, + { + "epoch": 0.10106376531712974, + "grad_norm": 4219.63525390625, + "learning_rate": 9.99999989033773e-06, + "loss": 7319.118, + "step": 50030 + }, + { + "epoch": 0.10108396594981355, + "grad_norm": 46727.7265625, + "learning_rate": 9.999999805044853e-06, + "loss": 4065.9383, + "step": 50040 + }, + { + "epoch": 0.10110416658249737, + "grad_norm": 2084.635009765625, + "learning_rate": 9.999999695382584e-06, + "loss": 4856.559, + "step": 50050 + }, + { + "epoch": 0.10112436721518119, + "grad_norm": 4801.126953125, + "learning_rate": 9.999999561350923e-06, + "loss": 4367.4625, + "step": 50060 + }, + { + "epoch": 0.10114456784786499, + "grad_norm": 9778.794921875, + "learning_rate": 9.99999940294987e-06, + "loss": 3448.6078, + "step": 50070 + }, + { + "epoch": 0.10116476848054881, + "grad_norm": 36695.921875, + "learning_rate": 9.999999220179426e-06, + "loss": 4212.427, + "step": 50080 + }, + { + "epoch": 0.10118496911323263, + "grad_norm": 12550.845703125, + "learning_rate": 9.999999013039593e-06, + "loss": 2310.2354, + "step": 50090 + }, + { + "epoch": 0.10120516974591644, + "grad_norm": 44823.1875, + "learning_rate": 9.999998781530372e-06, + "loss": 10268.1016, + "step": 50100 + }, + { + "epoch": 0.10122537037860026, + "grad_norm": 76591.9375, + "learning_rate": 9.999998525651761e-06, + "loss": 7910.9125, + "step": 50110 + }, + { + "epoch": 0.10124557101128408, + "grad_norm": 27162.943359375, + "learning_rate": 9.999998245403766e-06, + "loss": 4057.2094, + "step": 50120 + }, + { + "epoch": 0.10126577164396788, + "grad_norm": 6940.43505859375, + "learning_rate": 9.999997940786385e-06, + "loss": 3104.3775, + "step": 50130 + }, + { + "epoch": 0.1012859722766517, + "grad_norm": 593.4945068359375, + "learning_rate": 9.99999761179962e-06, + "loss": 5357.1258, + "step": 50140 + }, + { + "epoch": 0.10130617290933552, + "grad_norm": 4306.923828125, + "learning_rate": 9.999997258443473e-06, + "loss": 2510.1309, + "step": 50150 + }, + { + "epoch": 0.10132637354201933, + "grad_norm": 11375.146484375, + "learning_rate": 9.999996880717946e-06, + "loss": 2354.6887, + "step": 50160 + }, + { + "epoch": 0.10134657417470315, + "grad_norm": 7460.958984375, + "learning_rate": 9.999996478623041e-06, + "loss": 8367.2258, + "step": 50170 + }, + { + "epoch": 0.10136677480738697, + "grad_norm": 6650.65380859375, + "learning_rate": 9.99999605215876e-06, + "loss": 2579.2131, + "step": 50180 + }, + { + "epoch": 0.10138697544007079, + "grad_norm": 21927.88671875, + "learning_rate": 9.999995601325104e-06, + "loss": 6507.5879, + "step": 50190 + }, + { + "epoch": 0.1014071760727546, + "grad_norm": 109281.4609375, + "learning_rate": 9.999995126122076e-06, + "loss": 3899.2145, + "step": 50200 + }, + { + "epoch": 0.10142737670543842, + "grad_norm": 73048.0703125, + "learning_rate": 9.999994626549678e-06, + "loss": 10303.5094, + "step": 50210 + }, + { + "epoch": 0.10144757733812224, + "grad_norm": 855.02783203125, + "learning_rate": 9.999994102607912e-06, + "loss": 3936.684, + "step": 50220 + }, + { + "epoch": 0.10146777797080604, + "grad_norm": 5657.5439453125, + "learning_rate": 9.999993554296783e-06, + "loss": 5410.4207, + "step": 50230 + }, + { + "epoch": 0.10148797860348986, + "grad_norm": 7094.47607421875, + "learning_rate": 9.999992981616292e-06, + "loss": 5452.3, + "step": 50240 + }, + { + "epoch": 0.10150817923617368, + "grad_norm": 42079.3203125, + "learning_rate": 9.99999238456644e-06, + "loss": 6337.0457, + "step": 50250 + }, + { + "epoch": 0.10152837986885749, + "grad_norm": 8269.8837890625, + "learning_rate": 9.999991763147232e-06, + "loss": 3102.1635, + "step": 50260 + }, + { + "epoch": 0.10154858050154131, + "grad_norm": 55594.37109375, + "learning_rate": 9.99999111735867e-06, + "loss": 7457.6687, + "step": 50270 + }, + { + "epoch": 0.10156878113422513, + "grad_norm": 2416.20849609375, + "learning_rate": 9.999990447200758e-06, + "loss": 8081.7523, + "step": 50280 + }, + { + "epoch": 0.10158898176690893, + "grad_norm": 676.45947265625, + "learning_rate": 9.9999897526735e-06, + "loss": 9228.6586, + "step": 50290 + }, + { + "epoch": 0.10160918239959275, + "grad_norm": 717.8648071289062, + "learning_rate": 9.999989033776898e-06, + "loss": 9850.1727, + "step": 50300 + }, + { + "epoch": 0.10162938303227657, + "grad_norm": 153500.09375, + "learning_rate": 9.999988290510955e-06, + "loss": 8029.7437, + "step": 50310 + }, + { + "epoch": 0.10164958366496038, + "grad_norm": 82848.6015625, + "learning_rate": 9.999987522875676e-06, + "loss": 5607.5406, + "step": 50320 + }, + { + "epoch": 0.1016697842976442, + "grad_norm": 5361.67333984375, + "learning_rate": 9.999986730871065e-06, + "loss": 5396.4223, + "step": 50330 + }, + { + "epoch": 0.10168998493032802, + "grad_norm": 102563.2890625, + "learning_rate": 9.999985914497124e-06, + "loss": 11963.6883, + "step": 50340 + }, + { + "epoch": 0.10171018556301184, + "grad_norm": 9291.646484375, + "learning_rate": 9.999985073753857e-06, + "loss": 5004.5063, + "step": 50350 + }, + { + "epoch": 0.10173038619569565, + "grad_norm": 56579.84765625, + "learning_rate": 9.999984208641271e-06, + "loss": 5442.0305, + "step": 50360 + }, + { + "epoch": 0.10175058682837947, + "grad_norm": 5502.75390625, + "learning_rate": 9.999983319159368e-06, + "loss": 4134.6023, + "step": 50370 + }, + { + "epoch": 0.10177078746106329, + "grad_norm": 69699.625, + "learning_rate": 9.999982405308154e-06, + "loss": 5794.5664, + "step": 50380 + }, + { + "epoch": 0.10179098809374709, + "grad_norm": 166977.8125, + "learning_rate": 9.999981467087629e-06, + "loss": 10246.2922, + "step": 50390 + }, + { + "epoch": 0.10181118872643091, + "grad_norm": 18376.029296875, + "learning_rate": 9.999980504497803e-06, + "loss": 2160.8023, + "step": 50400 + }, + { + "epoch": 0.10183138935911473, + "grad_norm": 7459.51025390625, + "learning_rate": 9.999979517538677e-06, + "loss": 2577.4867, + "step": 50410 + }, + { + "epoch": 0.10185158999179854, + "grad_norm": 5912.7333984375, + "learning_rate": 9.99997850621026e-06, + "loss": 4984.1102, + "step": 50420 + }, + { + "epoch": 0.10187179062448236, + "grad_norm": 1759.997314453125, + "learning_rate": 9.999977470512551e-06, + "loss": 2771.3639, + "step": 50430 + }, + { + "epoch": 0.10189199125716618, + "grad_norm": 695.9417114257812, + "learning_rate": 9.999976410445563e-06, + "loss": 2314.1168, + "step": 50440 + }, + { + "epoch": 0.10191219188984998, + "grad_norm": 5773.7763671875, + "learning_rate": 9.999975326009292e-06, + "loss": 3860.2023, + "step": 50450 + }, + { + "epoch": 0.1019323925225338, + "grad_norm": 55715.828125, + "learning_rate": 9.999974217203749e-06, + "loss": 4825.4391, + "step": 50460 + }, + { + "epoch": 0.10195259315521762, + "grad_norm": 49413.40234375, + "learning_rate": 9.999973084028938e-06, + "loss": 7436.7688, + "step": 50470 + }, + { + "epoch": 0.10197279378790143, + "grad_norm": 55122.41015625, + "learning_rate": 9.999971926484865e-06, + "loss": 7465.4148, + "step": 50480 + }, + { + "epoch": 0.10199299442058525, + "grad_norm": 46366.0, + "learning_rate": 9.999970744571534e-06, + "loss": 8539.1789, + "step": 50490 + }, + { + "epoch": 0.10201319505326907, + "grad_norm": 13637.98828125, + "learning_rate": 9.999969538288953e-06, + "loss": 2148.1646, + "step": 50500 + }, + { + "epoch": 0.10203339568595289, + "grad_norm": 15744.0634765625, + "learning_rate": 9.999968307637127e-06, + "loss": 4321.2598, + "step": 50510 + }, + { + "epoch": 0.1020535963186367, + "grad_norm": 8042.2958984375, + "learning_rate": 9.999967052616061e-06, + "loss": 3047.8361, + "step": 50520 + }, + { + "epoch": 0.10207379695132052, + "grad_norm": 8843.8193359375, + "learning_rate": 9.999965773225762e-06, + "loss": 10811.1156, + "step": 50530 + }, + { + "epoch": 0.10209399758400434, + "grad_norm": 2891.7373046875, + "learning_rate": 9.999964469466236e-06, + "loss": 2628.9195, + "step": 50540 + }, + { + "epoch": 0.10211419821668814, + "grad_norm": 2932.17236328125, + "learning_rate": 9.999963141337493e-06, + "loss": 5759.3727, + "step": 50550 + }, + { + "epoch": 0.10213439884937196, + "grad_norm": 3393.1640625, + "learning_rate": 9.999961788839533e-06, + "loss": 7468.4125, + "step": 50560 + }, + { + "epoch": 0.10215459948205578, + "grad_norm": 20789.13671875, + "learning_rate": 9.999960411972366e-06, + "loss": 9907.268, + "step": 50570 + }, + { + "epoch": 0.10217480011473959, + "grad_norm": 562.7151489257812, + "learning_rate": 9.999959010735997e-06, + "loss": 1892.3027, + "step": 50580 + }, + { + "epoch": 0.10219500074742341, + "grad_norm": 12565.353515625, + "learning_rate": 9.999957585130438e-06, + "loss": 2783.6268, + "step": 50590 + }, + { + "epoch": 0.10221520138010723, + "grad_norm": 3298.295166015625, + "learning_rate": 9.999956135155688e-06, + "loss": 1961.7479, + "step": 50600 + }, + { + "epoch": 0.10223540201279104, + "grad_norm": 28488.884765625, + "learning_rate": 9.999954660811761e-06, + "loss": 5620.375, + "step": 50610 + }, + { + "epoch": 0.10225560264547486, + "grad_norm": 13514.6640625, + "learning_rate": 9.99995316209866e-06, + "loss": 3228.1996, + "step": 50620 + }, + { + "epoch": 0.10227580327815868, + "grad_norm": 16514.76171875, + "learning_rate": 9.999951639016396e-06, + "loss": 13313.3078, + "step": 50630 + }, + { + "epoch": 0.10229600391084248, + "grad_norm": 4683.23486328125, + "learning_rate": 9.999950091564972e-06, + "loss": 5213.2414, + "step": 50640 + }, + { + "epoch": 0.1023162045435263, + "grad_norm": 296767.875, + "learning_rate": 9.999948519744397e-06, + "loss": 7945.6375, + "step": 50650 + }, + { + "epoch": 0.10233640517621012, + "grad_norm": 5984.28662109375, + "learning_rate": 9.999946923554681e-06, + "loss": 6284.2199, + "step": 50660 + }, + { + "epoch": 0.10235660580889394, + "grad_norm": 14609.0751953125, + "learning_rate": 9.99994530299583e-06, + "loss": 7008.6391, + "step": 50670 + }, + { + "epoch": 0.10237680644157775, + "grad_norm": 6115.6357421875, + "learning_rate": 9.99994365806785e-06, + "loss": 4231.0113, + "step": 50680 + }, + { + "epoch": 0.10239700707426157, + "grad_norm": 154292.71875, + "learning_rate": 9.999941988770754e-06, + "loss": 10872.2766, + "step": 50690 + }, + { + "epoch": 0.10241720770694539, + "grad_norm": 6489.2548828125, + "learning_rate": 9.999940295104546e-06, + "loss": 4717.4027, + "step": 50700 + }, + { + "epoch": 0.1024374083396292, + "grad_norm": 168722.703125, + "learning_rate": 9.999938577069235e-06, + "loss": 7865.3047, + "step": 50710 + }, + { + "epoch": 0.10245760897231301, + "grad_norm": 5913.15966796875, + "learning_rate": 9.99993683466483e-06, + "loss": 3608.0199, + "step": 50720 + }, + { + "epoch": 0.10247780960499683, + "grad_norm": 99645.515625, + "learning_rate": 9.999935067891339e-06, + "loss": 5766.266, + "step": 50730 + }, + { + "epoch": 0.10249801023768064, + "grad_norm": 7537.99658203125, + "learning_rate": 9.999933276748772e-06, + "loss": 3369.1984, + "step": 50740 + }, + { + "epoch": 0.10251821087036446, + "grad_norm": 11954.421875, + "learning_rate": 9.999931461237135e-06, + "loss": 7765.8281, + "step": 50750 + }, + { + "epoch": 0.10253841150304828, + "grad_norm": 47039.03515625, + "learning_rate": 9.99992962135644e-06, + "loss": 2571.9332, + "step": 50760 + }, + { + "epoch": 0.10255861213573209, + "grad_norm": 15762.75390625, + "learning_rate": 9.999927757106693e-06, + "loss": 4027.0215, + "step": 50770 + }, + { + "epoch": 0.1025788127684159, + "grad_norm": 11211.1513671875, + "learning_rate": 9.999925868487905e-06, + "loss": 2990.3391, + "step": 50780 + }, + { + "epoch": 0.10259901340109973, + "grad_norm": 401.0817565917969, + "learning_rate": 9.999923955500085e-06, + "loss": 10406.7602, + "step": 50790 + }, + { + "epoch": 0.10261921403378353, + "grad_norm": 218.9814910888672, + "learning_rate": 9.999922018143242e-06, + "loss": 5426.8012, + "step": 50800 + }, + { + "epoch": 0.10263941466646735, + "grad_norm": 64427.2109375, + "learning_rate": 9.999920056417385e-06, + "loss": 3437.2035, + "step": 50810 + }, + { + "epoch": 0.10265961529915117, + "grad_norm": 4029.251953125, + "learning_rate": 9.999918070322525e-06, + "loss": 7924.1313, + "step": 50820 + }, + { + "epoch": 0.10267981593183499, + "grad_norm": 30019.1328125, + "learning_rate": 9.999916059858669e-06, + "loss": 4734.0113, + "step": 50830 + }, + { + "epoch": 0.1027000165645188, + "grad_norm": 1511.835693359375, + "learning_rate": 9.999914025025831e-06, + "loss": 5232.125, + "step": 50840 + }, + { + "epoch": 0.10272021719720262, + "grad_norm": 4746.341796875, + "learning_rate": 9.999911965824018e-06, + "loss": 3365.4375, + "step": 50850 + }, + { + "epoch": 0.10274041782988644, + "grad_norm": 7202.95654296875, + "learning_rate": 9.99990988225324e-06, + "loss": 2802.1861, + "step": 50860 + }, + { + "epoch": 0.10276061846257024, + "grad_norm": 152.30502319335938, + "learning_rate": 9.999907774313507e-06, + "loss": 10015.0227, + "step": 50870 + }, + { + "epoch": 0.10278081909525406, + "grad_norm": 24969.58203125, + "learning_rate": 9.99990564200483e-06, + "loss": 4089.0887, + "step": 50880 + }, + { + "epoch": 0.10280101972793788, + "grad_norm": 0.0, + "learning_rate": 9.999903485327221e-06, + "loss": 4587.952, + "step": 50890 + }, + { + "epoch": 0.10282122036062169, + "grad_norm": 4635.32080078125, + "learning_rate": 9.999901304280686e-06, + "loss": 5344.9699, + "step": 50900 + }, + { + "epoch": 0.10284142099330551, + "grad_norm": 15902.1240234375, + "learning_rate": 9.99989909886524e-06, + "loss": 7811.6133, + "step": 50910 + }, + { + "epoch": 0.10286162162598933, + "grad_norm": 14328.0263671875, + "learning_rate": 9.999896869080893e-06, + "loss": 7265.0656, + "step": 50920 + }, + { + "epoch": 0.10288182225867314, + "grad_norm": 20717.90625, + "learning_rate": 9.999894614927655e-06, + "loss": 6770.1125, + "step": 50930 + }, + { + "epoch": 0.10290202289135696, + "grad_norm": 17116.109375, + "learning_rate": 9.999892336405534e-06, + "loss": 5244.6938, + "step": 50940 + }, + { + "epoch": 0.10292222352404078, + "grad_norm": 10592.6806640625, + "learning_rate": 9.999890033514547e-06, + "loss": 2748.5008, + "step": 50950 + }, + { + "epoch": 0.10294242415672458, + "grad_norm": 16898.73046875, + "learning_rate": 9.999887706254703e-06, + "loss": 5535.1961, + "step": 50960 + }, + { + "epoch": 0.1029626247894084, + "grad_norm": 3757.0693359375, + "learning_rate": 9.999885354626011e-06, + "loss": 3933.418, + "step": 50970 + }, + { + "epoch": 0.10298282542209222, + "grad_norm": 6461.6103515625, + "learning_rate": 9.999882978628485e-06, + "loss": 2856.1467, + "step": 50980 + }, + { + "epoch": 0.10300302605477604, + "grad_norm": 316026.09375, + "learning_rate": 9.999880578262135e-06, + "loss": 12294.6656, + "step": 50990 + }, + { + "epoch": 0.10302322668745985, + "grad_norm": 33819.96484375, + "learning_rate": 9.999878153526974e-06, + "loss": 5153.6121, + "step": 51000 + }, + { + "epoch": 0.10304342732014367, + "grad_norm": 6487.388671875, + "learning_rate": 9.999875704423015e-06, + "loss": 3570.7168, + "step": 51010 + }, + { + "epoch": 0.10306362795282749, + "grad_norm": 2966.4091796875, + "learning_rate": 9.999873230950265e-06, + "loss": 7985.3984, + "step": 51020 + }, + { + "epoch": 0.1030838285855113, + "grad_norm": 4349.7822265625, + "learning_rate": 9.99987073310874e-06, + "loss": 7532.8555, + "step": 51030 + }, + { + "epoch": 0.10310402921819511, + "grad_norm": 2985.1162109375, + "learning_rate": 9.999868210898454e-06, + "loss": 2358.6975, + "step": 51040 + }, + { + "epoch": 0.10312422985087893, + "grad_norm": 6094.00048828125, + "learning_rate": 9.999865664319414e-06, + "loss": 12471.6836, + "step": 51050 + }, + { + "epoch": 0.10314443048356274, + "grad_norm": 2104.6259765625, + "learning_rate": 9.999863093371638e-06, + "loss": 8380.6695, + "step": 51060 + }, + { + "epoch": 0.10316463111624656, + "grad_norm": 2569.14208984375, + "learning_rate": 9.999860498055134e-06, + "loss": 2027.0986, + "step": 51070 + }, + { + "epoch": 0.10318483174893038, + "grad_norm": 81581.984375, + "learning_rate": 9.999857878369917e-06, + "loss": 6313.3121, + "step": 51080 + }, + { + "epoch": 0.10320503238161419, + "grad_norm": 2002.0601806640625, + "learning_rate": 9.999855234315997e-06, + "loss": 3186.8756, + "step": 51090 + }, + { + "epoch": 0.103225233014298, + "grad_norm": 8090.78271484375, + "learning_rate": 9.99985256589339e-06, + "loss": 13012.975, + "step": 51100 + }, + { + "epoch": 0.10324543364698183, + "grad_norm": 174.92059326171875, + "learning_rate": 9.999849873102108e-06, + "loss": 3087.3996, + "step": 51110 + }, + { + "epoch": 0.10326563427966563, + "grad_norm": 32736.453125, + "learning_rate": 9.999847155942165e-06, + "loss": 2208.9303, + "step": 51120 + }, + { + "epoch": 0.10328583491234945, + "grad_norm": 26929.18359375, + "learning_rate": 9.999844414413574e-06, + "loss": 3766.466, + "step": 51130 + }, + { + "epoch": 0.10330603554503327, + "grad_norm": 0.0, + "learning_rate": 9.999841648516347e-06, + "loss": 2107.9516, + "step": 51140 + }, + { + "epoch": 0.10332623617771709, + "grad_norm": 1167.5474853515625, + "learning_rate": 9.999838858250497e-06, + "loss": 4535.3504, + "step": 51150 + }, + { + "epoch": 0.1033464368104009, + "grad_norm": 576.41064453125, + "learning_rate": 9.99983604361604e-06, + "loss": 2362.1191, + "step": 51160 + }, + { + "epoch": 0.10336663744308472, + "grad_norm": 37839.53125, + "learning_rate": 9.999833204612988e-06, + "loss": 8386.8336, + "step": 51170 + }, + { + "epoch": 0.10338683807576854, + "grad_norm": 36104.08203125, + "learning_rate": 9.999830341241354e-06, + "loss": 4680.5844, + "step": 51180 + }, + { + "epoch": 0.10340703870845235, + "grad_norm": 16249.76171875, + "learning_rate": 9.999827453501156e-06, + "loss": 7268.5156, + "step": 51190 + }, + { + "epoch": 0.10342723934113617, + "grad_norm": 50086.91796875, + "learning_rate": 9.999824541392404e-06, + "loss": 3457.9117, + "step": 51200 + }, + { + "epoch": 0.10344743997381999, + "grad_norm": 47446.8671875, + "learning_rate": 9.999821604915114e-06, + "loss": 7093.8586, + "step": 51210 + }, + { + "epoch": 0.10346764060650379, + "grad_norm": 4128.82763671875, + "learning_rate": 9.999818644069299e-06, + "loss": 3752.9211, + "step": 51220 + }, + { + "epoch": 0.10348784123918761, + "grad_norm": 7000.76416015625, + "learning_rate": 9.999815658854976e-06, + "loss": 1884.4336, + "step": 51230 + }, + { + "epoch": 0.10350804187187143, + "grad_norm": 65467.76953125, + "learning_rate": 9.999812649272157e-06, + "loss": 3965.0527, + "step": 51240 + }, + { + "epoch": 0.10352824250455524, + "grad_norm": 13629.3203125, + "learning_rate": 9.999809615320857e-06, + "loss": 1738.9529, + "step": 51250 + }, + { + "epoch": 0.10354844313723906, + "grad_norm": 45029.20703125, + "learning_rate": 9.999806557001092e-06, + "loss": 4251.8562, + "step": 51260 + }, + { + "epoch": 0.10356864376992288, + "grad_norm": 3890.140380859375, + "learning_rate": 9.999803474312877e-06, + "loss": 7043.3828, + "step": 51270 + }, + { + "epoch": 0.10358884440260668, + "grad_norm": 273332.15625, + "learning_rate": 9.999800367256225e-06, + "loss": 11824.8773, + "step": 51280 + }, + { + "epoch": 0.1036090450352905, + "grad_norm": 35078.4921875, + "learning_rate": 9.999797235831153e-06, + "loss": 10507.0922, + "step": 51290 + }, + { + "epoch": 0.10362924566797432, + "grad_norm": 1307.9931640625, + "learning_rate": 9.999794080037675e-06, + "loss": 2038.149, + "step": 51300 + }, + { + "epoch": 0.10364944630065814, + "grad_norm": 191705.234375, + "learning_rate": 9.999790899875807e-06, + "loss": 8258.8813, + "step": 51310 + }, + { + "epoch": 0.10366964693334195, + "grad_norm": 12850.7548828125, + "learning_rate": 9.999787695345565e-06, + "loss": 6373.4328, + "step": 51320 + }, + { + "epoch": 0.10368984756602577, + "grad_norm": 67986.640625, + "learning_rate": 9.999784466446965e-06, + "loss": 5108.6148, + "step": 51330 + }, + { + "epoch": 0.10371004819870959, + "grad_norm": 166167.75, + "learning_rate": 9.99978121318002e-06, + "loss": 6116.1781, + "step": 51340 + }, + { + "epoch": 0.1037302488313934, + "grad_norm": 8628.43359375, + "learning_rate": 9.99977793554475e-06, + "loss": 4030.8805, + "step": 51350 + }, + { + "epoch": 0.10375044946407722, + "grad_norm": 414.2991943359375, + "learning_rate": 9.999774633541169e-06, + "loss": 2519.0998, + "step": 51360 + }, + { + "epoch": 0.10377065009676104, + "grad_norm": 73018.71875, + "learning_rate": 9.999771307169291e-06, + "loss": 6595.0359, + "step": 51370 + }, + { + "epoch": 0.10379085072944484, + "grad_norm": 6877.998046875, + "learning_rate": 9.999767956429135e-06, + "loss": 3477.816, + "step": 51380 + }, + { + "epoch": 0.10381105136212866, + "grad_norm": 39207.4921875, + "learning_rate": 9.999764581320714e-06, + "loss": 7009.7172, + "step": 51390 + }, + { + "epoch": 0.10383125199481248, + "grad_norm": 198105.484375, + "learning_rate": 9.99976118184405e-06, + "loss": 8483.7336, + "step": 51400 + }, + { + "epoch": 0.10385145262749629, + "grad_norm": 18262.88671875, + "learning_rate": 9.999757757999155e-06, + "loss": 2484.8822, + "step": 51410 + }, + { + "epoch": 0.10387165326018011, + "grad_norm": 17929.6640625, + "learning_rate": 9.999754309786047e-06, + "loss": 9180.9867, + "step": 51420 + }, + { + "epoch": 0.10389185389286393, + "grad_norm": 21167.62109375, + "learning_rate": 9.999750837204743e-06, + "loss": 3033.5848, + "step": 51430 + }, + { + "epoch": 0.10391205452554773, + "grad_norm": 19437.767578125, + "learning_rate": 9.99974734025526e-06, + "loss": 2366.0043, + "step": 51440 + }, + { + "epoch": 0.10393225515823155, + "grad_norm": 29687.412109375, + "learning_rate": 9.999743818937614e-06, + "loss": 5132.2551, + "step": 51450 + }, + { + "epoch": 0.10395245579091537, + "grad_norm": 7621.27197265625, + "learning_rate": 9.999740273251824e-06, + "loss": 6962.7937, + "step": 51460 + }, + { + "epoch": 0.1039726564235992, + "grad_norm": 96193.8125, + "learning_rate": 9.999736703197907e-06, + "loss": 3389.0609, + "step": 51470 + }, + { + "epoch": 0.103992857056283, + "grad_norm": 42140.9609375, + "learning_rate": 9.999733108775878e-06, + "loss": 4041.834, + "step": 51480 + }, + { + "epoch": 0.10401305768896682, + "grad_norm": 3025.92822265625, + "learning_rate": 9.999729489985757e-06, + "loss": 1434.8942, + "step": 51490 + }, + { + "epoch": 0.10403325832165064, + "grad_norm": 113158.390625, + "learning_rate": 9.999725846827562e-06, + "loss": 6084.3402, + "step": 51500 + }, + { + "epoch": 0.10405345895433445, + "grad_norm": 37467.140625, + "learning_rate": 9.999722179301309e-06, + "loss": 7336.8445, + "step": 51510 + }, + { + "epoch": 0.10407365958701827, + "grad_norm": 76770.9140625, + "learning_rate": 9.999718487407015e-06, + "loss": 3791.9453, + "step": 51520 + }, + { + "epoch": 0.10409386021970209, + "grad_norm": 7165.16064453125, + "learning_rate": 9.9997147711447e-06, + "loss": 2870.4338, + "step": 51530 + }, + { + "epoch": 0.10411406085238589, + "grad_norm": 15711.0166015625, + "learning_rate": 9.999711030514383e-06, + "loss": 6614.368, + "step": 51540 + }, + { + "epoch": 0.10413426148506971, + "grad_norm": 7718.08203125, + "learning_rate": 9.99970726551608e-06, + "loss": 2176.7818, + "step": 51550 + }, + { + "epoch": 0.10415446211775353, + "grad_norm": 22137.05078125, + "learning_rate": 9.999703476149808e-06, + "loss": 5358.6664, + "step": 51560 + }, + { + "epoch": 0.10417466275043734, + "grad_norm": 9350.0078125, + "learning_rate": 9.999699662415592e-06, + "loss": 5586.7852, + "step": 51570 + }, + { + "epoch": 0.10419486338312116, + "grad_norm": 612.735595703125, + "learning_rate": 9.999695824313443e-06, + "loss": 9048.568, + "step": 51580 + }, + { + "epoch": 0.10421506401580498, + "grad_norm": 11405.3798828125, + "learning_rate": 9.999691961843385e-06, + "loss": 8452.8328, + "step": 51590 + }, + { + "epoch": 0.10423526464848878, + "grad_norm": 10245.3876953125, + "learning_rate": 9.999688075005434e-06, + "loss": 8898.5273, + "step": 51600 + }, + { + "epoch": 0.1042554652811726, + "grad_norm": 4110.48046875, + "learning_rate": 9.999684163799609e-06, + "loss": 4018.2555, + "step": 51610 + }, + { + "epoch": 0.10427566591385642, + "grad_norm": 3550.77490234375, + "learning_rate": 9.99968022822593e-06, + "loss": 3734.5758, + "step": 51620 + }, + { + "epoch": 0.10429586654654023, + "grad_norm": 11222.220703125, + "learning_rate": 9.999676268284416e-06, + "loss": 2859.4625, + "step": 51630 + }, + { + "epoch": 0.10431606717922405, + "grad_norm": 5720.97802734375, + "learning_rate": 9.999672283975085e-06, + "loss": 14769.7375, + "step": 51640 + }, + { + "epoch": 0.10433626781190787, + "grad_norm": 5236.76708984375, + "learning_rate": 9.99966827529796e-06, + "loss": 11013.0742, + "step": 51650 + }, + { + "epoch": 0.10435646844459169, + "grad_norm": 795.1997680664062, + "learning_rate": 9.999664242253058e-06, + "loss": 1976.9811, + "step": 51660 + }, + { + "epoch": 0.1043766690772755, + "grad_norm": 68133.375, + "learning_rate": 9.999660184840398e-06, + "loss": 11971.0969, + "step": 51670 + }, + { + "epoch": 0.10439686970995932, + "grad_norm": 10589.7041015625, + "learning_rate": 9.999656103060001e-06, + "loss": 4082.2941, + "step": 51680 + }, + { + "epoch": 0.10441707034264314, + "grad_norm": 4382.28076171875, + "learning_rate": 9.999651996911886e-06, + "loss": 6168.991, + "step": 51690 + }, + { + "epoch": 0.10443727097532694, + "grad_norm": 599.9317626953125, + "learning_rate": 9.999647866396073e-06, + "loss": 5416.8469, + "step": 51700 + }, + { + "epoch": 0.10445747160801076, + "grad_norm": 8315.8046875, + "learning_rate": 9.999643711512586e-06, + "loss": 7063.757, + "step": 51710 + }, + { + "epoch": 0.10447767224069458, + "grad_norm": 9467.3662109375, + "learning_rate": 9.999639532261438e-06, + "loss": 1300.4231, + "step": 51720 + }, + { + "epoch": 0.10449787287337839, + "grad_norm": 176224.3125, + "learning_rate": 9.999635328642655e-06, + "loss": 8422.7938, + "step": 51730 + }, + { + "epoch": 0.10451807350606221, + "grad_norm": 0.0, + "learning_rate": 9.999631100656255e-06, + "loss": 4955.9434, + "step": 51740 + }, + { + "epoch": 0.10453827413874603, + "grad_norm": 46971.78515625, + "learning_rate": 9.999626848302261e-06, + "loss": 4901.5402, + "step": 51750 + }, + { + "epoch": 0.10455847477142984, + "grad_norm": 11285.8408203125, + "learning_rate": 9.99962257158069e-06, + "loss": 5110.802, + "step": 51760 + }, + { + "epoch": 0.10457867540411366, + "grad_norm": 115812.9921875, + "learning_rate": 9.999618270491567e-06, + "loss": 4218.0699, + "step": 51770 + }, + { + "epoch": 0.10459887603679748, + "grad_norm": 14799.318359375, + "learning_rate": 9.999613945034909e-06, + "loss": 6295.6613, + "step": 51780 + }, + { + "epoch": 0.10461907666948128, + "grad_norm": 4063.677734375, + "learning_rate": 9.999609595210743e-06, + "loss": 1545.5679, + "step": 51790 + }, + { + "epoch": 0.1046392773021651, + "grad_norm": 12204.6904296875, + "learning_rate": 9.999605221019082e-06, + "loss": 7685.9266, + "step": 51800 + }, + { + "epoch": 0.10465947793484892, + "grad_norm": 3726.083740234375, + "learning_rate": 9.999600822459952e-06, + "loss": 1864.5791, + "step": 51810 + }, + { + "epoch": 0.10467967856753274, + "grad_norm": 18482.021484375, + "learning_rate": 9.999596399533375e-06, + "loss": 8853.95, + "step": 51820 + }, + { + "epoch": 0.10469987920021655, + "grad_norm": 1755.484375, + "learning_rate": 9.999591952239371e-06, + "loss": 3122.8785, + "step": 51830 + }, + { + "epoch": 0.10472007983290037, + "grad_norm": 31523.8828125, + "learning_rate": 9.999587480577964e-06, + "loss": 2531.4053, + "step": 51840 + }, + { + "epoch": 0.10474028046558419, + "grad_norm": 20928.1328125, + "learning_rate": 9.999582984549172e-06, + "loss": 3091.4133, + "step": 51850 + }, + { + "epoch": 0.104760481098268, + "grad_norm": 927.7621459960938, + "learning_rate": 9.99957846415302e-06, + "loss": 3597.3105, + "step": 51860 + }, + { + "epoch": 0.10478068173095181, + "grad_norm": 8429.2587890625, + "learning_rate": 9.999573919389527e-06, + "loss": 4324.5023, + "step": 51870 + }, + { + "epoch": 0.10480088236363563, + "grad_norm": 7446.53515625, + "learning_rate": 9.999569350258717e-06, + "loss": 9908.1453, + "step": 51880 + }, + { + "epoch": 0.10482108299631944, + "grad_norm": 14619.59765625, + "learning_rate": 9.999564756760616e-06, + "loss": 5874.5379, + "step": 51890 + }, + { + "epoch": 0.10484128362900326, + "grad_norm": 7882.68896484375, + "learning_rate": 9.999560138895238e-06, + "loss": 4407.9227, + "step": 51900 + }, + { + "epoch": 0.10486148426168708, + "grad_norm": 36424.94921875, + "learning_rate": 9.999555496662614e-06, + "loss": 4197.3289, + "step": 51910 + }, + { + "epoch": 0.10488168489437089, + "grad_norm": 41522.41796875, + "learning_rate": 9.999550830062762e-06, + "loss": 4438.6484, + "step": 51920 + }, + { + "epoch": 0.1049018855270547, + "grad_norm": 10361.9931640625, + "learning_rate": 9.999546139095706e-06, + "loss": 7084.4937, + "step": 51930 + }, + { + "epoch": 0.10492208615973853, + "grad_norm": 39711.3828125, + "learning_rate": 9.999541423761468e-06, + "loss": 7265.9156, + "step": 51940 + }, + { + "epoch": 0.10494228679242233, + "grad_norm": 2823.685546875, + "learning_rate": 9.999536684060071e-06, + "loss": 9368.257, + "step": 51950 + }, + { + "epoch": 0.10496248742510615, + "grad_norm": 103857.7109375, + "learning_rate": 9.999531919991538e-06, + "loss": 4832.8266, + "step": 51960 + }, + { + "epoch": 0.10498268805778997, + "grad_norm": 37445.74609375, + "learning_rate": 9.999527131555894e-06, + "loss": 5020.1918, + "step": 51970 + }, + { + "epoch": 0.10500288869047379, + "grad_norm": 10564.67578125, + "learning_rate": 9.99952231875316e-06, + "loss": 6309.5, + "step": 51980 + }, + { + "epoch": 0.1050230893231576, + "grad_norm": 3503.320068359375, + "learning_rate": 9.999517481583363e-06, + "loss": 5482.9109, + "step": 51990 + }, + { + "epoch": 0.10504328995584142, + "grad_norm": 37456.265625, + "learning_rate": 9.999512620046523e-06, + "loss": 2908.6582, + "step": 52000 + }, + { + "epoch": 0.10506349058852524, + "grad_norm": 13816.0224609375, + "learning_rate": 9.999507734142663e-06, + "loss": 3277.0367, + "step": 52010 + }, + { + "epoch": 0.10508369122120904, + "grad_norm": 4538.21044921875, + "learning_rate": 9.999502823871809e-06, + "loss": 2581.9072, + "step": 52020 + }, + { + "epoch": 0.10510389185389286, + "grad_norm": 27918.279296875, + "learning_rate": 9.999497889233987e-06, + "loss": 3661.5043, + "step": 52030 + }, + { + "epoch": 0.10512409248657668, + "grad_norm": 15147.4140625, + "learning_rate": 9.999492930229217e-06, + "loss": 2643.0053, + "step": 52040 + }, + { + "epoch": 0.10514429311926049, + "grad_norm": 1009.1744384765625, + "learning_rate": 9.999487946857526e-06, + "loss": 3654.7852, + "step": 52050 + }, + { + "epoch": 0.10516449375194431, + "grad_norm": 13371.6220703125, + "learning_rate": 9.999482939118936e-06, + "loss": 3286.2785, + "step": 52060 + }, + { + "epoch": 0.10518469438462813, + "grad_norm": 39523.99609375, + "learning_rate": 9.999477907013473e-06, + "loss": 7689.2266, + "step": 52070 + }, + { + "epoch": 0.10520489501731194, + "grad_norm": 23114.1328125, + "learning_rate": 9.999472850541161e-06, + "loss": 8770.7078, + "step": 52080 + }, + { + "epoch": 0.10522509564999576, + "grad_norm": 14469.3544921875, + "learning_rate": 9.999467769702023e-06, + "loss": 5025.1328, + "step": 52090 + }, + { + "epoch": 0.10524529628267958, + "grad_norm": 15232.5009765625, + "learning_rate": 9.999462664496088e-06, + "loss": 2719.4457, + "step": 52100 + }, + { + "epoch": 0.10526549691536338, + "grad_norm": 122364.984375, + "learning_rate": 9.999457534923377e-06, + "loss": 8907.2875, + "step": 52110 + }, + { + "epoch": 0.1052856975480472, + "grad_norm": 4972.015625, + "learning_rate": 9.999452380983915e-06, + "loss": 5856.6852, + "step": 52120 + }, + { + "epoch": 0.10530589818073102, + "grad_norm": 853.01220703125, + "learning_rate": 9.999447202677732e-06, + "loss": 3644.8621, + "step": 52130 + }, + { + "epoch": 0.10532609881341484, + "grad_norm": 8523.7939453125, + "learning_rate": 9.999442000004848e-06, + "loss": 4033.8711, + "step": 52140 + }, + { + "epoch": 0.10534629944609865, + "grad_norm": 54691.1640625, + "learning_rate": 9.99943677296529e-06, + "loss": 5340.393, + "step": 52150 + }, + { + "epoch": 0.10536650007878247, + "grad_norm": 16022.9140625, + "learning_rate": 9.999431521559081e-06, + "loss": 3650.8367, + "step": 52160 + }, + { + "epoch": 0.10538670071146629, + "grad_norm": 747.8538208007812, + "learning_rate": 9.999426245786253e-06, + "loss": 1151.2422, + "step": 52170 + }, + { + "epoch": 0.1054069013441501, + "grad_norm": 20547.318359375, + "learning_rate": 9.999420945646828e-06, + "loss": 4988.4105, + "step": 52180 + }, + { + "epoch": 0.10542710197683391, + "grad_norm": 13775.12109375, + "learning_rate": 9.99941562114083e-06, + "loss": 1413.8872, + "step": 52190 + }, + { + "epoch": 0.10544730260951773, + "grad_norm": 773.8280029296875, + "learning_rate": 9.999410272268285e-06, + "loss": 3094.6969, + "step": 52200 + }, + { + "epoch": 0.10546750324220154, + "grad_norm": 325.1138000488281, + "learning_rate": 9.999404899029222e-06, + "loss": 4431.698, + "step": 52210 + }, + { + "epoch": 0.10548770387488536, + "grad_norm": 15645.671875, + "learning_rate": 9.999399501423667e-06, + "loss": 3920.1574, + "step": 52220 + }, + { + "epoch": 0.10550790450756918, + "grad_norm": 433.6936340332031, + "learning_rate": 9.999394079451643e-06, + "loss": 2731.7283, + "step": 52230 + }, + { + "epoch": 0.10552810514025299, + "grad_norm": 1454.37451171875, + "learning_rate": 9.99938863311318e-06, + "loss": 1400.0572, + "step": 52240 + }, + { + "epoch": 0.1055483057729368, + "grad_norm": 6650.13330078125, + "learning_rate": 9.999383162408303e-06, + "loss": 3792.3785, + "step": 52250 + }, + { + "epoch": 0.10556850640562063, + "grad_norm": 80198.375, + "learning_rate": 9.99937766733704e-06, + "loss": 4072.2695, + "step": 52260 + }, + { + "epoch": 0.10558870703830443, + "grad_norm": 14261.462890625, + "learning_rate": 9.999372147899416e-06, + "loss": 2326.8309, + "step": 52270 + }, + { + "epoch": 0.10560890767098825, + "grad_norm": 3027.74658203125, + "learning_rate": 9.999366604095458e-06, + "loss": 9364.1055, + "step": 52280 + }, + { + "epoch": 0.10562910830367207, + "grad_norm": 25210.92578125, + "learning_rate": 9.999361035925193e-06, + "loss": 8020.0148, + "step": 52290 + }, + { + "epoch": 0.10564930893635589, + "grad_norm": 9495.861328125, + "learning_rate": 9.999355443388649e-06, + "loss": 5375.4824, + "step": 52300 + }, + { + "epoch": 0.1056695095690397, + "grad_norm": 46091.6953125, + "learning_rate": 9.999349826485854e-06, + "loss": 4390.3961, + "step": 52310 + }, + { + "epoch": 0.10568971020172352, + "grad_norm": 6132.6123046875, + "learning_rate": 9.999344185216833e-06, + "loss": 4766.7906, + "step": 52320 + }, + { + "epoch": 0.10570991083440734, + "grad_norm": 99416.828125, + "learning_rate": 9.999338519581616e-06, + "loss": 3992.5449, + "step": 52330 + }, + { + "epoch": 0.10573011146709115, + "grad_norm": 18952.236328125, + "learning_rate": 9.999332829580227e-06, + "loss": 4743.6605, + "step": 52340 + }, + { + "epoch": 0.10575031209977497, + "grad_norm": 25047.185546875, + "learning_rate": 9.999327115212698e-06, + "loss": 4289.9289, + "step": 52350 + }, + { + "epoch": 0.10577051273245879, + "grad_norm": 962.4237060546875, + "learning_rate": 9.999321376479054e-06, + "loss": 4007.982, + "step": 52360 + }, + { + "epoch": 0.10579071336514259, + "grad_norm": 2237.6865234375, + "learning_rate": 9.999315613379326e-06, + "loss": 3247.7779, + "step": 52370 + }, + { + "epoch": 0.10581091399782641, + "grad_norm": 1340.9661865234375, + "learning_rate": 9.999309825913538e-06, + "loss": 3421.4602, + "step": 52380 + }, + { + "epoch": 0.10583111463051023, + "grad_norm": 72359.6875, + "learning_rate": 9.999304014081721e-06, + "loss": 3389.4555, + "step": 52390 + }, + { + "epoch": 0.10585131526319404, + "grad_norm": 1174.212646484375, + "learning_rate": 9.999298177883902e-06, + "loss": 3310.9645, + "step": 52400 + }, + { + "epoch": 0.10587151589587786, + "grad_norm": 34658.6484375, + "learning_rate": 9.999292317320112e-06, + "loss": 2345.452, + "step": 52410 + }, + { + "epoch": 0.10589171652856168, + "grad_norm": 38319.78125, + "learning_rate": 9.999286432390376e-06, + "loss": 8655.4281, + "step": 52420 + }, + { + "epoch": 0.10591191716124548, + "grad_norm": 3342.0693359375, + "learning_rate": 9.999280523094724e-06, + "loss": 6624.668, + "step": 52430 + }, + { + "epoch": 0.1059321177939293, + "grad_norm": 2352.605224609375, + "learning_rate": 9.999274589433186e-06, + "loss": 5556.9484, + "step": 52440 + }, + { + "epoch": 0.10595231842661312, + "grad_norm": 12725.5947265625, + "learning_rate": 9.99926863140579e-06, + "loss": 5052.4941, + "step": 52450 + }, + { + "epoch": 0.10597251905929694, + "grad_norm": 37041.22265625, + "learning_rate": 9.999262649012564e-06, + "loss": 3803.7234, + "step": 52460 + }, + { + "epoch": 0.10599271969198075, + "grad_norm": 2428.313232421875, + "learning_rate": 9.99925664225354e-06, + "loss": 721.3964, + "step": 52470 + }, + { + "epoch": 0.10601292032466457, + "grad_norm": 37367.36328125, + "learning_rate": 9.999250611128743e-06, + "loss": 5418.2586, + "step": 52480 + }, + { + "epoch": 0.10603312095734839, + "grad_norm": 1935.044189453125, + "learning_rate": 9.999244555638205e-06, + "loss": 1350.2146, + "step": 52490 + }, + { + "epoch": 0.1060533215900322, + "grad_norm": 5857.724609375, + "learning_rate": 9.999238475781957e-06, + "loss": 1846.5082, + "step": 52500 + }, + { + "epoch": 0.10607352222271602, + "grad_norm": 718.3492431640625, + "learning_rate": 9.999232371560027e-06, + "loss": 3028.1434, + "step": 52510 + }, + { + "epoch": 0.10609372285539984, + "grad_norm": 83232.359375, + "learning_rate": 9.999226242972445e-06, + "loss": 2951.3391, + "step": 52520 + }, + { + "epoch": 0.10611392348808364, + "grad_norm": 19480.734375, + "learning_rate": 9.999220090019238e-06, + "loss": 4460.6617, + "step": 52530 + }, + { + "epoch": 0.10613412412076746, + "grad_norm": 31042.697265625, + "learning_rate": 9.99921391270044e-06, + "loss": 5176.4898, + "step": 52540 + }, + { + "epoch": 0.10615432475345128, + "grad_norm": 38087.43359375, + "learning_rate": 9.999207711016081e-06, + "loss": 5407.8539, + "step": 52550 + }, + { + "epoch": 0.10617452538613509, + "grad_norm": 5520.95556640625, + "learning_rate": 9.999201484966188e-06, + "loss": 2182.5158, + "step": 52560 + }, + { + "epoch": 0.10619472601881891, + "grad_norm": 6102.72509765625, + "learning_rate": 9.999195234550796e-06, + "loss": 5378.7492, + "step": 52570 + }, + { + "epoch": 0.10621492665150273, + "grad_norm": 21263.01171875, + "learning_rate": 9.99918895976993e-06, + "loss": 6842.1219, + "step": 52580 + }, + { + "epoch": 0.10623512728418653, + "grad_norm": 15988.9873046875, + "learning_rate": 9.999182660623625e-06, + "loss": 3278.9281, + "step": 52590 + }, + { + "epoch": 0.10625532791687035, + "grad_norm": 23106.806640625, + "learning_rate": 9.999176337111908e-06, + "loss": 10089.268, + "step": 52600 + }, + { + "epoch": 0.10627552854955417, + "grad_norm": 29198.966796875, + "learning_rate": 9.999169989234815e-06, + "loss": 3404.1617, + "step": 52610 + }, + { + "epoch": 0.106295729182238, + "grad_norm": 41631.50390625, + "learning_rate": 9.999163616992371e-06, + "loss": 7728.0781, + "step": 52620 + }, + { + "epoch": 0.1063159298149218, + "grad_norm": 36.28479766845703, + "learning_rate": 9.999157220384612e-06, + "loss": 923.7778, + "step": 52630 + }, + { + "epoch": 0.10633613044760562, + "grad_norm": 128254.203125, + "learning_rate": 9.999150799411565e-06, + "loss": 4704.7328, + "step": 52640 + }, + { + "epoch": 0.10635633108028944, + "grad_norm": 96933.296875, + "learning_rate": 9.999144354073264e-06, + "loss": 10104.8422, + "step": 52650 + }, + { + "epoch": 0.10637653171297325, + "grad_norm": 36189.7890625, + "learning_rate": 9.999137884369741e-06, + "loss": 7622.6586, + "step": 52660 + }, + { + "epoch": 0.10639673234565707, + "grad_norm": 32495.529296875, + "learning_rate": 9.999131390301027e-06, + "loss": 4310.2867, + "step": 52670 + }, + { + "epoch": 0.10641693297834089, + "grad_norm": 31180.099609375, + "learning_rate": 9.99912487186715e-06, + "loss": 4670.4121, + "step": 52680 + }, + { + "epoch": 0.10643713361102469, + "grad_norm": 149349.515625, + "learning_rate": 9.999118329068148e-06, + "loss": 7113.2859, + "step": 52690 + }, + { + "epoch": 0.10645733424370851, + "grad_norm": 16454.65625, + "learning_rate": 9.999111761904046e-06, + "loss": 10914.6297, + "step": 52700 + }, + { + "epoch": 0.10647753487639233, + "grad_norm": 1162.154296875, + "learning_rate": 9.999105170374881e-06, + "loss": 5534.7895, + "step": 52710 + }, + { + "epoch": 0.10649773550907614, + "grad_norm": 15285.62109375, + "learning_rate": 9.999098554480685e-06, + "loss": 5427.6414, + "step": 52720 + }, + { + "epoch": 0.10651793614175996, + "grad_norm": 0.0, + "learning_rate": 9.999091914221487e-06, + "loss": 4277.8594, + "step": 52730 + }, + { + "epoch": 0.10653813677444378, + "grad_norm": 94635.3984375, + "learning_rate": 9.999085249597322e-06, + "loss": 4935.8289, + "step": 52740 + }, + { + "epoch": 0.10655833740712758, + "grad_norm": 1275.281982421875, + "learning_rate": 9.999078560608221e-06, + "loss": 8809.7633, + "step": 52750 + }, + { + "epoch": 0.1065785380398114, + "grad_norm": 2580.53369140625, + "learning_rate": 9.999071847254219e-06, + "loss": 3332.6715, + "step": 52760 + }, + { + "epoch": 0.10659873867249522, + "grad_norm": 156460.96875, + "learning_rate": 9.999065109535346e-06, + "loss": 7224.2828, + "step": 52770 + }, + { + "epoch": 0.10661893930517904, + "grad_norm": 3471.2685546875, + "learning_rate": 9.999058347451638e-06, + "loss": 4211.3371, + "step": 52780 + }, + { + "epoch": 0.10663913993786285, + "grad_norm": 1361.599609375, + "learning_rate": 9.999051561003124e-06, + "loss": 2848.673, + "step": 52790 + }, + { + "epoch": 0.10665934057054667, + "grad_norm": 20090.56640625, + "learning_rate": 9.99904475018984e-06, + "loss": 2521.0906, + "step": 52800 + }, + { + "epoch": 0.10667954120323049, + "grad_norm": 70938.453125, + "learning_rate": 9.999037915011819e-06, + "loss": 6996.2195, + "step": 52810 + }, + { + "epoch": 0.1066997418359143, + "grad_norm": 33063.63671875, + "learning_rate": 9.999031055469091e-06, + "loss": 10472.0953, + "step": 52820 + }, + { + "epoch": 0.10671994246859812, + "grad_norm": 591.55322265625, + "learning_rate": 9.999024171561693e-06, + "loss": 2882.1414, + "step": 52830 + }, + { + "epoch": 0.10674014310128194, + "grad_norm": 2835.876953125, + "learning_rate": 9.999017263289656e-06, + "loss": 12807.2945, + "step": 52840 + }, + { + "epoch": 0.10676034373396574, + "grad_norm": 181301.90625, + "learning_rate": 9.999010330653019e-06, + "loss": 6540.6203, + "step": 52850 + }, + { + "epoch": 0.10678054436664956, + "grad_norm": 2415.509033203125, + "learning_rate": 9.999003373651809e-06, + "loss": 7016.1078, + "step": 52860 + }, + { + "epoch": 0.10680074499933338, + "grad_norm": 495.6498107910156, + "learning_rate": 9.998996392286062e-06, + "loss": 3810.9535, + "step": 52870 + }, + { + "epoch": 0.10682094563201719, + "grad_norm": 21179.85546875, + "learning_rate": 9.998989386555815e-06, + "loss": 7856.9273, + "step": 52880 + }, + { + "epoch": 0.10684114626470101, + "grad_norm": 34143.875, + "learning_rate": 9.9989823564611e-06, + "loss": 2097.5023, + "step": 52890 + }, + { + "epoch": 0.10686134689738483, + "grad_norm": 299996.1875, + "learning_rate": 9.99897530200195e-06, + "loss": 7995.4031, + "step": 52900 + }, + { + "epoch": 0.10688154753006864, + "grad_norm": 56183.09375, + "learning_rate": 9.998968223178402e-06, + "loss": 5050.5473, + "step": 52910 + }, + { + "epoch": 0.10690174816275246, + "grad_norm": 17935.525390625, + "learning_rate": 9.99896111999049e-06, + "loss": 5969.7422, + "step": 52920 + }, + { + "epoch": 0.10692194879543628, + "grad_norm": 9045.9296875, + "learning_rate": 9.998953992438245e-06, + "loss": 6215.3047, + "step": 52930 + }, + { + "epoch": 0.1069421494281201, + "grad_norm": 31947.943359375, + "learning_rate": 9.998946840521706e-06, + "loss": 3049.3945, + "step": 52940 + }, + { + "epoch": 0.1069623500608039, + "grad_norm": 7439.998046875, + "learning_rate": 9.998939664240908e-06, + "loss": 4330.0086, + "step": 52950 + }, + { + "epoch": 0.10698255069348772, + "grad_norm": 49485.01171875, + "learning_rate": 9.998932463595882e-06, + "loss": 4175.5094, + "step": 52960 + }, + { + "epoch": 0.10700275132617154, + "grad_norm": 1916.4876708984375, + "learning_rate": 9.998925238586666e-06, + "loss": 4210.2949, + "step": 52970 + }, + { + "epoch": 0.10702295195885535, + "grad_norm": 3545.841552734375, + "learning_rate": 9.998917989213296e-06, + "loss": 5115.7383, + "step": 52980 + }, + { + "epoch": 0.10704315259153917, + "grad_norm": 1381.089111328125, + "learning_rate": 9.998910715475804e-06, + "loss": 3736.4945, + "step": 52990 + }, + { + "epoch": 0.10706335322422299, + "grad_norm": 4371.08447265625, + "learning_rate": 9.998903417374228e-06, + "loss": 7366.1781, + "step": 53000 + }, + { + "epoch": 0.1070835538569068, + "grad_norm": 31312.328125, + "learning_rate": 9.998896094908603e-06, + "loss": 8629.2227, + "step": 53010 + }, + { + "epoch": 0.10710375448959061, + "grad_norm": 15286.3486328125, + "learning_rate": 9.998888748078966e-06, + "loss": 3362.1859, + "step": 53020 + }, + { + "epoch": 0.10712395512227443, + "grad_norm": 430.40484619140625, + "learning_rate": 9.99888137688535e-06, + "loss": 4759.4383, + "step": 53030 + }, + { + "epoch": 0.10714415575495824, + "grad_norm": 156.77682495117188, + "learning_rate": 9.998873981327795e-06, + "loss": 3144.6586, + "step": 53040 + }, + { + "epoch": 0.10716435638764206, + "grad_norm": 1552.9857177734375, + "learning_rate": 9.998866561406331e-06, + "loss": 7498.3836, + "step": 53050 + }, + { + "epoch": 0.10718455702032588, + "grad_norm": 203547.046875, + "learning_rate": 9.998859117121e-06, + "loss": 6233.5859, + "step": 53060 + }, + { + "epoch": 0.10720475765300969, + "grad_norm": 21534.681640625, + "learning_rate": 9.998851648471834e-06, + "loss": 2758.7459, + "step": 53070 + }, + { + "epoch": 0.1072249582856935, + "grad_norm": 10544.3974609375, + "learning_rate": 9.998844155458873e-06, + "loss": 3030.3244, + "step": 53080 + }, + { + "epoch": 0.10724515891837733, + "grad_norm": 23586.671875, + "learning_rate": 9.998836638082152e-06, + "loss": 2993.718, + "step": 53090 + }, + { + "epoch": 0.10726535955106115, + "grad_norm": 13334.474609375, + "learning_rate": 9.998829096341706e-06, + "loss": 6003.8086, + "step": 53100 + }, + { + "epoch": 0.10728556018374495, + "grad_norm": 11276.595703125, + "learning_rate": 9.998821530237576e-06, + "loss": 7754.518, + "step": 53110 + }, + { + "epoch": 0.10730576081642877, + "grad_norm": 57094.33203125, + "learning_rate": 9.998813939769794e-06, + "loss": 5234.793, + "step": 53120 + }, + { + "epoch": 0.10732596144911259, + "grad_norm": 18277.931640625, + "learning_rate": 9.9988063249384e-06, + "loss": 4007.5566, + "step": 53130 + }, + { + "epoch": 0.1073461620817964, + "grad_norm": 82089.75, + "learning_rate": 9.99879868574343e-06, + "loss": 6623.7953, + "step": 53140 + }, + { + "epoch": 0.10736636271448022, + "grad_norm": 49818.390625, + "learning_rate": 9.998791022184921e-06, + "loss": 9279.5836, + "step": 53150 + }, + { + "epoch": 0.10738656334716404, + "grad_norm": 3841.69189453125, + "learning_rate": 9.998783334262911e-06, + "loss": 8791.8664, + "step": 53160 + }, + { + "epoch": 0.10740676397984784, + "grad_norm": 1836.21923828125, + "learning_rate": 9.998775621977438e-06, + "loss": 4842.0602, + "step": 53170 + }, + { + "epoch": 0.10742696461253166, + "grad_norm": 1532.50927734375, + "learning_rate": 9.998767885328538e-06, + "loss": 7704.8367, + "step": 53180 + }, + { + "epoch": 0.10744716524521548, + "grad_norm": 14365.8798828125, + "learning_rate": 9.99876012431625e-06, + "loss": 5650.8102, + "step": 53190 + }, + { + "epoch": 0.10746736587789929, + "grad_norm": 131852.390625, + "learning_rate": 9.998752338940612e-06, + "loss": 7884.6234, + "step": 53200 + }, + { + "epoch": 0.10748756651058311, + "grad_norm": 1394.4666748046875, + "learning_rate": 9.99874452920166e-06, + "loss": 4512.107, + "step": 53210 + }, + { + "epoch": 0.10750776714326693, + "grad_norm": 44332.8203125, + "learning_rate": 9.998736695099434e-06, + "loss": 3821.3418, + "step": 53220 + }, + { + "epoch": 0.10752796777595074, + "grad_norm": 157.0779266357422, + "learning_rate": 9.998728836633972e-06, + "loss": 2824.5193, + "step": 53230 + }, + { + "epoch": 0.10754816840863456, + "grad_norm": 17644.220703125, + "learning_rate": 9.998720953805312e-06, + "loss": 1155.4775, + "step": 53240 + }, + { + "epoch": 0.10756836904131838, + "grad_norm": 34976.6015625, + "learning_rate": 9.998713046613492e-06, + "loss": 3524.5992, + "step": 53250 + }, + { + "epoch": 0.1075885696740022, + "grad_norm": 4337.67333984375, + "learning_rate": 9.998705115058552e-06, + "loss": 3947.7734, + "step": 53260 + }, + { + "epoch": 0.107608770306686, + "grad_norm": 11779.4150390625, + "learning_rate": 9.998697159140528e-06, + "loss": 3323.709, + "step": 53270 + }, + { + "epoch": 0.10762897093936982, + "grad_norm": 123743.5, + "learning_rate": 9.998689178859461e-06, + "loss": 10980.7648, + "step": 53280 + }, + { + "epoch": 0.10764917157205364, + "grad_norm": 2393.89208984375, + "learning_rate": 9.99868117421539e-06, + "loss": 2036.8098, + "step": 53290 + }, + { + "epoch": 0.10766937220473745, + "grad_norm": 41704.7578125, + "learning_rate": 9.998673145208351e-06, + "loss": 3254.2984, + "step": 53300 + }, + { + "epoch": 0.10768957283742127, + "grad_norm": 53185.08984375, + "learning_rate": 9.998665091838386e-06, + "loss": 7943.0836, + "step": 53310 + }, + { + "epoch": 0.10770977347010509, + "grad_norm": 120195.3046875, + "learning_rate": 9.998657014105535e-06, + "loss": 8343.7, + "step": 53320 + }, + { + "epoch": 0.1077299741027889, + "grad_norm": 16679.3671875, + "learning_rate": 9.998648912009835e-06, + "loss": 7607.443, + "step": 53330 + }, + { + "epoch": 0.10775017473547271, + "grad_norm": 206133.75, + "learning_rate": 9.998640785551327e-06, + "loss": 5363.8313, + "step": 53340 + }, + { + "epoch": 0.10777037536815653, + "grad_norm": 13216.615234375, + "learning_rate": 9.99863263473005e-06, + "loss": 4407.4398, + "step": 53350 + }, + { + "epoch": 0.10779057600084034, + "grad_norm": 23477.080078125, + "learning_rate": 9.998624459546043e-06, + "loss": 2659.8162, + "step": 53360 + }, + { + "epoch": 0.10781077663352416, + "grad_norm": 945.0263061523438, + "learning_rate": 9.998616259999348e-06, + "loss": 4641.6488, + "step": 53370 + }, + { + "epoch": 0.10783097726620798, + "grad_norm": 150887.734375, + "learning_rate": 9.998608036090003e-06, + "loss": 8619.9641, + "step": 53380 + }, + { + "epoch": 0.10785117789889179, + "grad_norm": 1145.494873046875, + "learning_rate": 9.998599787818048e-06, + "loss": 1865.7602, + "step": 53390 + }, + { + "epoch": 0.1078713785315756, + "grad_norm": 4893.3525390625, + "learning_rate": 9.998591515183524e-06, + "loss": 1772.0549, + "step": 53400 + }, + { + "epoch": 0.10789157916425943, + "grad_norm": 2037.3458251953125, + "learning_rate": 9.998583218186471e-06, + "loss": 1690.7295, + "step": 53410 + }, + { + "epoch": 0.10791177979694325, + "grad_norm": 21212.91796875, + "learning_rate": 9.998574896826931e-06, + "loss": 5500.9488, + "step": 53420 + }, + { + "epoch": 0.10793198042962705, + "grad_norm": 11988.5068359375, + "learning_rate": 9.998566551104943e-06, + "loss": 2956.8602, + "step": 53430 + }, + { + "epoch": 0.10795218106231087, + "grad_norm": 2115.026123046875, + "learning_rate": 9.998558181020547e-06, + "loss": 10361.0875, + "step": 53440 + }, + { + "epoch": 0.10797238169499469, + "grad_norm": 222.88046264648438, + "learning_rate": 9.998549786573785e-06, + "loss": 5615.2629, + "step": 53450 + }, + { + "epoch": 0.1079925823276785, + "grad_norm": 20552.1953125, + "learning_rate": 9.998541367764699e-06, + "loss": 6087.9887, + "step": 53460 + }, + { + "epoch": 0.10801278296036232, + "grad_norm": 28747.029296875, + "learning_rate": 9.998532924593327e-06, + "loss": 3880.7828, + "step": 53470 + }, + { + "epoch": 0.10803298359304614, + "grad_norm": 2612.10400390625, + "learning_rate": 9.99852445705971e-06, + "loss": 6094.3398, + "step": 53480 + }, + { + "epoch": 0.10805318422572995, + "grad_norm": 428.80633544921875, + "learning_rate": 9.998515965163894e-06, + "loss": 2506.1957, + "step": 53490 + }, + { + "epoch": 0.10807338485841377, + "grad_norm": 1303.1915283203125, + "learning_rate": 9.998507448905917e-06, + "loss": 7327.0586, + "step": 53500 + }, + { + "epoch": 0.10809358549109759, + "grad_norm": 17006.7109375, + "learning_rate": 9.99849890828582e-06, + "loss": 2275.6521, + "step": 53510 + }, + { + "epoch": 0.10811378612378139, + "grad_norm": 17990.7265625, + "learning_rate": 9.998490343303646e-06, + "loss": 2016.5438, + "step": 53520 + }, + { + "epoch": 0.10813398675646521, + "grad_norm": 17417.822265625, + "learning_rate": 9.998481753959436e-06, + "loss": 6503.1219, + "step": 53530 + }, + { + "epoch": 0.10815418738914903, + "grad_norm": 2570.0029296875, + "learning_rate": 9.998473140253234e-06, + "loss": 4870.1168, + "step": 53540 + }, + { + "epoch": 0.10817438802183284, + "grad_norm": 3217.829345703125, + "learning_rate": 9.998464502185076e-06, + "loss": 8250.6156, + "step": 53550 + }, + { + "epoch": 0.10819458865451666, + "grad_norm": 20804.65625, + "learning_rate": 9.998455839755013e-06, + "loss": 3106.4814, + "step": 53560 + }, + { + "epoch": 0.10821478928720048, + "grad_norm": 3527.107666015625, + "learning_rate": 9.99844715296308e-06, + "loss": 2958.4379, + "step": 53570 + }, + { + "epoch": 0.1082349899198843, + "grad_norm": 10621.658203125, + "learning_rate": 9.998438441809322e-06, + "loss": 5697.7914, + "step": 53580 + }, + { + "epoch": 0.1082551905525681, + "grad_norm": 24409.6796875, + "learning_rate": 9.998429706293781e-06, + "loss": 2820.7146, + "step": 53590 + }, + { + "epoch": 0.10827539118525192, + "grad_norm": 16965.806640625, + "learning_rate": 9.9984209464165e-06, + "loss": 5175.1359, + "step": 53600 + }, + { + "epoch": 0.10829559181793574, + "grad_norm": 2451.957763671875, + "learning_rate": 9.998412162177523e-06, + "loss": 5994.0898, + "step": 53610 + }, + { + "epoch": 0.10831579245061955, + "grad_norm": 7701.220703125, + "learning_rate": 9.99840335357689e-06, + "loss": 5157.0855, + "step": 53620 + }, + { + "epoch": 0.10833599308330337, + "grad_norm": 13911.6982421875, + "learning_rate": 9.998394520614645e-06, + "loss": 4246.4801, + "step": 53630 + }, + { + "epoch": 0.10835619371598719, + "grad_norm": 4337.83251953125, + "learning_rate": 9.998385663290833e-06, + "loss": 5034.8109, + "step": 53640 + }, + { + "epoch": 0.108376394348671, + "grad_norm": 7273.7509765625, + "learning_rate": 9.998376781605493e-06, + "loss": 3827.0312, + "step": 53650 + }, + { + "epoch": 0.10839659498135482, + "grad_norm": 161280.859375, + "learning_rate": 9.998367875558673e-06, + "loss": 6877.7906, + "step": 53660 + }, + { + "epoch": 0.10841679561403864, + "grad_norm": 12618.4453125, + "learning_rate": 9.998358945150412e-06, + "loss": 3062.1422, + "step": 53670 + }, + { + "epoch": 0.10843699624672244, + "grad_norm": 9707.4091796875, + "learning_rate": 9.998349990380757e-06, + "loss": 3344.2453, + "step": 53680 + }, + { + "epoch": 0.10845719687940626, + "grad_norm": 539.64208984375, + "learning_rate": 9.998341011249752e-06, + "loss": 3739.091, + "step": 53690 + }, + { + "epoch": 0.10847739751209008, + "grad_norm": 690.81787109375, + "learning_rate": 9.998332007757436e-06, + "loss": 2484.7459, + "step": 53700 + }, + { + "epoch": 0.10849759814477389, + "grad_norm": 26048.484375, + "learning_rate": 9.998322979903859e-06, + "loss": 1595.0415, + "step": 53710 + }, + { + "epoch": 0.10851779877745771, + "grad_norm": 110.25146484375, + "learning_rate": 9.99831392768906e-06, + "loss": 2896.5855, + "step": 53720 + }, + { + "epoch": 0.10853799941014153, + "grad_norm": 5241.0419921875, + "learning_rate": 9.998304851113086e-06, + "loss": 1976.2467, + "step": 53730 + }, + { + "epoch": 0.10855820004282535, + "grad_norm": 2415.586181640625, + "learning_rate": 9.99829575017598e-06, + "loss": 4082.8402, + "step": 53740 + }, + { + "epoch": 0.10857840067550915, + "grad_norm": 61691.2890625, + "learning_rate": 9.998286624877786e-06, + "loss": 4260.2328, + "step": 53750 + }, + { + "epoch": 0.10859860130819297, + "grad_norm": 9458.337890625, + "learning_rate": 9.998277475218552e-06, + "loss": 1419.8496, + "step": 53760 + }, + { + "epoch": 0.1086188019408768, + "grad_norm": 10309.1845703125, + "learning_rate": 9.998268301198317e-06, + "loss": 4825.6141, + "step": 53770 + }, + { + "epoch": 0.1086390025735606, + "grad_norm": 30670.890625, + "learning_rate": 9.99825910281713e-06, + "loss": 2275.0633, + "step": 53780 + }, + { + "epoch": 0.10865920320624442, + "grad_norm": 21465.986328125, + "learning_rate": 9.998249880075033e-06, + "loss": 5697.9621, + "step": 53790 + }, + { + "epoch": 0.10867940383892824, + "grad_norm": 27963.185546875, + "learning_rate": 9.998240632972073e-06, + "loss": 11571.6484, + "step": 53800 + }, + { + "epoch": 0.10869960447161205, + "grad_norm": 3424.491455078125, + "learning_rate": 9.998231361508295e-06, + "loss": 10157.6359, + "step": 53810 + }, + { + "epoch": 0.10871980510429587, + "grad_norm": 1114.0341796875, + "learning_rate": 9.998222065683743e-06, + "loss": 6755.4445, + "step": 53820 + }, + { + "epoch": 0.10874000573697969, + "grad_norm": 8906.3916015625, + "learning_rate": 9.998212745498464e-06, + "loss": 4623.0973, + "step": 53830 + }, + { + "epoch": 0.10876020636966349, + "grad_norm": 1126.339111328125, + "learning_rate": 9.9982034009525e-06, + "loss": 4026.1551, + "step": 53840 + }, + { + "epoch": 0.10878040700234731, + "grad_norm": 26562.521484375, + "learning_rate": 9.9981940320459e-06, + "loss": 6360.352, + "step": 53850 + }, + { + "epoch": 0.10880060763503113, + "grad_norm": 1623.2635498046875, + "learning_rate": 9.998184638778708e-06, + "loss": 2377.223, + "step": 53860 + }, + { + "epoch": 0.10882080826771494, + "grad_norm": 33709.734375, + "learning_rate": 9.99817522115097e-06, + "loss": 2628.141, + "step": 53870 + }, + { + "epoch": 0.10884100890039876, + "grad_norm": 13342.775390625, + "learning_rate": 9.998165779162734e-06, + "loss": 2068.8539, + "step": 53880 + }, + { + "epoch": 0.10886120953308258, + "grad_norm": 100124.25, + "learning_rate": 9.998156312814043e-06, + "loss": 9154.4164, + "step": 53890 + }, + { + "epoch": 0.1088814101657664, + "grad_norm": 754.1126708984375, + "learning_rate": 9.998146822104943e-06, + "loss": 8358.7203, + "step": 53900 + }, + { + "epoch": 0.1089016107984502, + "grad_norm": 20092.73828125, + "learning_rate": 9.998137307035486e-06, + "loss": 2018.3604, + "step": 53910 + }, + { + "epoch": 0.10892181143113402, + "grad_norm": 4136.91357421875, + "learning_rate": 9.99812776760571e-06, + "loss": 2957.151, + "step": 53920 + }, + { + "epoch": 0.10894201206381784, + "grad_norm": 26106.203125, + "learning_rate": 9.998118203815666e-06, + "loss": 4337.9789, + "step": 53930 + }, + { + "epoch": 0.10896221269650165, + "grad_norm": 473.61151123046875, + "learning_rate": 9.9981086156654e-06, + "loss": 2535.9977, + "step": 53940 + }, + { + "epoch": 0.10898241332918547, + "grad_norm": 8716.7802734375, + "learning_rate": 9.99809900315496e-06, + "loss": 5722.7082, + "step": 53950 + }, + { + "epoch": 0.10900261396186929, + "grad_norm": 708.4292602539062, + "learning_rate": 9.998089366284392e-06, + "loss": 5810.2984, + "step": 53960 + }, + { + "epoch": 0.1090228145945531, + "grad_norm": 3476.0546875, + "learning_rate": 9.99807970505374e-06, + "loss": 6402.9574, + "step": 53970 + }, + { + "epoch": 0.10904301522723692, + "grad_norm": 4076.883056640625, + "learning_rate": 9.998070019463055e-06, + "loss": 3956.5555, + "step": 53980 + }, + { + "epoch": 0.10906321585992074, + "grad_norm": 7867.77001953125, + "learning_rate": 9.998060309512384e-06, + "loss": 2171.1113, + "step": 53990 + }, + { + "epoch": 0.10908341649260454, + "grad_norm": 827.490966796875, + "learning_rate": 9.998050575201772e-06, + "loss": 3472.6961, + "step": 54000 + }, + { + "epoch": 0.10910361712528836, + "grad_norm": 20614.021484375, + "learning_rate": 9.99804081653127e-06, + "loss": 4589.6535, + "step": 54010 + }, + { + "epoch": 0.10912381775797218, + "grad_norm": 531.4343872070312, + "learning_rate": 9.99803103350092e-06, + "loss": 4948.9387, + "step": 54020 + }, + { + "epoch": 0.10914401839065599, + "grad_norm": 5072.2626953125, + "learning_rate": 9.998021226110775e-06, + "loss": 2334.3906, + "step": 54030 + }, + { + "epoch": 0.10916421902333981, + "grad_norm": 53075.00390625, + "learning_rate": 9.99801139436088e-06, + "loss": 3990.3672, + "step": 54040 + }, + { + "epoch": 0.10918441965602363, + "grad_norm": 1257.4490966796875, + "learning_rate": 9.998001538251283e-06, + "loss": 6977.3758, + "step": 54050 + }, + { + "epoch": 0.10920462028870745, + "grad_norm": 412.9657287597656, + "learning_rate": 9.997991657782033e-06, + "loss": 5131.9707, + "step": 54060 + }, + { + "epoch": 0.10922482092139126, + "grad_norm": 425.0726318359375, + "learning_rate": 9.997981752953179e-06, + "loss": 4494.6801, + "step": 54070 + }, + { + "epoch": 0.10924502155407508, + "grad_norm": 12450.2294921875, + "learning_rate": 9.997971823764766e-06, + "loss": 9108.5812, + "step": 54080 + }, + { + "epoch": 0.1092652221867589, + "grad_norm": 128191.890625, + "learning_rate": 9.997961870216849e-06, + "loss": 8835.4445, + "step": 54090 + }, + { + "epoch": 0.1092854228194427, + "grad_norm": 3049.612060546875, + "learning_rate": 9.997951892309468e-06, + "loss": 2317.3311, + "step": 54100 + }, + { + "epoch": 0.10930562345212652, + "grad_norm": 16235.4697265625, + "learning_rate": 9.997941890042677e-06, + "loss": 5523.1164, + "step": 54110 + }, + { + "epoch": 0.10932582408481034, + "grad_norm": 513.9993896484375, + "learning_rate": 9.997931863416522e-06, + "loss": 6951.8477, + "step": 54120 + }, + { + "epoch": 0.10934602471749415, + "grad_norm": 44560.1953125, + "learning_rate": 9.997921812431055e-06, + "loss": 7568.3734, + "step": 54130 + }, + { + "epoch": 0.10936622535017797, + "grad_norm": 517.510009765625, + "learning_rate": 9.997911737086322e-06, + "loss": 5470.7777, + "step": 54140 + }, + { + "epoch": 0.10938642598286179, + "grad_norm": 23988.20703125, + "learning_rate": 9.997901637382375e-06, + "loss": 4839.4738, + "step": 54150 + }, + { + "epoch": 0.1094066266155456, + "grad_norm": 22544.16796875, + "learning_rate": 9.99789151331926e-06, + "loss": 2528.3027, + "step": 54160 + }, + { + "epoch": 0.10942682724822941, + "grad_norm": 86973.140625, + "learning_rate": 9.997881364897028e-06, + "loss": 4157.7602, + "step": 54170 + }, + { + "epoch": 0.10944702788091323, + "grad_norm": 30569.537109375, + "learning_rate": 9.99787119211573e-06, + "loss": 2009.9836, + "step": 54180 + }, + { + "epoch": 0.10946722851359704, + "grad_norm": 12969.32421875, + "learning_rate": 9.997860994975412e-06, + "loss": 7497.0008, + "step": 54190 + }, + { + "epoch": 0.10948742914628086, + "grad_norm": 777.6026611328125, + "learning_rate": 9.997850773476126e-06, + "loss": 3983.3262, + "step": 54200 + }, + { + "epoch": 0.10950762977896468, + "grad_norm": 931.8092651367188, + "learning_rate": 9.997840527617921e-06, + "loss": 6067.3016, + "step": 54210 + }, + { + "epoch": 0.1095278304116485, + "grad_norm": 33921.8515625, + "learning_rate": 9.99783025740085e-06, + "loss": 2945.1687, + "step": 54220 + }, + { + "epoch": 0.1095480310443323, + "grad_norm": 893.8612670898438, + "learning_rate": 9.997819962824958e-06, + "loss": 2142.4383, + "step": 54230 + }, + { + "epoch": 0.10956823167701613, + "grad_norm": 56865.64453125, + "learning_rate": 9.9978096438903e-06, + "loss": 2736.9033, + "step": 54240 + }, + { + "epoch": 0.10958843230969995, + "grad_norm": 673.6599731445312, + "learning_rate": 9.997799300596921e-06, + "loss": 3891.5188, + "step": 54250 + }, + { + "epoch": 0.10960863294238375, + "grad_norm": 5618.81298828125, + "learning_rate": 9.997788932944877e-06, + "loss": 2646.5344, + "step": 54260 + }, + { + "epoch": 0.10962883357506757, + "grad_norm": 6845.61279296875, + "learning_rate": 9.997778540934213e-06, + "loss": 1681.9572, + "step": 54270 + }, + { + "epoch": 0.10964903420775139, + "grad_norm": 34659.90625, + "learning_rate": 9.997768124564986e-06, + "loss": 4234.1422, + "step": 54280 + }, + { + "epoch": 0.1096692348404352, + "grad_norm": 485.23492431640625, + "learning_rate": 9.997757683837242e-06, + "loss": 3789.1109, + "step": 54290 + }, + { + "epoch": 0.10968943547311902, + "grad_norm": 2512.346923828125, + "learning_rate": 9.997747218751032e-06, + "loss": 2774.1367, + "step": 54300 + }, + { + "epoch": 0.10970963610580284, + "grad_norm": 11003.0244140625, + "learning_rate": 9.997736729306409e-06, + "loss": 4657.4004, + "step": 54310 + }, + { + "epoch": 0.10972983673848664, + "grad_norm": 682.3917236328125, + "learning_rate": 9.997726215503422e-06, + "loss": 4264.043, + "step": 54320 + }, + { + "epoch": 0.10975003737117046, + "grad_norm": 3842.923095703125, + "learning_rate": 9.997715677342126e-06, + "loss": 9543.975, + "step": 54330 + }, + { + "epoch": 0.10977023800385428, + "grad_norm": 6247.3974609375, + "learning_rate": 9.99770511482257e-06, + "loss": 7046.5391, + "step": 54340 + }, + { + "epoch": 0.10979043863653809, + "grad_norm": 15123.05859375, + "learning_rate": 9.997694527944804e-06, + "loss": 6483.1879, + "step": 54350 + }, + { + "epoch": 0.10981063926922191, + "grad_norm": 7651.1767578125, + "learning_rate": 9.99768391670888e-06, + "loss": 1372.0981, + "step": 54360 + }, + { + "epoch": 0.10983083990190573, + "grad_norm": 1910.1787109375, + "learning_rate": 9.997673281114852e-06, + "loss": 4569.4637, + "step": 54370 + }, + { + "epoch": 0.10985104053458955, + "grad_norm": 33822.27734375, + "learning_rate": 9.99766262116277e-06, + "loss": 6065.248, + "step": 54380 + }, + { + "epoch": 0.10987124116727336, + "grad_norm": 3884.65576171875, + "learning_rate": 9.997651936852689e-06, + "loss": 3803.3438, + "step": 54390 + }, + { + "epoch": 0.10989144179995718, + "grad_norm": 12663.931640625, + "learning_rate": 9.997641228184656e-06, + "loss": 2264.5621, + "step": 54400 + }, + { + "epoch": 0.109911642432641, + "grad_norm": 34595.50390625, + "learning_rate": 9.997630495158728e-06, + "loss": 3512.325, + "step": 54410 + }, + { + "epoch": 0.1099318430653248, + "grad_norm": 25391.79296875, + "learning_rate": 9.997619737774953e-06, + "loss": 7843.7109, + "step": 54420 + }, + { + "epoch": 0.10995204369800862, + "grad_norm": 362.8072204589844, + "learning_rate": 9.997608956033386e-06, + "loss": 2616.3967, + "step": 54430 + }, + { + "epoch": 0.10997224433069244, + "grad_norm": 100962.625, + "learning_rate": 9.99759814993408e-06, + "loss": 6397.4039, + "step": 54440 + }, + { + "epoch": 0.10999244496337625, + "grad_norm": 55933.9375, + "learning_rate": 9.997587319477084e-06, + "loss": 3826.0902, + "step": 54450 + }, + { + "epoch": 0.11001264559606007, + "grad_norm": 1301.2432861328125, + "learning_rate": 9.997576464662458e-06, + "loss": 2853.6965, + "step": 54460 + }, + { + "epoch": 0.11003284622874389, + "grad_norm": 1364.34716796875, + "learning_rate": 9.997565585490247e-06, + "loss": 5908.9563, + "step": 54470 + }, + { + "epoch": 0.1100530468614277, + "grad_norm": 16520.150390625, + "learning_rate": 9.997554681960508e-06, + "loss": 4876.6621, + "step": 54480 + }, + { + "epoch": 0.11007324749411151, + "grad_norm": 749.9754028320312, + "learning_rate": 9.997543754073295e-06, + "loss": 2088.5072, + "step": 54490 + }, + { + "epoch": 0.11009344812679533, + "grad_norm": 30443.796875, + "learning_rate": 9.997532801828659e-06, + "loss": 3023.8535, + "step": 54500 + }, + { + "epoch": 0.11011364875947914, + "grad_norm": 6070.41845703125, + "learning_rate": 9.997521825226654e-06, + "loss": 2866.6957, + "step": 54510 + }, + { + "epoch": 0.11013384939216296, + "grad_norm": 41752.15625, + "learning_rate": 9.997510824267334e-06, + "loss": 2397.6857, + "step": 54520 + }, + { + "epoch": 0.11015405002484678, + "grad_norm": 3871.4306640625, + "learning_rate": 9.997499798950752e-06, + "loss": 4841.6547, + "step": 54530 + }, + { + "epoch": 0.1101742506575306, + "grad_norm": 32978.4609375, + "learning_rate": 9.997488749276962e-06, + "loss": 4281.7844, + "step": 54540 + }, + { + "epoch": 0.1101944512902144, + "grad_norm": 1877.014892578125, + "learning_rate": 9.997477675246018e-06, + "loss": 2333.5436, + "step": 54550 + }, + { + "epoch": 0.11021465192289823, + "grad_norm": 74060.1484375, + "learning_rate": 9.997466576857974e-06, + "loss": 9124.1586, + "step": 54560 + }, + { + "epoch": 0.11023485255558205, + "grad_norm": 2173.612060546875, + "learning_rate": 9.997455454112885e-06, + "loss": 2057.0039, + "step": 54570 + }, + { + "epoch": 0.11025505318826585, + "grad_norm": 179588.140625, + "learning_rate": 9.997444307010804e-06, + "loss": 7956.0172, + "step": 54580 + }, + { + "epoch": 0.11027525382094967, + "grad_norm": 12619.2451171875, + "learning_rate": 9.997433135551786e-06, + "loss": 6639.5852, + "step": 54590 + }, + { + "epoch": 0.11029545445363349, + "grad_norm": 29581.251953125, + "learning_rate": 9.997421939735885e-06, + "loss": 4083.7469, + "step": 54600 + }, + { + "epoch": 0.1103156550863173, + "grad_norm": 1129.6707763671875, + "learning_rate": 9.997410719563155e-06, + "loss": 3239.5666, + "step": 54610 + }, + { + "epoch": 0.11033585571900112, + "grad_norm": 503.1601257324219, + "learning_rate": 9.997399475033648e-06, + "loss": 2474.2996, + "step": 54620 + }, + { + "epoch": 0.11035605635168494, + "grad_norm": 6981.81201171875, + "learning_rate": 9.997388206147427e-06, + "loss": 7363.4633, + "step": 54630 + }, + { + "epoch": 0.11037625698436875, + "grad_norm": 3464.943115234375, + "learning_rate": 9.99737691290454e-06, + "loss": 3901.1309, + "step": 54640 + }, + { + "epoch": 0.11039645761705257, + "grad_norm": 95625.25, + "learning_rate": 9.997365595305045e-06, + "loss": 6201.1098, + "step": 54650 + }, + { + "epoch": 0.11041665824973639, + "grad_norm": 14615.6650390625, + "learning_rate": 9.997354253348994e-06, + "loss": 1928.1244, + "step": 54660 + }, + { + "epoch": 0.11043685888242019, + "grad_norm": 192286.140625, + "learning_rate": 9.997342887036446e-06, + "loss": 6000.0133, + "step": 54670 + }, + { + "epoch": 0.11045705951510401, + "grad_norm": 161183.875, + "learning_rate": 9.997331496367455e-06, + "loss": 7127.6008, + "step": 54680 + }, + { + "epoch": 0.11047726014778783, + "grad_norm": 14064.3046875, + "learning_rate": 9.997320081342076e-06, + "loss": 3443.8457, + "step": 54690 + }, + { + "epoch": 0.11049746078047164, + "grad_norm": 6198.77099609375, + "learning_rate": 9.997308641960365e-06, + "loss": 7120.0391, + "step": 54700 + }, + { + "epoch": 0.11051766141315546, + "grad_norm": 7252.20166015625, + "learning_rate": 9.997297178222378e-06, + "loss": 2012.3727, + "step": 54710 + }, + { + "epoch": 0.11053786204583928, + "grad_norm": 1122.0821533203125, + "learning_rate": 9.997285690128172e-06, + "loss": 6079.4914, + "step": 54720 + }, + { + "epoch": 0.1105580626785231, + "grad_norm": 6953.65869140625, + "learning_rate": 9.997274177677799e-06, + "loss": 3713.7469, + "step": 54730 + }, + { + "epoch": 0.1105782633112069, + "grad_norm": 23710.494140625, + "learning_rate": 9.997262640871319e-06, + "loss": 8017.5984, + "step": 54740 + }, + { + "epoch": 0.11059846394389072, + "grad_norm": 7876.19775390625, + "learning_rate": 9.997251079708788e-06, + "loss": 1544.3514, + "step": 54750 + }, + { + "epoch": 0.11061866457657454, + "grad_norm": 1701.9495849609375, + "learning_rate": 9.997239494190258e-06, + "loss": 4749.9109, + "step": 54760 + }, + { + "epoch": 0.11063886520925835, + "grad_norm": 6498.0068359375, + "learning_rate": 9.997227884315792e-06, + "loss": 3600.2637, + "step": 54770 + }, + { + "epoch": 0.11065906584194217, + "grad_norm": 23484.72265625, + "learning_rate": 9.997216250085441e-06, + "loss": 2416.601, + "step": 54780 + }, + { + "epoch": 0.11067926647462599, + "grad_norm": 23360.91015625, + "learning_rate": 9.997204591499266e-06, + "loss": 2862.3529, + "step": 54790 + }, + { + "epoch": 0.1106994671073098, + "grad_norm": 46505.2421875, + "learning_rate": 9.997192908557322e-06, + "loss": 3413.4281, + "step": 54800 + }, + { + "epoch": 0.11071966773999362, + "grad_norm": 10902.9033203125, + "learning_rate": 9.997181201259664e-06, + "loss": 4637.4484, + "step": 54810 + }, + { + "epoch": 0.11073986837267744, + "grad_norm": 185372.9375, + "learning_rate": 9.997169469606353e-06, + "loss": 6311.3496, + "step": 54820 + }, + { + "epoch": 0.11076006900536124, + "grad_norm": 3500.91357421875, + "learning_rate": 9.997157713597444e-06, + "loss": 4261.6121, + "step": 54830 + }, + { + "epoch": 0.11078026963804506, + "grad_norm": 20341.36328125, + "learning_rate": 9.997145933232994e-06, + "loss": 6429.3125, + "step": 54840 + }, + { + "epoch": 0.11080047027072888, + "grad_norm": 9728.8486328125, + "learning_rate": 9.99713412851306e-06, + "loss": 2784.4031, + "step": 54850 + }, + { + "epoch": 0.11082067090341269, + "grad_norm": 34319.8046875, + "learning_rate": 9.9971222994377e-06, + "loss": 3204.3385, + "step": 54860 + }, + { + "epoch": 0.11084087153609651, + "grad_norm": 5740.38037109375, + "learning_rate": 9.997110446006974e-06, + "loss": 4105.3504, + "step": 54870 + }, + { + "epoch": 0.11086107216878033, + "grad_norm": 3267.24462890625, + "learning_rate": 9.997098568220937e-06, + "loss": 2918.3607, + "step": 54880 + }, + { + "epoch": 0.11088127280146415, + "grad_norm": 24133.806640625, + "learning_rate": 9.997086666079647e-06, + "loss": 7687.5039, + "step": 54890 + }, + { + "epoch": 0.11090147343414795, + "grad_norm": 147.5865478515625, + "learning_rate": 9.997074739583162e-06, + "loss": 6872.7695, + "step": 54900 + }, + { + "epoch": 0.11092167406683177, + "grad_norm": 199.70077514648438, + "learning_rate": 9.997062788731541e-06, + "loss": 5380.0008, + "step": 54910 + }, + { + "epoch": 0.1109418746995156, + "grad_norm": 13696.0732421875, + "learning_rate": 9.997050813524843e-06, + "loss": 2086.9426, + "step": 54920 + }, + { + "epoch": 0.1109620753321994, + "grad_norm": 1632.1534423828125, + "learning_rate": 9.997038813963126e-06, + "loss": 4446.6348, + "step": 54930 + }, + { + "epoch": 0.11098227596488322, + "grad_norm": 4829.3115234375, + "learning_rate": 9.997026790046446e-06, + "loss": 3325.323, + "step": 54940 + }, + { + "epoch": 0.11100247659756704, + "grad_norm": 9197.9765625, + "learning_rate": 9.997014741774866e-06, + "loss": 3485.7488, + "step": 54950 + }, + { + "epoch": 0.11102267723025085, + "grad_norm": 183.0758056640625, + "learning_rate": 9.99700266914844e-06, + "loss": 3512.2223, + "step": 54960 + }, + { + "epoch": 0.11104287786293467, + "grad_norm": 4374.107421875, + "learning_rate": 9.996990572167229e-06, + "loss": 5665.1824, + "step": 54970 + }, + { + "epoch": 0.11106307849561849, + "grad_norm": 569.2296752929688, + "learning_rate": 9.996978450831293e-06, + "loss": 2395.3918, + "step": 54980 + }, + { + "epoch": 0.11108327912830229, + "grad_norm": 107799.2578125, + "learning_rate": 9.99696630514069e-06, + "loss": 8269.2359, + "step": 54990 + }, + { + "epoch": 0.11110347976098611, + "grad_norm": 2438.061279296875, + "learning_rate": 9.99695413509548e-06, + "loss": 2097.7146, + "step": 55000 + }, + { + "epoch": 0.11112368039366993, + "grad_norm": 7055.275390625, + "learning_rate": 9.99694194069572e-06, + "loss": 1498.0023, + "step": 55010 + }, + { + "epoch": 0.11114388102635374, + "grad_norm": 7516.52783203125, + "learning_rate": 9.996929721941472e-06, + "loss": 2804.5531, + "step": 55020 + }, + { + "epoch": 0.11116408165903756, + "grad_norm": 23923.365234375, + "learning_rate": 9.996917478832794e-06, + "loss": 3716.6215, + "step": 55030 + }, + { + "epoch": 0.11118428229172138, + "grad_norm": 988.2357788085938, + "learning_rate": 9.996905211369748e-06, + "loss": 3750.8789, + "step": 55040 + }, + { + "epoch": 0.1112044829244052, + "grad_norm": 4953.759765625, + "learning_rate": 9.99689291955239e-06, + "loss": 2348.9508, + "step": 55050 + }, + { + "epoch": 0.111224683557089, + "grad_norm": 709.515625, + "learning_rate": 9.996880603380784e-06, + "loss": 5365.6652, + "step": 55060 + }, + { + "epoch": 0.11124488418977282, + "grad_norm": 4743.76123046875, + "learning_rate": 9.996868262854986e-06, + "loss": 4214.6387, + "step": 55070 + }, + { + "epoch": 0.11126508482245664, + "grad_norm": 1816.921630859375, + "learning_rate": 9.996855897975058e-06, + "loss": 6016.9336, + "step": 55080 + }, + { + "epoch": 0.11128528545514045, + "grad_norm": 12514.736328125, + "learning_rate": 9.996843508741061e-06, + "loss": 1669.5365, + "step": 55090 + }, + { + "epoch": 0.11130548608782427, + "grad_norm": 4076.854248046875, + "learning_rate": 9.996831095153054e-06, + "loss": 2320.6836, + "step": 55100 + }, + { + "epoch": 0.11132568672050809, + "grad_norm": 32247.869140625, + "learning_rate": 9.9968186572111e-06, + "loss": 3351.5148, + "step": 55110 + }, + { + "epoch": 0.1113458873531919, + "grad_norm": 7290.83447265625, + "learning_rate": 9.996806194915258e-06, + "loss": 2085.5801, + "step": 55120 + }, + { + "epoch": 0.11136608798587572, + "grad_norm": 12009.90625, + "learning_rate": 9.996793708265586e-06, + "loss": 3350.8793, + "step": 55130 + }, + { + "epoch": 0.11138628861855954, + "grad_norm": 31148.962890625, + "learning_rate": 9.99678119726215e-06, + "loss": 4922.7984, + "step": 55140 + }, + { + "epoch": 0.11140648925124334, + "grad_norm": 3148.75, + "learning_rate": 9.996768661905008e-06, + "loss": 2482.6707, + "step": 55150 + }, + { + "epoch": 0.11142668988392716, + "grad_norm": 578.828125, + "learning_rate": 9.996756102194222e-06, + "loss": 3873.9359, + "step": 55160 + }, + { + "epoch": 0.11144689051661098, + "grad_norm": 10657.10546875, + "learning_rate": 9.996743518129852e-06, + "loss": 4023.5707, + "step": 55170 + }, + { + "epoch": 0.11146709114929479, + "grad_norm": 32522.109375, + "learning_rate": 9.99673090971196e-06, + "loss": 1775.7525, + "step": 55180 + }, + { + "epoch": 0.11148729178197861, + "grad_norm": 18232.703125, + "learning_rate": 9.996718276940608e-06, + "loss": 10137.6297, + "step": 55190 + }, + { + "epoch": 0.11150749241466243, + "grad_norm": 1496.226806640625, + "learning_rate": 9.996705619815857e-06, + "loss": 5381.1566, + "step": 55200 + }, + { + "epoch": 0.11152769304734625, + "grad_norm": 212.05062866210938, + "learning_rate": 9.996692938337768e-06, + "loss": 3934.7309, + "step": 55210 + }, + { + "epoch": 0.11154789368003006, + "grad_norm": 66939.265625, + "learning_rate": 9.996680232506404e-06, + "loss": 4231.3687, + "step": 55220 + }, + { + "epoch": 0.11156809431271388, + "grad_norm": 1138.9381103515625, + "learning_rate": 9.996667502321829e-06, + "loss": 7097.9242, + "step": 55230 + }, + { + "epoch": 0.1115882949453977, + "grad_norm": 3356.829833984375, + "learning_rate": 9.9966547477841e-06, + "loss": 4870.4555, + "step": 55240 + }, + { + "epoch": 0.1116084955780815, + "grad_norm": 5881.2724609375, + "learning_rate": 9.996641968893281e-06, + "loss": 3201.582, + "step": 55250 + }, + { + "epoch": 0.11162869621076532, + "grad_norm": 1343.0274658203125, + "learning_rate": 9.996629165649437e-06, + "loss": 1005.6657, + "step": 55260 + }, + { + "epoch": 0.11164889684344914, + "grad_norm": 84078.3359375, + "learning_rate": 9.996616338052629e-06, + "loss": 3233.8281, + "step": 55270 + }, + { + "epoch": 0.11166909747613295, + "grad_norm": 3869.47705078125, + "learning_rate": 9.996603486102918e-06, + "loss": 1662.399, + "step": 55280 + }, + { + "epoch": 0.11168929810881677, + "grad_norm": 2372.047607421875, + "learning_rate": 9.996590609800367e-06, + "loss": 6763.7469, + "step": 55290 + }, + { + "epoch": 0.11170949874150059, + "grad_norm": 4361.36669921875, + "learning_rate": 9.99657770914504e-06, + "loss": 3703.7129, + "step": 55300 + }, + { + "epoch": 0.1117296993741844, + "grad_norm": 1040.2069091796875, + "learning_rate": 9.996564784137e-06, + "loss": 2387.2191, + "step": 55310 + }, + { + "epoch": 0.11174990000686821, + "grad_norm": 10483.0634765625, + "learning_rate": 9.996551834776309e-06, + "loss": 4021.952, + "step": 55320 + }, + { + "epoch": 0.11177010063955203, + "grad_norm": 33695.640625, + "learning_rate": 9.996538861063029e-06, + "loss": 4866.0012, + "step": 55330 + }, + { + "epoch": 0.11179030127223584, + "grad_norm": 2039.1519775390625, + "learning_rate": 9.996525862997225e-06, + "loss": 2897.3834, + "step": 55340 + }, + { + "epoch": 0.11181050190491966, + "grad_norm": 600.5106811523438, + "learning_rate": 9.99651284057896e-06, + "loss": 4839.8352, + "step": 55350 + }, + { + "epoch": 0.11183070253760348, + "grad_norm": 10709.404296875, + "learning_rate": 9.9964997938083e-06, + "loss": 4373.7176, + "step": 55360 + }, + { + "epoch": 0.1118509031702873, + "grad_norm": 3591.47314453125, + "learning_rate": 9.996486722685302e-06, + "loss": 4945.4812, + "step": 55370 + }, + { + "epoch": 0.1118711038029711, + "grad_norm": 6474.33251953125, + "learning_rate": 9.996473627210035e-06, + "loss": 1424.6888, + "step": 55380 + }, + { + "epoch": 0.11189130443565493, + "grad_norm": 11621.1904296875, + "learning_rate": 9.996460507382563e-06, + "loss": 3223.0604, + "step": 55390 + }, + { + "epoch": 0.11191150506833875, + "grad_norm": 37557.95703125, + "learning_rate": 9.996447363202947e-06, + "loss": 3041.7432, + "step": 55400 + }, + { + "epoch": 0.11193170570102255, + "grad_norm": 13320.73046875, + "learning_rate": 9.996434194671254e-06, + "loss": 6070.8809, + "step": 55410 + }, + { + "epoch": 0.11195190633370637, + "grad_norm": 56659.73046875, + "learning_rate": 9.996421001787545e-06, + "loss": 2976.6852, + "step": 55420 + }, + { + "epoch": 0.11197210696639019, + "grad_norm": 1293.3804931640625, + "learning_rate": 9.996407784551888e-06, + "loss": 4950.184, + "step": 55430 + }, + { + "epoch": 0.111992307599074, + "grad_norm": 9915.994140625, + "learning_rate": 9.996394542964343e-06, + "loss": 1378.2326, + "step": 55440 + }, + { + "epoch": 0.11201250823175782, + "grad_norm": 5720.10986328125, + "learning_rate": 9.996381277024978e-06, + "loss": 5243.7883, + "step": 55450 + }, + { + "epoch": 0.11203270886444164, + "grad_norm": 106850.078125, + "learning_rate": 9.996367986733857e-06, + "loss": 7430.55, + "step": 55460 + }, + { + "epoch": 0.11205290949712544, + "grad_norm": 2613.973388671875, + "learning_rate": 9.996354672091044e-06, + "loss": 1867.902, + "step": 55470 + }, + { + "epoch": 0.11207311012980926, + "grad_norm": 4221.0048828125, + "learning_rate": 9.996341333096606e-06, + "loss": 2318.25, + "step": 55480 + }, + { + "epoch": 0.11209331076249308, + "grad_norm": 18015.41796875, + "learning_rate": 9.996327969750605e-06, + "loss": 5364.3844, + "step": 55490 + }, + { + "epoch": 0.11211351139517689, + "grad_norm": 6169.80859375, + "learning_rate": 9.996314582053106e-06, + "loss": 2830.618, + "step": 55500 + }, + { + "epoch": 0.11213371202786071, + "grad_norm": 18867.0390625, + "learning_rate": 9.996301170004179e-06, + "loss": 3592.1879, + "step": 55510 + }, + { + "epoch": 0.11215391266054453, + "grad_norm": 6904.8564453125, + "learning_rate": 9.996287733603883e-06, + "loss": 2048.3002, + "step": 55520 + }, + { + "epoch": 0.11217411329322835, + "grad_norm": 1623.1678466796875, + "learning_rate": 9.996274272852289e-06, + "loss": 4706.3566, + "step": 55530 + }, + { + "epoch": 0.11219431392591216, + "grad_norm": 8040.26025390625, + "learning_rate": 9.996260787749457e-06, + "loss": 1662.0385, + "step": 55540 + }, + { + "epoch": 0.11221451455859598, + "grad_norm": 84151.875, + "learning_rate": 9.99624727829546e-06, + "loss": 4825.0797, + "step": 55550 + }, + { + "epoch": 0.1122347151912798, + "grad_norm": 4682.611328125, + "learning_rate": 9.996233744490356e-06, + "loss": 6077.5418, + "step": 55560 + }, + { + "epoch": 0.1122549158239636, + "grad_norm": 8294.693359375, + "learning_rate": 9.996220186334217e-06, + "loss": 4274.2672, + "step": 55570 + }, + { + "epoch": 0.11227511645664742, + "grad_norm": 10015.14453125, + "learning_rate": 9.996206603827105e-06, + "loss": 3454.623, + "step": 55580 + }, + { + "epoch": 0.11229531708933124, + "grad_norm": 111351.3203125, + "learning_rate": 9.99619299696909e-06, + "loss": 10132.2375, + "step": 55590 + }, + { + "epoch": 0.11231551772201505, + "grad_norm": 12163.3984375, + "learning_rate": 9.996179365760235e-06, + "loss": 8123.5281, + "step": 55600 + }, + { + "epoch": 0.11233571835469887, + "grad_norm": 9821.9609375, + "learning_rate": 9.996165710200607e-06, + "loss": 4970.5098, + "step": 55610 + }, + { + "epoch": 0.11235591898738269, + "grad_norm": 31169.875, + "learning_rate": 9.996152030290276e-06, + "loss": 3064.4234, + "step": 55620 + }, + { + "epoch": 0.1123761196200665, + "grad_norm": 13832.1044921875, + "learning_rate": 9.996138326029303e-06, + "loss": 4929.9793, + "step": 55630 + }, + { + "epoch": 0.11239632025275031, + "grad_norm": 28933.5390625, + "learning_rate": 9.99612459741776e-06, + "loss": 1723.5926, + "step": 55640 + }, + { + "epoch": 0.11241652088543413, + "grad_norm": 87815.484375, + "learning_rate": 9.99611084445571e-06, + "loss": 5926.8449, + "step": 55650 + }, + { + "epoch": 0.11243672151811794, + "grad_norm": 2299.583740234375, + "learning_rate": 9.996097067143223e-06, + "loss": 2720.9836, + "step": 55660 + }, + { + "epoch": 0.11245692215080176, + "grad_norm": 14651.09375, + "learning_rate": 9.996083265480366e-06, + "loss": 2674.9475, + "step": 55670 + }, + { + "epoch": 0.11247712278348558, + "grad_norm": 20600.091796875, + "learning_rate": 9.996069439467203e-06, + "loss": 2987.3277, + "step": 55680 + }, + { + "epoch": 0.1124973234161694, + "grad_norm": 32464.23046875, + "learning_rate": 9.996055589103804e-06, + "loss": 7489.2094, + "step": 55690 + }, + { + "epoch": 0.1125175240488532, + "grad_norm": 11861.640625, + "learning_rate": 9.996041714390235e-06, + "loss": 6025.4625, + "step": 55700 + }, + { + "epoch": 0.11253772468153703, + "grad_norm": 2306.12451171875, + "learning_rate": 9.996027815326565e-06, + "loss": 4796.5586, + "step": 55710 + }, + { + "epoch": 0.11255792531422085, + "grad_norm": 35547.0703125, + "learning_rate": 9.996013891912862e-06, + "loss": 7910.1898, + "step": 55720 + }, + { + "epoch": 0.11257812594690465, + "grad_norm": 459.864990234375, + "learning_rate": 9.995999944149192e-06, + "loss": 4477.1617, + "step": 55730 + }, + { + "epoch": 0.11259832657958847, + "grad_norm": 5588.7060546875, + "learning_rate": 9.995985972035626e-06, + "loss": 2910.2863, + "step": 55740 + }, + { + "epoch": 0.11261852721227229, + "grad_norm": 14653.892578125, + "learning_rate": 9.995971975572231e-06, + "loss": 2436.2279, + "step": 55750 + }, + { + "epoch": 0.1126387278449561, + "grad_norm": 56629.59375, + "learning_rate": 9.995957954759073e-06, + "loss": 4398.2699, + "step": 55760 + }, + { + "epoch": 0.11265892847763992, + "grad_norm": 11315.1591796875, + "learning_rate": 9.995943909596222e-06, + "loss": 8884.6516, + "step": 55770 + }, + { + "epoch": 0.11267912911032374, + "grad_norm": 39686.421875, + "learning_rate": 9.995929840083746e-06, + "loss": 3765.7238, + "step": 55780 + }, + { + "epoch": 0.11269932974300755, + "grad_norm": 59372.8984375, + "learning_rate": 9.995915746221715e-06, + "loss": 7462.5398, + "step": 55790 + }, + { + "epoch": 0.11271953037569137, + "grad_norm": 7950.59375, + "learning_rate": 9.995901628010196e-06, + "loss": 2677.808, + "step": 55800 + }, + { + "epoch": 0.11273973100837519, + "grad_norm": 19104.666015625, + "learning_rate": 9.995887485449257e-06, + "loss": 2650.4617, + "step": 55810 + }, + { + "epoch": 0.11275993164105899, + "grad_norm": 4158.1640625, + "learning_rate": 9.99587331853897e-06, + "loss": 1883.3686, + "step": 55820 + }, + { + "epoch": 0.11278013227374281, + "grad_norm": 3591.83154296875, + "learning_rate": 9.995859127279402e-06, + "loss": 5922.5043, + "step": 55830 + }, + { + "epoch": 0.11280033290642663, + "grad_norm": 14669.65234375, + "learning_rate": 9.995844911670623e-06, + "loss": 4130.4945, + "step": 55840 + }, + { + "epoch": 0.11282053353911045, + "grad_norm": 4859.18505859375, + "learning_rate": 9.995830671712701e-06, + "loss": 7052.6414, + "step": 55850 + }, + { + "epoch": 0.11284073417179426, + "grad_norm": 1143.2630615234375, + "learning_rate": 9.995816407405708e-06, + "loss": 2457.9475, + "step": 55860 + }, + { + "epoch": 0.11286093480447808, + "grad_norm": 23438.201171875, + "learning_rate": 9.995802118749708e-06, + "loss": 2551.325, + "step": 55870 + }, + { + "epoch": 0.1128811354371619, + "grad_norm": 90451.546875, + "learning_rate": 9.995787805744778e-06, + "loss": 5333.1953, + "step": 55880 + }, + { + "epoch": 0.1129013360698457, + "grad_norm": 5145.1416015625, + "learning_rate": 9.995773468390983e-06, + "loss": 4980.3859, + "step": 55890 + }, + { + "epoch": 0.11292153670252952, + "grad_norm": 17858.203125, + "learning_rate": 9.995759106688394e-06, + "loss": 5506.5992, + "step": 55900 + }, + { + "epoch": 0.11294173733521334, + "grad_norm": 4581.48876953125, + "learning_rate": 9.99574472063708e-06, + "loss": 2910.9873, + "step": 55910 + }, + { + "epoch": 0.11296193796789715, + "grad_norm": 28500.595703125, + "learning_rate": 9.995730310237113e-06, + "loss": 3569.1195, + "step": 55920 + }, + { + "epoch": 0.11298213860058097, + "grad_norm": 58488.60546875, + "learning_rate": 9.995715875488563e-06, + "loss": 7236.4227, + "step": 55930 + }, + { + "epoch": 0.11300233923326479, + "grad_norm": 23503.498046875, + "learning_rate": 9.9957014163915e-06, + "loss": 1311.6824, + "step": 55940 + }, + { + "epoch": 0.1130225398659486, + "grad_norm": 38849.94140625, + "learning_rate": 9.995686932945993e-06, + "loss": 2677.5717, + "step": 55950 + }, + { + "epoch": 0.11304274049863242, + "grad_norm": 129712.3671875, + "learning_rate": 9.995672425152115e-06, + "loss": 3911.148, + "step": 55960 + }, + { + "epoch": 0.11306294113131624, + "grad_norm": 1446.627685546875, + "learning_rate": 9.995657893009933e-06, + "loss": 2847.4697, + "step": 55970 + }, + { + "epoch": 0.11308314176400004, + "grad_norm": 20081.189453125, + "learning_rate": 9.995643336519523e-06, + "loss": 7611.7367, + "step": 55980 + }, + { + "epoch": 0.11310334239668386, + "grad_norm": 387.56463623046875, + "learning_rate": 9.995628755680952e-06, + "loss": 2644.4437, + "step": 55990 + }, + { + "epoch": 0.11312354302936768, + "grad_norm": 146765.1875, + "learning_rate": 9.995614150494293e-06, + "loss": 6975.3687, + "step": 56000 + }, + { + "epoch": 0.1131437436620515, + "grad_norm": 1601.614501953125, + "learning_rate": 9.995599520959615e-06, + "loss": 4299.0715, + "step": 56010 + }, + { + "epoch": 0.11316394429473531, + "grad_norm": 478.5643005371094, + "learning_rate": 9.995584867076994e-06, + "loss": 4015.5262, + "step": 56020 + }, + { + "epoch": 0.11318414492741913, + "grad_norm": 5723.47119140625, + "learning_rate": 9.995570188846495e-06, + "loss": 2084.4684, + "step": 56030 + }, + { + "epoch": 0.11320434556010295, + "grad_norm": 15092.4111328125, + "learning_rate": 9.995555486268193e-06, + "loss": 2764.2402, + "step": 56040 + }, + { + "epoch": 0.11322454619278675, + "grad_norm": 6172.3916015625, + "learning_rate": 9.995540759342161e-06, + "loss": 2885.183, + "step": 56050 + }, + { + "epoch": 0.11324474682547057, + "grad_norm": 1072.4716796875, + "learning_rate": 9.995526008068469e-06, + "loss": 4223.4129, + "step": 56060 + }, + { + "epoch": 0.1132649474581544, + "grad_norm": 44044.23046875, + "learning_rate": 9.995511232447189e-06, + "loss": 4828.5207, + "step": 56070 + }, + { + "epoch": 0.1132851480908382, + "grad_norm": 16009.3388671875, + "learning_rate": 9.995496432478392e-06, + "loss": 1296.4955, + "step": 56080 + }, + { + "epoch": 0.11330534872352202, + "grad_norm": 34169.00390625, + "learning_rate": 9.995481608162154e-06, + "loss": 4775.0371, + "step": 56090 + }, + { + "epoch": 0.11332554935620584, + "grad_norm": 271572.5625, + "learning_rate": 9.995466759498543e-06, + "loss": 4360.5105, + "step": 56100 + }, + { + "epoch": 0.11334574998888965, + "grad_norm": 6269.5205078125, + "learning_rate": 9.995451886487632e-06, + "loss": 2966.9197, + "step": 56110 + }, + { + "epoch": 0.11336595062157347, + "grad_norm": 7950.91796875, + "learning_rate": 9.995436989129495e-06, + "loss": 1577.7459, + "step": 56120 + }, + { + "epoch": 0.11338615125425729, + "grad_norm": 4436.078125, + "learning_rate": 9.995422067424203e-06, + "loss": 2663.9029, + "step": 56130 + }, + { + "epoch": 0.11340635188694109, + "grad_norm": 55322.83203125, + "learning_rate": 9.995407121371832e-06, + "loss": 10136.9875, + "step": 56140 + }, + { + "epoch": 0.11342655251962491, + "grad_norm": 16876.490234375, + "learning_rate": 9.995392150972451e-06, + "loss": 2071.2082, + "step": 56150 + }, + { + "epoch": 0.11344675315230873, + "grad_norm": 197.18588256835938, + "learning_rate": 9.995377156226133e-06, + "loss": 6816.5586, + "step": 56160 + }, + { + "epoch": 0.11346695378499255, + "grad_norm": 6442.81103515625, + "learning_rate": 9.995362137132956e-06, + "loss": 1930.8771, + "step": 56170 + }, + { + "epoch": 0.11348715441767636, + "grad_norm": 13070.658203125, + "learning_rate": 9.995347093692987e-06, + "loss": 9212.2008, + "step": 56180 + }, + { + "epoch": 0.11350735505036018, + "grad_norm": 31015.4921875, + "learning_rate": 9.995332025906304e-06, + "loss": 3642.0535, + "step": 56190 + }, + { + "epoch": 0.113527555683044, + "grad_norm": 56342.40625, + "learning_rate": 9.995316933772978e-06, + "loss": 6884.8664, + "step": 56200 + }, + { + "epoch": 0.1135477563157278, + "grad_norm": 7747.71142578125, + "learning_rate": 9.995301817293084e-06, + "loss": 5128.2336, + "step": 56210 + }, + { + "epoch": 0.11356795694841162, + "grad_norm": 12895.626953125, + "learning_rate": 9.995286676466694e-06, + "loss": 2004.1988, + "step": 56220 + }, + { + "epoch": 0.11358815758109544, + "grad_norm": 2409.12109375, + "learning_rate": 9.995271511293881e-06, + "loss": 6105.9441, + "step": 56230 + }, + { + "epoch": 0.11360835821377925, + "grad_norm": 19998.169921875, + "learning_rate": 9.995256321774722e-06, + "loss": 2719.749, + "step": 56240 + }, + { + "epoch": 0.11362855884646307, + "grad_norm": 27439.43359375, + "learning_rate": 9.99524110790929e-06, + "loss": 4002.7285, + "step": 56250 + }, + { + "epoch": 0.11364875947914689, + "grad_norm": 1473.247802734375, + "learning_rate": 9.995225869697657e-06, + "loss": 4355.3234, + "step": 56260 + }, + { + "epoch": 0.1136689601118307, + "grad_norm": 25715.322265625, + "learning_rate": 9.9952106071399e-06, + "loss": 6041.5344, + "step": 56270 + }, + { + "epoch": 0.11368916074451452, + "grad_norm": 4710.1513671875, + "learning_rate": 9.995195320236093e-06, + "loss": 2383.1303, + "step": 56280 + }, + { + "epoch": 0.11370936137719834, + "grad_norm": 4398.97265625, + "learning_rate": 9.995180008986309e-06, + "loss": 3961.8957, + "step": 56290 + }, + { + "epoch": 0.11372956200988214, + "grad_norm": 931.576904296875, + "learning_rate": 9.995164673390624e-06, + "loss": 3201.1633, + "step": 56300 + }, + { + "epoch": 0.11374976264256596, + "grad_norm": 19302.30859375, + "learning_rate": 9.995149313449114e-06, + "loss": 4173.566, + "step": 56310 + }, + { + "epoch": 0.11376996327524978, + "grad_norm": 35615.9375, + "learning_rate": 9.995133929161848e-06, + "loss": 4818.6008, + "step": 56320 + }, + { + "epoch": 0.1137901639079336, + "grad_norm": 722.1571044921875, + "learning_rate": 9.995118520528908e-06, + "loss": 1514.9068, + "step": 56330 + }, + { + "epoch": 0.11381036454061741, + "grad_norm": 6057.6552734375, + "learning_rate": 9.995103087550366e-06, + "loss": 5139.6371, + "step": 56340 + }, + { + "epoch": 0.11383056517330123, + "grad_norm": 20798.6875, + "learning_rate": 9.995087630226295e-06, + "loss": 2745.3086, + "step": 56350 + }, + { + "epoch": 0.11385076580598505, + "grad_norm": 149410.65625, + "learning_rate": 9.995072148556776e-06, + "loss": 6537.5371, + "step": 56360 + }, + { + "epoch": 0.11387096643866886, + "grad_norm": 3614.840576171875, + "learning_rate": 9.995056642541879e-06, + "loss": 3018.8619, + "step": 56370 + }, + { + "epoch": 0.11389116707135268, + "grad_norm": 38048.5703125, + "learning_rate": 9.995041112181683e-06, + "loss": 2812.8697, + "step": 56380 + }, + { + "epoch": 0.1139113677040365, + "grad_norm": 5934.4140625, + "learning_rate": 9.99502555747626e-06, + "loss": 2529.7457, + "step": 56390 + }, + { + "epoch": 0.1139315683367203, + "grad_norm": 8015.69140625, + "learning_rate": 9.995009978425692e-06, + "loss": 3087.6773, + "step": 56400 + }, + { + "epoch": 0.11395176896940412, + "grad_norm": 3203.959228515625, + "learning_rate": 9.994994375030048e-06, + "loss": 1996.1609, + "step": 56410 + }, + { + "epoch": 0.11397196960208794, + "grad_norm": 952.6238403320312, + "learning_rate": 9.994978747289408e-06, + "loss": 4198.4305, + "step": 56420 + }, + { + "epoch": 0.11399217023477175, + "grad_norm": 618.7186889648438, + "learning_rate": 9.994963095203849e-06, + "loss": 2371.2389, + "step": 56430 + }, + { + "epoch": 0.11401237086745557, + "grad_norm": 19516.3046875, + "learning_rate": 9.994947418773445e-06, + "loss": 2718.134, + "step": 56440 + }, + { + "epoch": 0.11403257150013939, + "grad_norm": 760.2864990234375, + "learning_rate": 9.994931717998272e-06, + "loss": 4020.9895, + "step": 56450 + }, + { + "epoch": 0.1140527721328232, + "grad_norm": 57727.1484375, + "learning_rate": 9.99491599287841e-06, + "loss": 2206.4775, + "step": 56460 + }, + { + "epoch": 0.11407297276550701, + "grad_norm": 46438.16796875, + "learning_rate": 9.99490024341393e-06, + "loss": 4100.7859, + "step": 56470 + }, + { + "epoch": 0.11409317339819083, + "grad_norm": 54374.953125, + "learning_rate": 9.994884469604913e-06, + "loss": 4522.0141, + "step": 56480 + }, + { + "epoch": 0.11411337403087465, + "grad_norm": 9316.1708984375, + "learning_rate": 9.994868671451436e-06, + "loss": 1548.6319, + "step": 56490 + }, + { + "epoch": 0.11413357466355846, + "grad_norm": 47054.41796875, + "learning_rate": 9.994852848953574e-06, + "loss": 11595.2289, + "step": 56500 + }, + { + "epoch": 0.11415377529624228, + "grad_norm": 9749.3466796875, + "learning_rate": 9.994837002111407e-06, + "loss": 3259.9146, + "step": 56510 + }, + { + "epoch": 0.1141739759289261, + "grad_norm": 24859.787109375, + "learning_rate": 9.994821130925007e-06, + "loss": 1503.4775, + "step": 56520 + }, + { + "epoch": 0.1141941765616099, + "grad_norm": 13614.6962890625, + "learning_rate": 9.994805235394456e-06, + "loss": 4384.8262, + "step": 56530 + }, + { + "epoch": 0.11421437719429373, + "grad_norm": 6087.9130859375, + "learning_rate": 9.99478931551983e-06, + "loss": 6909.7812, + "step": 56540 + }, + { + "epoch": 0.11423457782697755, + "grad_norm": 25878.671875, + "learning_rate": 9.994773371301208e-06, + "loss": 7453.2164, + "step": 56550 + }, + { + "epoch": 0.11425477845966135, + "grad_norm": 23873.88671875, + "learning_rate": 9.994757402738666e-06, + "loss": 7934.2445, + "step": 56560 + }, + { + "epoch": 0.11427497909234517, + "grad_norm": 6792.705078125, + "learning_rate": 9.99474140983228e-06, + "loss": 3048.5936, + "step": 56570 + }, + { + "epoch": 0.11429517972502899, + "grad_norm": 16221.408203125, + "learning_rate": 9.994725392582132e-06, + "loss": 2257.227, + "step": 56580 + }, + { + "epoch": 0.1143153803577128, + "grad_norm": 19183.12109375, + "learning_rate": 9.994709350988299e-06, + "loss": 1731.9094, + "step": 56590 + }, + { + "epoch": 0.11433558099039662, + "grad_norm": 2740.642333984375, + "learning_rate": 9.994693285050858e-06, + "loss": 3384.2223, + "step": 56600 + }, + { + "epoch": 0.11435578162308044, + "grad_norm": 12590.6240234375, + "learning_rate": 9.994677194769886e-06, + "loss": 2775.8473, + "step": 56610 + }, + { + "epoch": 0.11437598225576424, + "grad_norm": 307.564208984375, + "learning_rate": 9.994661080145464e-06, + "loss": 4254.5246, + "step": 56620 + }, + { + "epoch": 0.11439618288844806, + "grad_norm": 14047.3857421875, + "learning_rate": 9.99464494117767e-06, + "loss": 7946.1078, + "step": 56630 + }, + { + "epoch": 0.11441638352113188, + "grad_norm": 4778.05419921875, + "learning_rate": 9.994628777866582e-06, + "loss": 10364.8969, + "step": 56640 + }, + { + "epoch": 0.1144365841538157, + "grad_norm": 997.20166015625, + "learning_rate": 9.99461259021228e-06, + "loss": 4048.8375, + "step": 56650 + }, + { + "epoch": 0.11445678478649951, + "grad_norm": 3167.2705078125, + "learning_rate": 9.99459637821484e-06, + "loss": 4264.7629, + "step": 56660 + }, + { + "epoch": 0.11447698541918333, + "grad_norm": 4714.63720703125, + "learning_rate": 9.994580141874345e-06, + "loss": 3672.4672, + "step": 56670 + }, + { + "epoch": 0.11449718605186715, + "grad_norm": 2183.723876953125, + "learning_rate": 9.994563881190874e-06, + "loss": 8189.8828, + "step": 56680 + }, + { + "epoch": 0.11451738668455096, + "grad_norm": 4448.54296875, + "learning_rate": 9.9945475961645e-06, + "loss": 7092.0414, + "step": 56690 + }, + { + "epoch": 0.11453758731723478, + "grad_norm": 54745.390625, + "learning_rate": 9.994531286795309e-06, + "loss": 3560.2809, + "step": 56700 + }, + { + "epoch": 0.1145577879499186, + "grad_norm": 5014.806640625, + "learning_rate": 9.994514953083379e-06, + "loss": 1683.5004, + "step": 56710 + }, + { + "epoch": 0.1145779885826024, + "grad_norm": 10433.6943359375, + "learning_rate": 9.994498595028787e-06, + "loss": 1984.8373, + "step": 56720 + }, + { + "epoch": 0.11459818921528622, + "grad_norm": 5852.88916015625, + "learning_rate": 9.994482212631616e-06, + "loss": 2635.8293, + "step": 56730 + }, + { + "epoch": 0.11461838984797004, + "grad_norm": 18855.361328125, + "learning_rate": 9.994465805891944e-06, + "loss": 3761.5961, + "step": 56740 + }, + { + "epoch": 0.11463859048065385, + "grad_norm": 9510.6572265625, + "learning_rate": 9.994449374809851e-06, + "loss": 3851.432, + "step": 56750 + }, + { + "epoch": 0.11465879111333767, + "grad_norm": 22822.611328125, + "learning_rate": 9.994432919385417e-06, + "loss": 3992.7324, + "step": 56760 + }, + { + "epoch": 0.11467899174602149, + "grad_norm": 14441.53125, + "learning_rate": 9.994416439618723e-06, + "loss": 3731.0066, + "step": 56770 + }, + { + "epoch": 0.1146991923787053, + "grad_norm": 20491.748046875, + "learning_rate": 9.994399935509851e-06, + "loss": 4391.598, + "step": 56780 + }, + { + "epoch": 0.11471939301138911, + "grad_norm": 4851.14501953125, + "learning_rate": 9.994383407058878e-06, + "loss": 4824.1953, + "step": 56790 + }, + { + "epoch": 0.11473959364407293, + "grad_norm": 19836.791015625, + "learning_rate": 9.994366854265886e-06, + "loss": 3637.8965, + "step": 56800 + }, + { + "epoch": 0.11475979427675675, + "grad_norm": 20067.173828125, + "learning_rate": 9.994350277130956e-06, + "loss": 2588.4803, + "step": 56810 + }, + { + "epoch": 0.11477999490944056, + "grad_norm": 2050.32177734375, + "learning_rate": 9.994333675654169e-06, + "loss": 3422.1, + "step": 56820 + }, + { + "epoch": 0.11480019554212438, + "grad_norm": 178711.8125, + "learning_rate": 9.994317049835604e-06, + "loss": 5531.2121, + "step": 56830 + }, + { + "epoch": 0.1148203961748082, + "grad_norm": 73545.6875, + "learning_rate": 9.994300399675342e-06, + "loss": 2988.102, + "step": 56840 + }, + { + "epoch": 0.114840596807492, + "grad_norm": 2826.090087890625, + "learning_rate": 9.994283725173468e-06, + "loss": 1482.9663, + "step": 56850 + }, + { + "epoch": 0.11486079744017583, + "grad_norm": 48860.171875, + "learning_rate": 9.994267026330063e-06, + "loss": 7322.0875, + "step": 56860 + }, + { + "epoch": 0.11488099807285965, + "grad_norm": 46136.57421875, + "learning_rate": 9.994250303145203e-06, + "loss": 3415.9414, + "step": 56870 + }, + { + "epoch": 0.11490119870554345, + "grad_norm": 2635.611083984375, + "learning_rate": 9.994233555618973e-06, + "loss": 3638.75, + "step": 56880 + }, + { + "epoch": 0.11492139933822727, + "grad_norm": 26441.833984375, + "learning_rate": 9.994216783751457e-06, + "loss": 2661.0971, + "step": 56890 + }, + { + "epoch": 0.11494159997091109, + "grad_norm": 36057.10546875, + "learning_rate": 9.99419998754273e-06, + "loss": 3177.8297, + "step": 56900 + }, + { + "epoch": 0.1149618006035949, + "grad_norm": 14662.3251953125, + "learning_rate": 9.99418316699288e-06, + "loss": 3370.7172, + "step": 56910 + }, + { + "epoch": 0.11498200123627872, + "grad_norm": 8772.8173828125, + "learning_rate": 9.994166322101988e-06, + "loss": 2591.8479, + "step": 56920 + }, + { + "epoch": 0.11500220186896254, + "grad_norm": 2127.722900390625, + "learning_rate": 9.994149452870133e-06, + "loss": 1499.4343, + "step": 56930 + }, + { + "epoch": 0.11502240250164635, + "grad_norm": 10466.85546875, + "learning_rate": 9.9941325592974e-06, + "loss": 3858.448, + "step": 56940 + }, + { + "epoch": 0.11504260313433017, + "grad_norm": 131607.75, + "learning_rate": 9.994115641383872e-06, + "loss": 4464.6754, + "step": 56950 + }, + { + "epoch": 0.11506280376701399, + "grad_norm": 13880.32421875, + "learning_rate": 9.994098699129628e-06, + "loss": 4870.8473, + "step": 56960 + }, + { + "epoch": 0.1150830043996978, + "grad_norm": 1023.6370239257812, + "learning_rate": 9.994081732534755e-06, + "loss": 6281.5066, + "step": 56970 + }, + { + "epoch": 0.11510320503238161, + "grad_norm": 252.55484008789062, + "learning_rate": 9.994064741599332e-06, + "loss": 6582.5813, + "step": 56980 + }, + { + "epoch": 0.11512340566506543, + "grad_norm": 2735.510498046875, + "learning_rate": 9.994047726323442e-06, + "loss": 7734.1922, + "step": 56990 + }, + { + "epoch": 0.11514360629774925, + "grad_norm": 294.0409851074219, + "learning_rate": 9.994030686707171e-06, + "loss": 2870.5953, + "step": 57000 + }, + { + "epoch": 0.11516380693043306, + "grad_norm": 9159.5400390625, + "learning_rate": 9.9940136227506e-06, + "loss": 3474.673, + "step": 57010 + }, + { + "epoch": 0.11518400756311688, + "grad_norm": 49131.1484375, + "learning_rate": 9.993996534453812e-06, + "loss": 2209.1137, + "step": 57020 + }, + { + "epoch": 0.1152042081958007, + "grad_norm": 23064.18359375, + "learning_rate": 9.993979421816889e-06, + "loss": 4635.007, + "step": 57030 + }, + { + "epoch": 0.1152244088284845, + "grad_norm": 431.2340087890625, + "learning_rate": 9.993962284839918e-06, + "loss": 1253.0291, + "step": 57040 + }, + { + "epoch": 0.11524460946116832, + "grad_norm": 19239.650390625, + "learning_rate": 9.99394512352298e-06, + "loss": 2484.9879, + "step": 57050 + }, + { + "epoch": 0.11526481009385214, + "grad_norm": 1259.4530029296875, + "learning_rate": 9.993927937866158e-06, + "loss": 1111.9841, + "step": 57060 + }, + { + "epoch": 0.11528501072653595, + "grad_norm": 57804.5625, + "learning_rate": 9.993910727869538e-06, + "loss": 2092.7578, + "step": 57070 + }, + { + "epoch": 0.11530521135921977, + "grad_norm": 13544.4541015625, + "learning_rate": 9.993893493533203e-06, + "loss": 2290.6881, + "step": 57080 + }, + { + "epoch": 0.11532541199190359, + "grad_norm": 59.8425407409668, + "learning_rate": 9.993876234857236e-06, + "loss": 3560.8906, + "step": 57090 + }, + { + "epoch": 0.1153456126245874, + "grad_norm": 39980.50390625, + "learning_rate": 9.993858951841724e-06, + "loss": 3618.4094, + "step": 57100 + }, + { + "epoch": 0.11536581325727122, + "grad_norm": 1899.1263427734375, + "learning_rate": 9.993841644486747e-06, + "loss": 3897.9348, + "step": 57110 + }, + { + "epoch": 0.11538601388995504, + "grad_norm": 1496.321044921875, + "learning_rate": 9.993824312792393e-06, + "loss": 2377.3242, + "step": 57120 + }, + { + "epoch": 0.11540621452263886, + "grad_norm": 91975.59375, + "learning_rate": 9.993806956758743e-06, + "loss": 7115.7563, + "step": 57130 + }, + { + "epoch": 0.11542641515532266, + "grad_norm": 23713.482421875, + "learning_rate": 9.993789576385884e-06, + "loss": 6079.1734, + "step": 57140 + }, + { + "epoch": 0.11544661578800648, + "grad_norm": 27984.291015625, + "learning_rate": 9.993772171673901e-06, + "loss": 4552.7223, + "step": 57150 + }, + { + "epoch": 0.1154668164206903, + "grad_norm": 10167.3427734375, + "learning_rate": 9.993754742622879e-06, + "loss": 4599.4719, + "step": 57160 + }, + { + "epoch": 0.11548701705337411, + "grad_norm": 21961.67578125, + "learning_rate": 9.993737289232902e-06, + "loss": 4609.0504, + "step": 57170 + }, + { + "epoch": 0.11550721768605793, + "grad_norm": 22196.173828125, + "learning_rate": 9.993719811504053e-06, + "loss": 6814.9375, + "step": 57180 + }, + { + "epoch": 0.11552741831874175, + "grad_norm": 36653.88671875, + "learning_rate": 9.993702309436419e-06, + "loss": 3964.141, + "step": 57190 + }, + { + "epoch": 0.11554761895142555, + "grad_norm": 33571.734375, + "learning_rate": 9.99368478303009e-06, + "loss": 6659.4828, + "step": 57200 + }, + { + "epoch": 0.11556781958410937, + "grad_norm": 27457.224609375, + "learning_rate": 9.993667232285142e-06, + "loss": 4363.9543, + "step": 57210 + }, + { + "epoch": 0.1155880202167932, + "grad_norm": 3126.922119140625, + "learning_rate": 9.993649657201669e-06, + "loss": 4021.593, + "step": 57220 + }, + { + "epoch": 0.115608220849477, + "grad_norm": 27805.359375, + "learning_rate": 9.993632057779752e-06, + "loss": 2535.5572, + "step": 57230 + }, + { + "epoch": 0.11562842148216082, + "grad_norm": 9878.9765625, + "learning_rate": 9.993614434019476e-06, + "loss": 3890.4012, + "step": 57240 + }, + { + "epoch": 0.11564862211484464, + "grad_norm": 783.625732421875, + "learning_rate": 9.993596785920932e-06, + "loss": 3875.075, + "step": 57250 + }, + { + "epoch": 0.11566882274752845, + "grad_norm": 6498.63134765625, + "learning_rate": 9.993579113484202e-06, + "loss": 1293.086, + "step": 57260 + }, + { + "epoch": 0.11568902338021227, + "grad_norm": 23281.66796875, + "learning_rate": 9.993561416709372e-06, + "loss": 2988.342, + "step": 57270 + }, + { + "epoch": 0.11570922401289609, + "grad_norm": 33003.4921875, + "learning_rate": 9.99354369559653e-06, + "loss": 2003.782, + "step": 57280 + }, + { + "epoch": 0.1157294246455799, + "grad_norm": 5881.7978515625, + "learning_rate": 9.993525950145761e-06, + "loss": 7574.6906, + "step": 57290 + }, + { + "epoch": 0.11574962527826371, + "grad_norm": 765.3621826171875, + "learning_rate": 9.993508180357154e-06, + "loss": 4682.3141, + "step": 57300 + }, + { + "epoch": 0.11576982591094753, + "grad_norm": 9165.806640625, + "learning_rate": 9.993490386230793e-06, + "loss": 1559.6766, + "step": 57310 + }, + { + "epoch": 0.11579002654363135, + "grad_norm": 929.4835815429688, + "learning_rate": 9.993472567766764e-06, + "loss": 3120.8061, + "step": 57320 + }, + { + "epoch": 0.11581022717631516, + "grad_norm": 2477.9384765625, + "learning_rate": 9.993454724965157e-06, + "loss": 5750.4879, + "step": 57330 + }, + { + "epoch": 0.11583042780899898, + "grad_norm": 24471.09375, + "learning_rate": 9.993436857826058e-06, + "loss": 2731.0846, + "step": 57340 + }, + { + "epoch": 0.1158506284416828, + "grad_norm": 97174.875, + "learning_rate": 9.993418966349551e-06, + "loss": 7373.0523, + "step": 57350 + }, + { + "epoch": 0.1158708290743666, + "grad_norm": 9358.1865234375, + "learning_rate": 9.993401050535726e-06, + "loss": 3996.2465, + "step": 57360 + }, + { + "epoch": 0.11589102970705042, + "grad_norm": 4724.99755859375, + "learning_rate": 9.993383110384673e-06, + "loss": 4811.9586, + "step": 57370 + }, + { + "epoch": 0.11591123033973424, + "grad_norm": 118797.3046875, + "learning_rate": 9.993365145896473e-06, + "loss": 7739.8516, + "step": 57380 + }, + { + "epoch": 0.11593143097241805, + "grad_norm": 5352.37109375, + "learning_rate": 9.993347157071218e-06, + "loss": 3297.368, + "step": 57390 + }, + { + "epoch": 0.11595163160510187, + "grad_norm": 30604.95703125, + "learning_rate": 9.993329143908994e-06, + "loss": 2526.1557, + "step": 57400 + }, + { + "epoch": 0.11597183223778569, + "grad_norm": 29991.755859375, + "learning_rate": 9.993311106409891e-06, + "loss": 7150.0914, + "step": 57410 + }, + { + "epoch": 0.1159920328704695, + "grad_norm": 31308.498046875, + "learning_rate": 9.993293044573995e-06, + "loss": 2835.6352, + "step": 57420 + }, + { + "epoch": 0.11601223350315332, + "grad_norm": 2922.490966796875, + "learning_rate": 9.993274958401392e-06, + "loss": 2265.1467, + "step": 57430 + }, + { + "epoch": 0.11603243413583714, + "grad_norm": 102890.0390625, + "learning_rate": 9.993256847892175e-06, + "loss": 8318.8563, + "step": 57440 + }, + { + "epoch": 0.11605263476852096, + "grad_norm": 1369.6456298828125, + "learning_rate": 9.993238713046428e-06, + "loss": 1538.457, + "step": 57450 + }, + { + "epoch": 0.11607283540120476, + "grad_norm": 79537.1875, + "learning_rate": 9.993220553864242e-06, + "loss": 4241.1184, + "step": 57460 + }, + { + "epoch": 0.11609303603388858, + "grad_norm": 10666.271484375, + "learning_rate": 9.993202370345705e-06, + "loss": 2490.5529, + "step": 57470 + }, + { + "epoch": 0.1161132366665724, + "grad_norm": 33038.8203125, + "learning_rate": 9.993184162490903e-06, + "loss": 4592.0504, + "step": 57480 + }, + { + "epoch": 0.11613343729925621, + "grad_norm": 19217.99609375, + "learning_rate": 9.99316593029993e-06, + "loss": 2335.8848, + "step": 57490 + }, + { + "epoch": 0.11615363793194003, + "grad_norm": 6432.35791015625, + "learning_rate": 9.993147673772869e-06, + "loss": 2106.9449, + "step": 57500 + }, + { + "epoch": 0.11617383856462385, + "grad_norm": 141.9556121826172, + "learning_rate": 9.993129392909814e-06, + "loss": 1583.991, + "step": 57510 + }, + { + "epoch": 0.11619403919730766, + "grad_norm": 25288.8046875, + "learning_rate": 9.993111087710852e-06, + "loss": 4326.8375, + "step": 57520 + }, + { + "epoch": 0.11621423982999148, + "grad_norm": 4551.7421875, + "learning_rate": 9.993092758176071e-06, + "loss": 3726.3141, + "step": 57530 + }, + { + "epoch": 0.1162344404626753, + "grad_norm": 2817.215087890625, + "learning_rate": 9.993074404305563e-06, + "loss": 6152.7047, + "step": 57540 + }, + { + "epoch": 0.1162546410953591, + "grad_norm": 6884.091796875, + "learning_rate": 9.993056026099415e-06, + "loss": 1646.1375, + "step": 57550 + }, + { + "epoch": 0.11627484172804292, + "grad_norm": 47422.1328125, + "learning_rate": 9.993037623557716e-06, + "loss": 4012.0738, + "step": 57560 + }, + { + "epoch": 0.11629504236072674, + "grad_norm": 8795.6650390625, + "learning_rate": 9.993019196680558e-06, + "loss": 3747.1727, + "step": 57570 + }, + { + "epoch": 0.11631524299341055, + "grad_norm": 12900.0849609375, + "learning_rate": 9.993000745468031e-06, + "loss": 5976.2488, + "step": 57580 + }, + { + "epoch": 0.11633544362609437, + "grad_norm": 5787.0830078125, + "learning_rate": 9.992982269920223e-06, + "loss": 2940.7656, + "step": 57590 + }, + { + "epoch": 0.11635564425877819, + "grad_norm": 4691.22509765625, + "learning_rate": 9.992963770037227e-06, + "loss": 2616.7766, + "step": 57600 + }, + { + "epoch": 0.11637584489146201, + "grad_norm": 18646.767578125, + "learning_rate": 9.99294524581913e-06, + "loss": 5541.8828, + "step": 57610 + }, + { + "epoch": 0.11639604552414581, + "grad_norm": 85002.1015625, + "learning_rate": 9.992926697266023e-06, + "loss": 3765.3516, + "step": 57620 + }, + { + "epoch": 0.11641624615682963, + "grad_norm": 348.74462890625, + "learning_rate": 9.992908124377997e-06, + "loss": 3352.4641, + "step": 57630 + }, + { + "epoch": 0.11643644678951345, + "grad_norm": 11173.634765625, + "learning_rate": 9.992889527155143e-06, + "loss": 1649.8777, + "step": 57640 + }, + { + "epoch": 0.11645664742219726, + "grad_norm": 3181.045166015625, + "learning_rate": 9.992870905597549e-06, + "loss": 7141.132, + "step": 57650 + }, + { + "epoch": 0.11647684805488108, + "grad_norm": 4808.19189453125, + "learning_rate": 9.99285225970531e-06, + "loss": 5129.5793, + "step": 57660 + }, + { + "epoch": 0.1164970486875649, + "grad_norm": 28566.060546875, + "learning_rate": 9.992833589478513e-06, + "loss": 3680.0617, + "step": 57670 + }, + { + "epoch": 0.1165172493202487, + "grad_norm": 3048.17431640625, + "learning_rate": 9.992814894917251e-06, + "loss": 2278.7691, + "step": 57680 + }, + { + "epoch": 0.11653744995293253, + "grad_norm": 26977.607421875, + "learning_rate": 9.992796176021616e-06, + "loss": 4613.2012, + "step": 57690 + }, + { + "epoch": 0.11655765058561635, + "grad_norm": 16073.1953125, + "learning_rate": 9.992777432791697e-06, + "loss": 3882.4465, + "step": 57700 + }, + { + "epoch": 0.11657785121830015, + "grad_norm": 19821.673828125, + "learning_rate": 9.992758665227586e-06, + "loss": 4781.8703, + "step": 57710 + }, + { + "epoch": 0.11659805185098397, + "grad_norm": 283.07391357421875, + "learning_rate": 9.992739873329375e-06, + "loss": 3930.434, + "step": 57720 + }, + { + "epoch": 0.11661825248366779, + "grad_norm": 3709.515869140625, + "learning_rate": 9.992721057097157e-06, + "loss": 1635.9434, + "step": 57730 + }, + { + "epoch": 0.1166384531163516, + "grad_norm": 0.0, + "learning_rate": 9.99270221653102e-06, + "loss": 3122.9416, + "step": 57740 + }, + { + "epoch": 0.11665865374903542, + "grad_norm": 1630.8157958984375, + "learning_rate": 9.99268335163106e-06, + "loss": 1821.5006, + "step": 57750 + }, + { + "epoch": 0.11667885438171924, + "grad_norm": 16742.818359375, + "learning_rate": 9.992664462397366e-06, + "loss": 3229.2975, + "step": 57760 + }, + { + "epoch": 0.11669905501440304, + "grad_norm": 10381.0712890625, + "learning_rate": 9.99264554883003e-06, + "loss": 4156.9859, + "step": 57770 + }, + { + "epoch": 0.11671925564708686, + "grad_norm": 7463.8154296875, + "learning_rate": 9.992626610929146e-06, + "loss": 6811.2, + "step": 57780 + }, + { + "epoch": 0.11673945627977068, + "grad_norm": 7018.7509765625, + "learning_rate": 9.992607648694805e-06, + "loss": 6346.9547, + "step": 57790 + }, + { + "epoch": 0.1167596569124545, + "grad_norm": 16674.3515625, + "learning_rate": 9.9925886621271e-06, + "loss": 4198.6242, + "step": 57800 + }, + { + "epoch": 0.11677985754513831, + "grad_norm": 100541.1484375, + "learning_rate": 9.992569651226123e-06, + "loss": 6335.059, + "step": 57810 + }, + { + "epoch": 0.11680005817782213, + "grad_norm": 19427.37109375, + "learning_rate": 9.992550615991968e-06, + "loss": 3113.3063, + "step": 57820 + }, + { + "epoch": 0.11682025881050595, + "grad_norm": 18974.81640625, + "learning_rate": 9.992531556424726e-06, + "loss": 1433.6428, + "step": 57830 + }, + { + "epoch": 0.11684045944318976, + "grad_norm": 17821.943359375, + "learning_rate": 9.992512472524491e-06, + "loss": 3270.8043, + "step": 57840 + }, + { + "epoch": 0.11686066007587358, + "grad_norm": 8796.8974609375, + "learning_rate": 9.992493364291356e-06, + "loss": 2185.0672, + "step": 57850 + }, + { + "epoch": 0.1168808607085574, + "grad_norm": 3345.388427734375, + "learning_rate": 9.992474231725412e-06, + "loss": 1350.5002, + "step": 57860 + }, + { + "epoch": 0.1169010613412412, + "grad_norm": 16785.33984375, + "learning_rate": 9.992455074826757e-06, + "loss": 4717.9879, + "step": 57870 + }, + { + "epoch": 0.11692126197392502, + "grad_norm": 7832.4130859375, + "learning_rate": 9.99243589359548e-06, + "loss": 1392.139, + "step": 57880 + }, + { + "epoch": 0.11694146260660884, + "grad_norm": 18834.3984375, + "learning_rate": 9.992416688031676e-06, + "loss": 7042.3383, + "step": 57890 + }, + { + "epoch": 0.11696166323929265, + "grad_norm": 43268.04296875, + "learning_rate": 9.992397458135438e-06, + "loss": 3797.1684, + "step": 57900 + }, + { + "epoch": 0.11698186387197647, + "grad_norm": 11241.859375, + "learning_rate": 9.992378203906862e-06, + "loss": 4900.1023, + "step": 57910 + }, + { + "epoch": 0.11700206450466029, + "grad_norm": 23724.2421875, + "learning_rate": 9.99235892534604e-06, + "loss": 3105.4914, + "step": 57920 + }, + { + "epoch": 0.1170222651373441, + "grad_norm": 2660.864013671875, + "learning_rate": 9.992339622453065e-06, + "loss": 3877.4984, + "step": 57930 + }, + { + "epoch": 0.11704246577002791, + "grad_norm": 22208.388671875, + "learning_rate": 9.992320295228032e-06, + "loss": 2345.1832, + "step": 57940 + }, + { + "epoch": 0.11706266640271173, + "grad_norm": 8210.5498046875, + "learning_rate": 9.992300943671035e-06, + "loss": 3530.4926, + "step": 57950 + }, + { + "epoch": 0.11708286703539555, + "grad_norm": 65257.91015625, + "learning_rate": 9.99228156778217e-06, + "loss": 5331.1949, + "step": 57960 + }, + { + "epoch": 0.11710306766807936, + "grad_norm": 11153.6279296875, + "learning_rate": 9.99226216756153e-06, + "loss": 4656.1773, + "step": 57970 + }, + { + "epoch": 0.11712326830076318, + "grad_norm": 28038.826171875, + "learning_rate": 9.99224274300921e-06, + "loss": 3790.4496, + "step": 57980 + }, + { + "epoch": 0.117143468933447, + "grad_norm": 4125.173828125, + "learning_rate": 9.992223294125303e-06, + "loss": 2198.2658, + "step": 57990 + }, + { + "epoch": 0.11716366956613081, + "grad_norm": 1053.1795654296875, + "learning_rate": 9.992203820909906e-06, + "loss": 2070.3336, + "step": 58000 + }, + { + "epoch": 0.11718387019881463, + "grad_norm": 20441.65234375, + "learning_rate": 9.992184323363112e-06, + "loss": 4768.2078, + "step": 58010 + }, + { + "epoch": 0.11720407083149845, + "grad_norm": 8232.701171875, + "learning_rate": 9.992164801485018e-06, + "loss": 1853.1865, + "step": 58020 + }, + { + "epoch": 0.11722427146418225, + "grad_norm": 26867.091796875, + "learning_rate": 9.992145255275718e-06, + "loss": 6519.1723, + "step": 58030 + }, + { + "epoch": 0.11724447209686607, + "grad_norm": 4616.2470703125, + "learning_rate": 9.99212568473531e-06, + "loss": 3317.6793, + "step": 58040 + }, + { + "epoch": 0.1172646727295499, + "grad_norm": 3021.537109375, + "learning_rate": 9.992106089863884e-06, + "loss": 2019.3162, + "step": 58050 + }, + { + "epoch": 0.1172848733622337, + "grad_norm": 34459.78515625, + "learning_rate": 9.992086470661537e-06, + "loss": 2648.7766, + "step": 58060 + }, + { + "epoch": 0.11730507399491752, + "grad_norm": 4283.69189453125, + "learning_rate": 9.992066827128368e-06, + "loss": 2873.6408, + "step": 58070 + }, + { + "epoch": 0.11732527462760134, + "grad_norm": 7628.6435546875, + "learning_rate": 9.992047159264472e-06, + "loss": 5443.5234, + "step": 58080 + }, + { + "epoch": 0.11734547526028515, + "grad_norm": 130344.859375, + "learning_rate": 9.992027467069943e-06, + "loss": 8854.6742, + "step": 58090 + }, + { + "epoch": 0.11736567589296897, + "grad_norm": 6566.705078125, + "learning_rate": 9.992007750544876e-06, + "loss": 3373.5535, + "step": 58100 + }, + { + "epoch": 0.11738587652565279, + "grad_norm": 32564.794921875, + "learning_rate": 9.99198800968937e-06, + "loss": 3391.0059, + "step": 58110 + }, + { + "epoch": 0.1174060771583366, + "grad_norm": 564.8240356445312, + "learning_rate": 9.991968244503519e-06, + "loss": 774.2537, + "step": 58120 + }, + { + "epoch": 0.11742627779102041, + "grad_norm": 746.0786743164062, + "learning_rate": 9.991948454987422e-06, + "loss": 4357.2375, + "step": 58130 + }, + { + "epoch": 0.11744647842370423, + "grad_norm": 2038.5020751953125, + "learning_rate": 9.99192864114117e-06, + "loss": 3330.009, + "step": 58140 + }, + { + "epoch": 0.11746667905638805, + "grad_norm": 56157.4921875, + "learning_rate": 9.991908802964867e-06, + "loss": 4014.5977, + "step": 58150 + }, + { + "epoch": 0.11748687968907186, + "grad_norm": 19922.09765625, + "learning_rate": 9.991888940458605e-06, + "loss": 1908.1689, + "step": 58160 + }, + { + "epoch": 0.11750708032175568, + "grad_norm": 106062.4375, + "learning_rate": 9.99186905362248e-06, + "loss": 5443.6398, + "step": 58170 + }, + { + "epoch": 0.1175272809544395, + "grad_norm": 2249.634521484375, + "learning_rate": 9.991849142456593e-06, + "loss": 2977.3549, + "step": 58180 + }, + { + "epoch": 0.1175474815871233, + "grad_norm": 18023.578125, + "learning_rate": 9.991829206961038e-06, + "loss": 2361.726, + "step": 58190 + }, + { + "epoch": 0.11756768221980712, + "grad_norm": 22524.6640625, + "learning_rate": 9.991809247135912e-06, + "loss": 4571.8199, + "step": 58200 + }, + { + "epoch": 0.11758788285249094, + "grad_norm": 14524.259765625, + "learning_rate": 9.991789262981314e-06, + "loss": 1983.7238, + "step": 58210 + }, + { + "epoch": 0.11760808348517475, + "grad_norm": 7711.93798828125, + "learning_rate": 9.99176925449734e-06, + "loss": 4827.7555, + "step": 58220 + }, + { + "epoch": 0.11762828411785857, + "grad_norm": 36658.78125, + "learning_rate": 9.991749221684088e-06, + "loss": 5000.8676, + "step": 58230 + }, + { + "epoch": 0.11764848475054239, + "grad_norm": 17955.515625, + "learning_rate": 9.991729164541656e-06, + "loss": 2850.307, + "step": 58240 + }, + { + "epoch": 0.1176686853832262, + "grad_norm": 38209.4921875, + "learning_rate": 9.991709083070143e-06, + "loss": 2711.567, + "step": 58250 + }, + { + "epoch": 0.11768888601591002, + "grad_norm": 27897.923828125, + "learning_rate": 9.991688977269643e-06, + "loss": 1467.2014, + "step": 58260 + }, + { + "epoch": 0.11770908664859384, + "grad_norm": 36596.50390625, + "learning_rate": 9.991668847140258e-06, + "loss": 6021.2695, + "step": 58270 + }, + { + "epoch": 0.11772928728127766, + "grad_norm": 4331.97802734375, + "learning_rate": 9.991648692682083e-06, + "loss": 2432.3084, + "step": 58280 + }, + { + "epoch": 0.11774948791396146, + "grad_norm": 119617.640625, + "learning_rate": 9.99162851389522e-06, + "loss": 7920.0172, + "step": 58290 + }, + { + "epoch": 0.11776968854664528, + "grad_norm": 7471.2861328125, + "learning_rate": 9.991608310779762e-06, + "loss": 3777.2234, + "step": 58300 + }, + { + "epoch": 0.1177898891793291, + "grad_norm": 7867.0146484375, + "learning_rate": 9.991588083335812e-06, + "loss": 2553.4887, + "step": 58310 + }, + { + "epoch": 0.11781008981201291, + "grad_norm": 1072.831298828125, + "learning_rate": 9.991567831563468e-06, + "loss": 3649.4605, + "step": 58320 + }, + { + "epoch": 0.11783029044469673, + "grad_norm": 11696.1748046875, + "learning_rate": 9.991547555462825e-06, + "loss": 8627.9617, + "step": 58330 + }, + { + "epoch": 0.11785049107738055, + "grad_norm": 28591.98828125, + "learning_rate": 9.991527255033988e-06, + "loss": 2814.1898, + "step": 58340 + }, + { + "epoch": 0.11787069171006435, + "grad_norm": 2333.094482421875, + "learning_rate": 9.99150693027705e-06, + "loss": 4048.1016, + "step": 58350 + }, + { + "epoch": 0.11789089234274817, + "grad_norm": 6789.40478515625, + "learning_rate": 9.991486581192115e-06, + "loss": 4213.9684, + "step": 58360 + }, + { + "epoch": 0.117911092975432, + "grad_norm": 4244.41357421875, + "learning_rate": 9.991466207779279e-06, + "loss": 6117.077, + "step": 58370 + }, + { + "epoch": 0.1179312936081158, + "grad_norm": 6587.34716796875, + "learning_rate": 9.99144581003864e-06, + "loss": 2999.6146, + "step": 58380 + }, + { + "epoch": 0.11795149424079962, + "grad_norm": 4498.8935546875, + "learning_rate": 9.991425387970301e-06, + "loss": 1117.3115, + "step": 58390 + }, + { + "epoch": 0.11797169487348344, + "grad_norm": 2465.523193359375, + "learning_rate": 9.99140494157436e-06, + "loss": 2744.6928, + "step": 58400 + }, + { + "epoch": 0.11799189550616725, + "grad_norm": 14735.8916015625, + "learning_rate": 9.991384470850918e-06, + "loss": 3526.5141, + "step": 58410 + }, + { + "epoch": 0.11801209613885107, + "grad_norm": 187900.484375, + "learning_rate": 9.991363975800073e-06, + "loss": 8363.0453, + "step": 58420 + }, + { + "epoch": 0.11803229677153489, + "grad_norm": 12029.40625, + "learning_rate": 9.991343456421923e-06, + "loss": 4560.0121, + "step": 58430 + }, + { + "epoch": 0.1180524974042187, + "grad_norm": 722.6077880859375, + "learning_rate": 9.991322912716572e-06, + "loss": 3211.6328, + "step": 58440 + }, + { + "epoch": 0.11807269803690251, + "grad_norm": 905.3804931640625, + "learning_rate": 9.99130234468412e-06, + "loss": 4266.0309, + "step": 58450 + }, + { + "epoch": 0.11809289866958633, + "grad_norm": 3204.400634765625, + "learning_rate": 9.991281752324664e-06, + "loss": 2970.2477, + "step": 58460 + }, + { + "epoch": 0.11811309930227015, + "grad_norm": 5681.4423828125, + "learning_rate": 9.991261135638307e-06, + "loss": 2864.6936, + "step": 58470 + }, + { + "epoch": 0.11813329993495396, + "grad_norm": 143242.171875, + "learning_rate": 9.991240494625147e-06, + "loss": 4020.8508, + "step": 58480 + }, + { + "epoch": 0.11815350056763778, + "grad_norm": 11069.7607421875, + "learning_rate": 9.991219829285287e-06, + "loss": 2372.6219, + "step": 58490 + }, + { + "epoch": 0.1181737012003216, + "grad_norm": 17034.62890625, + "learning_rate": 9.991199139618828e-06, + "loss": 2442.9502, + "step": 58500 + }, + { + "epoch": 0.1181939018330054, + "grad_norm": 26192.203125, + "learning_rate": 9.991178425625869e-06, + "loss": 2507.1057, + "step": 58510 + }, + { + "epoch": 0.11821410246568922, + "grad_norm": 32021.671875, + "learning_rate": 9.99115768730651e-06, + "loss": 2299.6316, + "step": 58520 + }, + { + "epoch": 0.11823430309837304, + "grad_norm": 7359.5419921875, + "learning_rate": 9.991136924660856e-06, + "loss": 3044.6945, + "step": 58530 + }, + { + "epoch": 0.11825450373105685, + "grad_norm": 1374.322998046875, + "learning_rate": 9.991116137689006e-06, + "loss": 2933.1725, + "step": 58540 + }, + { + "epoch": 0.11827470436374067, + "grad_norm": 2898.7841796875, + "learning_rate": 9.991095326391061e-06, + "loss": 1708.325, + "step": 58550 + }, + { + "epoch": 0.11829490499642449, + "grad_norm": 12820.626953125, + "learning_rate": 9.99107449076712e-06, + "loss": 4811.4586, + "step": 58560 + }, + { + "epoch": 0.1183151056291083, + "grad_norm": 2092.815673828125, + "learning_rate": 9.99105363081729e-06, + "loss": 6298.9469, + "step": 58570 + }, + { + "epoch": 0.11833530626179212, + "grad_norm": 67271.1171875, + "learning_rate": 9.99103274654167e-06, + "loss": 5210.1344, + "step": 58580 + }, + { + "epoch": 0.11835550689447594, + "grad_norm": 27985.576171875, + "learning_rate": 9.99101183794036e-06, + "loss": 2183.1412, + "step": 58590 + }, + { + "epoch": 0.11837570752715976, + "grad_norm": 58369.26953125, + "learning_rate": 9.990990905013466e-06, + "loss": 5849.807, + "step": 58600 + }, + { + "epoch": 0.11839590815984356, + "grad_norm": 20516.09765625, + "learning_rate": 9.990969947761087e-06, + "loss": 11161.5109, + "step": 58610 + }, + { + "epoch": 0.11841610879252738, + "grad_norm": 1255.243408203125, + "learning_rate": 9.990948966183324e-06, + "loss": 2804.2434, + "step": 58620 + }, + { + "epoch": 0.1184363094252112, + "grad_norm": 15422.021484375, + "learning_rate": 9.990927960280283e-06, + "loss": 1730.5916, + "step": 58630 + }, + { + "epoch": 0.11845651005789501, + "grad_norm": 3244.716552734375, + "learning_rate": 9.990906930052065e-06, + "loss": 2528.5525, + "step": 58640 + }, + { + "epoch": 0.11847671069057883, + "grad_norm": 15383.0869140625, + "learning_rate": 9.99088587549877e-06, + "loss": 2710.0828, + "step": 58650 + }, + { + "epoch": 0.11849691132326265, + "grad_norm": 14953.8515625, + "learning_rate": 9.990864796620503e-06, + "loss": 7074.7945, + "step": 58660 + }, + { + "epoch": 0.11851711195594646, + "grad_norm": 516.6032104492188, + "learning_rate": 9.990843693417366e-06, + "loss": 5671.2582, + "step": 58670 + }, + { + "epoch": 0.11853731258863028, + "grad_norm": 89232.2578125, + "learning_rate": 9.990822565889464e-06, + "loss": 3996.2941, + "step": 58680 + }, + { + "epoch": 0.1185575132213141, + "grad_norm": 14479.03515625, + "learning_rate": 9.990801414036896e-06, + "loss": 4915.6023, + "step": 58690 + }, + { + "epoch": 0.1185777138539979, + "grad_norm": 4538.7099609375, + "learning_rate": 9.99078023785977e-06, + "loss": 3185.7629, + "step": 58700 + }, + { + "epoch": 0.11859791448668172, + "grad_norm": 330.3240661621094, + "learning_rate": 9.990759037358184e-06, + "loss": 3112.5439, + "step": 58710 + }, + { + "epoch": 0.11861811511936554, + "grad_norm": 7345.29248046875, + "learning_rate": 9.990737812532245e-06, + "loss": 3569.7812, + "step": 58720 + }, + { + "epoch": 0.11863831575204935, + "grad_norm": 540.5125732421875, + "learning_rate": 9.990716563382055e-06, + "loss": 3987.1758, + "step": 58730 + }, + { + "epoch": 0.11865851638473317, + "grad_norm": 10023.373046875, + "learning_rate": 9.990695289907716e-06, + "loss": 4418.834, + "step": 58740 + }, + { + "epoch": 0.11867871701741699, + "grad_norm": 7359.5615234375, + "learning_rate": 9.990673992109335e-06, + "loss": 2328.4885, + "step": 58750 + }, + { + "epoch": 0.11869891765010081, + "grad_norm": 38202.37109375, + "learning_rate": 9.990652669987016e-06, + "loss": 4499.866, + "step": 58760 + }, + { + "epoch": 0.11871911828278461, + "grad_norm": 44373.08984375, + "learning_rate": 9.990631323540858e-06, + "loss": 8954.5297, + "step": 58770 + }, + { + "epoch": 0.11873931891546843, + "grad_norm": 15033.533203125, + "learning_rate": 9.990609952770969e-06, + "loss": 3142.1729, + "step": 58780 + }, + { + "epoch": 0.11875951954815225, + "grad_norm": 12430.46484375, + "learning_rate": 9.990588557677454e-06, + "loss": 2052.0359, + "step": 58790 + }, + { + "epoch": 0.11877972018083606, + "grad_norm": 1194.8599853515625, + "learning_rate": 9.990567138260414e-06, + "loss": 3890.5582, + "step": 58800 + }, + { + "epoch": 0.11879992081351988, + "grad_norm": 17095.720703125, + "learning_rate": 9.990545694519956e-06, + "loss": 2034.1975, + "step": 58810 + }, + { + "epoch": 0.1188201214462037, + "grad_norm": 6897.19140625, + "learning_rate": 9.990524226456182e-06, + "loss": 4251.8895, + "step": 58820 + }, + { + "epoch": 0.1188403220788875, + "grad_norm": 6809.1337890625, + "learning_rate": 9.9905027340692e-06, + "loss": 2125.3645, + "step": 58830 + }, + { + "epoch": 0.11886052271157133, + "grad_norm": 6186.9248046875, + "learning_rate": 9.990481217359112e-06, + "loss": 4243.0957, + "step": 58840 + }, + { + "epoch": 0.11888072334425515, + "grad_norm": 60182.60546875, + "learning_rate": 9.990459676326025e-06, + "loss": 5457.2035, + "step": 58850 + }, + { + "epoch": 0.11890092397693895, + "grad_norm": 5821.1650390625, + "learning_rate": 9.990438110970043e-06, + "loss": 4456.2172, + "step": 58860 + }, + { + "epoch": 0.11892112460962277, + "grad_norm": 4629.48046875, + "learning_rate": 9.990416521291268e-06, + "loss": 2715.0273, + "step": 58870 + }, + { + "epoch": 0.11894132524230659, + "grad_norm": 27098.7578125, + "learning_rate": 9.990394907289811e-06, + "loss": 2514.3, + "step": 58880 + }, + { + "epoch": 0.1189615258749904, + "grad_norm": 1981.8985595703125, + "learning_rate": 9.990373268965773e-06, + "loss": 3370.7648, + "step": 58890 + }, + { + "epoch": 0.11898172650767422, + "grad_norm": 4804.34130859375, + "learning_rate": 9.990351606319261e-06, + "loss": 2539.9924, + "step": 58900 + }, + { + "epoch": 0.11900192714035804, + "grad_norm": 267.0777587890625, + "learning_rate": 9.990329919350382e-06, + "loss": 6209.0953, + "step": 58910 + }, + { + "epoch": 0.11902212777304186, + "grad_norm": 31371.451171875, + "learning_rate": 9.990308208059239e-06, + "loss": 4525.9547, + "step": 58920 + }, + { + "epoch": 0.11904232840572566, + "grad_norm": 2253.875732421875, + "learning_rate": 9.990286472445938e-06, + "loss": 4188.6328, + "step": 58930 + }, + { + "epoch": 0.11906252903840948, + "grad_norm": 34572.75, + "learning_rate": 9.990264712510586e-06, + "loss": 4215.7723, + "step": 58940 + }, + { + "epoch": 0.1190827296710933, + "grad_norm": 37277.203125, + "learning_rate": 9.990242928253291e-06, + "loss": 3688.4266, + "step": 58950 + }, + { + "epoch": 0.11910293030377711, + "grad_norm": 14295.5341796875, + "learning_rate": 9.990221119674157e-06, + "loss": 1865.5061, + "step": 58960 + }, + { + "epoch": 0.11912313093646093, + "grad_norm": 2758.6064453125, + "learning_rate": 9.99019928677329e-06, + "loss": 2722.141, + "step": 58970 + }, + { + "epoch": 0.11914333156914475, + "grad_norm": 101710.7265625, + "learning_rate": 9.990177429550797e-06, + "loss": 4574.4406, + "step": 58980 + }, + { + "epoch": 0.11916353220182856, + "grad_norm": 16536.0703125, + "learning_rate": 9.990155548006783e-06, + "loss": 3057.4895, + "step": 58990 + }, + { + "epoch": 0.11918373283451238, + "grad_norm": 18454.279296875, + "learning_rate": 9.990133642141359e-06, + "loss": 2921.4502, + "step": 59000 + }, + { + "epoch": 0.1192039334671962, + "grad_norm": 5860.94775390625, + "learning_rate": 9.990111711954626e-06, + "loss": 2269.3063, + "step": 59010 + }, + { + "epoch": 0.11922413409988, + "grad_norm": 70293.3203125, + "learning_rate": 9.990089757446697e-06, + "loss": 6906.8062, + "step": 59020 + }, + { + "epoch": 0.11924433473256382, + "grad_norm": 24697.9765625, + "learning_rate": 9.990067778617672e-06, + "loss": 2700.6273, + "step": 59030 + }, + { + "epoch": 0.11926453536524764, + "grad_norm": 40904.12890625, + "learning_rate": 9.990045775467664e-06, + "loss": 3418.6129, + "step": 59040 + }, + { + "epoch": 0.11928473599793145, + "grad_norm": 1008.8265380859375, + "learning_rate": 9.990023747996778e-06, + "loss": 6507.1406, + "step": 59050 + }, + { + "epoch": 0.11930493663061527, + "grad_norm": 3820.104736328125, + "learning_rate": 9.990001696205121e-06, + "loss": 3611.8391, + "step": 59060 + }, + { + "epoch": 0.11932513726329909, + "grad_norm": 3533.161865234375, + "learning_rate": 9.989979620092802e-06, + "loss": 2321.0855, + "step": 59070 + }, + { + "epoch": 0.11934533789598291, + "grad_norm": 64325.31640625, + "learning_rate": 9.989957519659926e-06, + "loss": 3662.6883, + "step": 59080 + }, + { + "epoch": 0.11936553852866671, + "grad_norm": 10408.9345703125, + "learning_rate": 9.989935394906602e-06, + "loss": 1375.8396, + "step": 59090 + }, + { + "epoch": 0.11938573916135053, + "grad_norm": 13962.5166015625, + "learning_rate": 9.98991324583294e-06, + "loss": 3981.4531, + "step": 59100 + }, + { + "epoch": 0.11940593979403435, + "grad_norm": 13642.2890625, + "learning_rate": 9.989891072439045e-06, + "loss": 3676.4754, + "step": 59110 + }, + { + "epoch": 0.11942614042671816, + "grad_norm": 1351.91357421875, + "learning_rate": 9.989868874725026e-06, + "loss": 4690.0777, + "step": 59120 + }, + { + "epoch": 0.11944634105940198, + "grad_norm": 508.3990173339844, + "learning_rate": 9.989846652690992e-06, + "loss": 5311.7406, + "step": 59130 + }, + { + "epoch": 0.1194665416920858, + "grad_norm": 16758.76953125, + "learning_rate": 9.989824406337049e-06, + "loss": 1130.6981, + "step": 59140 + }, + { + "epoch": 0.11948674232476961, + "grad_norm": 0.0, + "learning_rate": 9.989802135663308e-06, + "loss": 5046.6289, + "step": 59150 + }, + { + "epoch": 0.11950694295745343, + "grad_norm": 1471.8060302734375, + "learning_rate": 9.989779840669878e-06, + "loss": 1620.0178, + "step": 59160 + }, + { + "epoch": 0.11952714359013725, + "grad_norm": 14586.6083984375, + "learning_rate": 9.989757521356864e-06, + "loss": 6303.093, + "step": 59170 + }, + { + "epoch": 0.11954734422282105, + "grad_norm": 18427.591796875, + "learning_rate": 9.989735177724378e-06, + "loss": 2812.3512, + "step": 59180 + }, + { + "epoch": 0.11956754485550487, + "grad_norm": 1677.9752197265625, + "learning_rate": 9.989712809772528e-06, + "loss": 615.0795, + "step": 59190 + }, + { + "epoch": 0.1195877454881887, + "grad_norm": 12605.9677734375, + "learning_rate": 9.989690417501423e-06, + "loss": 3086.6744, + "step": 59200 + }, + { + "epoch": 0.1196079461208725, + "grad_norm": 6816.00048828125, + "learning_rate": 9.989668000911173e-06, + "loss": 4372.6707, + "step": 59210 + }, + { + "epoch": 0.11962814675355632, + "grad_norm": 121873.6796875, + "learning_rate": 9.989645560001884e-06, + "loss": 3669.3926, + "step": 59220 + }, + { + "epoch": 0.11964834738624014, + "grad_norm": 44608.8671875, + "learning_rate": 9.989623094773669e-06, + "loss": 4566.9555, + "step": 59230 + }, + { + "epoch": 0.11966854801892396, + "grad_norm": 5914.083984375, + "learning_rate": 9.989600605226637e-06, + "loss": 720.4709, + "step": 59240 + }, + { + "epoch": 0.11968874865160777, + "grad_norm": 22909.5703125, + "learning_rate": 9.989578091360896e-06, + "loss": 2725.5687, + "step": 59250 + }, + { + "epoch": 0.11970894928429159, + "grad_norm": 8953.341796875, + "learning_rate": 9.989555553176556e-06, + "loss": 4361.3129, + "step": 59260 + }, + { + "epoch": 0.1197291499169754, + "grad_norm": 2838.613525390625, + "learning_rate": 9.989532990673729e-06, + "loss": 2616.8863, + "step": 59270 + }, + { + "epoch": 0.11974935054965921, + "grad_norm": 11600.1201171875, + "learning_rate": 9.989510403852521e-06, + "loss": 1529.3997, + "step": 59280 + }, + { + "epoch": 0.11976955118234303, + "grad_norm": 573.4575805664062, + "learning_rate": 9.989487792713045e-06, + "loss": 2182.7242, + "step": 59290 + }, + { + "epoch": 0.11978975181502685, + "grad_norm": 18879.583984375, + "learning_rate": 9.989465157255413e-06, + "loss": 5798.291, + "step": 59300 + }, + { + "epoch": 0.11980995244771066, + "grad_norm": 88113.171875, + "learning_rate": 9.98944249747973e-06, + "loss": 4508.4926, + "step": 59310 + }, + { + "epoch": 0.11983015308039448, + "grad_norm": 9446.265625, + "learning_rate": 9.989419813386112e-06, + "loss": 1874.8564, + "step": 59320 + }, + { + "epoch": 0.1198503537130783, + "grad_norm": 109766.2109375, + "learning_rate": 9.989397104974665e-06, + "loss": 5732.2504, + "step": 59330 + }, + { + "epoch": 0.1198705543457621, + "grad_norm": 399.12274169921875, + "learning_rate": 9.989374372245503e-06, + "loss": 2808.1441, + "step": 59340 + }, + { + "epoch": 0.11989075497844592, + "grad_norm": 367.2723693847656, + "learning_rate": 9.989351615198734e-06, + "loss": 4229.3957, + "step": 59350 + }, + { + "epoch": 0.11991095561112974, + "grad_norm": 17016.19921875, + "learning_rate": 9.989328833834472e-06, + "loss": 7187.5953, + "step": 59360 + }, + { + "epoch": 0.11993115624381355, + "grad_norm": 717.0458374023438, + "learning_rate": 9.989306028152825e-06, + "loss": 5966.3371, + "step": 59370 + }, + { + "epoch": 0.11995135687649737, + "grad_norm": 9861.931640625, + "learning_rate": 9.989283198153908e-06, + "loss": 5417.3207, + "step": 59380 + }, + { + "epoch": 0.11997155750918119, + "grad_norm": 14094.1904296875, + "learning_rate": 9.989260343837827e-06, + "loss": 5437.6578, + "step": 59390 + }, + { + "epoch": 0.11999175814186501, + "grad_norm": 16296.7373046875, + "learning_rate": 9.989237465204698e-06, + "loss": 6503.0285, + "step": 59400 + }, + { + "epoch": 0.12001195877454882, + "grad_norm": 27215.44140625, + "learning_rate": 9.989214562254628e-06, + "loss": 4361.4477, + "step": 59410 + }, + { + "epoch": 0.12003215940723264, + "grad_norm": 0.0, + "learning_rate": 9.989191634987734e-06, + "loss": 4423.0078, + "step": 59420 + }, + { + "epoch": 0.12005236003991646, + "grad_norm": 128340.8828125, + "learning_rate": 9.989168683404125e-06, + "loss": 6084.1984, + "step": 59430 + }, + { + "epoch": 0.12007256067260026, + "grad_norm": 48862.42578125, + "learning_rate": 9.98914570750391e-06, + "loss": 3002.5588, + "step": 59440 + }, + { + "epoch": 0.12009276130528408, + "grad_norm": 40782.7109375, + "learning_rate": 9.98912270728721e-06, + "loss": 3151.0922, + "step": 59450 + }, + { + "epoch": 0.1201129619379679, + "grad_norm": 14992.9990234375, + "learning_rate": 9.989099682754125e-06, + "loss": 2566.9094, + "step": 59460 + }, + { + "epoch": 0.12013316257065171, + "grad_norm": 4244.45458984375, + "learning_rate": 9.989076633904775e-06, + "loss": 6671.9219, + "step": 59470 + }, + { + "epoch": 0.12015336320333553, + "grad_norm": 14895.404296875, + "learning_rate": 9.989053560739272e-06, + "loss": 1729.8996, + "step": 59480 + }, + { + "epoch": 0.12017356383601935, + "grad_norm": 3140.30322265625, + "learning_rate": 9.989030463257726e-06, + "loss": 2405.1576, + "step": 59490 + }, + { + "epoch": 0.12019376446870315, + "grad_norm": 25958.958984375, + "learning_rate": 9.989007341460251e-06, + "loss": 1701.7898, + "step": 59500 + }, + { + "epoch": 0.12021396510138697, + "grad_norm": 1542.9957275390625, + "learning_rate": 9.98898419534696e-06, + "loss": 3819.8988, + "step": 59510 + }, + { + "epoch": 0.1202341657340708, + "grad_norm": 1610.930419921875, + "learning_rate": 9.988961024917963e-06, + "loss": 2093.6568, + "step": 59520 + }, + { + "epoch": 0.1202543663667546, + "grad_norm": 102834.6875, + "learning_rate": 9.988937830173376e-06, + "loss": 5439.1461, + "step": 59530 + }, + { + "epoch": 0.12027456699943842, + "grad_norm": 29387.68359375, + "learning_rate": 9.988914611113311e-06, + "loss": 5813.4648, + "step": 59540 + }, + { + "epoch": 0.12029476763212224, + "grad_norm": 893.3914794921875, + "learning_rate": 9.988891367737882e-06, + "loss": 6418.4871, + "step": 59550 + }, + { + "epoch": 0.12031496826480606, + "grad_norm": 7664.4970703125, + "learning_rate": 9.988868100047203e-06, + "loss": 4697.4445, + "step": 59560 + }, + { + "epoch": 0.12033516889748987, + "grad_norm": 63718.72265625, + "learning_rate": 9.988844808041382e-06, + "loss": 3244.5766, + "step": 59570 + }, + { + "epoch": 0.12035536953017369, + "grad_norm": 8801.6318359375, + "learning_rate": 9.98882149172054e-06, + "loss": 1944.7781, + "step": 59580 + }, + { + "epoch": 0.1203755701628575, + "grad_norm": 14943.7265625, + "learning_rate": 9.988798151084783e-06, + "loss": 1522.2914, + "step": 59590 + }, + { + "epoch": 0.12039577079554131, + "grad_norm": 1978.5670166015625, + "learning_rate": 9.988774786134235e-06, + "loss": 7597.0531, + "step": 59600 + }, + { + "epoch": 0.12041597142822513, + "grad_norm": 5123.4345703125, + "learning_rate": 9.988751396869e-06, + "loss": 1128.6189, + "step": 59610 + }, + { + "epoch": 0.12043617206090895, + "grad_norm": 1910.88037109375, + "learning_rate": 9.988727983289195e-06, + "loss": 1662.785, + "step": 59620 + }, + { + "epoch": 0.12045637269359276, + "grad_norm": 139003.75, + "learning_rate": 9.988704545394936e-06, + "loss": 8858.6938, + "step": 59630 + }, + { + "epoch": 0.12047657332627658, + "grad_norm": 35733.828125, + "learning_rate": 9.988681083186336e-06, + "loss": 1748.118, + "step": 59640 + }, + { + "epoch": 0.1204967739589604, + "grad_norm": 1057.966552734375, + "learning_rate": 9.988657596663509e-06, + "loss": 2767.4254, + "step": 59650 + }, + { + "epoch": 0.1205169745916442, + "grad_norm": 2678.61083984375, + "learning_rate": 9.988634085826571e-06, + "loss": 1828.8996, + "step": 59660 + }, + { + "epoch": 0.12053717522432802, + "grad_norm": 14208.8330078125, + "learning_rate": 9.988610550675635e-06, + "loss": 2884.3543, + "step": 59670 + }, + { + "epoch": 0.12055737585701184, + "grad_norm": 77242.6796875, + "learning_rate": 9.988586991210816e-06, + "loss": 3812.2262, + "step": 59680 + }, + { + "epoch": 0.12057757648969565, + "grad_norm": 3737.8056640625, + "learning_rate": 9.98856340743223e-06, + "loss": 1669.8697, + "step": 59690 + }, + { + "epoch": 0.12059777712237947, + "grad_norm": 44887.4375, + "learning_rate": 9.988539799339989e-06, + "loss": 1770.3043, + "step": 59700 + }, + { + "epoch": 0.12061797775506329, + "grad_norm": 3951.089111328125, + "learning_rate": 9.988516166934212e-06, + "loss": 2069.2455, + "step": 59710 + }, + { + "epoch": 0.12063817838774711, + "grad_norm": 4364.43359375, + "learning_rate": 9.988492510215011e-06, + "loss": 2335.8166, + "step": 59720 + }, + { + "epoch": 0.12065837902043092, + "grad_norm": 9110.712890625, + "learning_rate": 9.988468829182504e-06, + "loss": 1705.0227, + "step": 59730 + }, + { + "epoch": 0.12067857965311474, + "grad_norm": 16518.4609375, + "learning_rate": 9.988445123836804e-06, + "loss": 3088.4213, + "step": 59740 + }, + { + "epoch": 0.12069878028579856, + "grad_norm": 2438.78271484375, + "learning_rate": 9.988421394178027e-06, + "loss": 3171.3295, + "step": 59750 + }, + { + "epoch": 0.12071898091848236, + "grad_norm": 9461.2314453125, + "learning_rate": 9.98839764020629e-06, + "loss": 2705.1201, + "step": 59760 + }, + { + "epoch": 0.12073918155116618, + "grad_norm": 7753.07177734375, + "learning_rate": 9.988373861921708e-06, + "loss": 1741.7383, + "step": 59770 + }, + { + "epoch": 0.12075938218385, + "grad_norm": 30974.0703125, + "learning_rate": 9.988350059324396e-06, + "loss": 3154.402, + "step": 59780 + }, + { + "epoch": 0.12077958281653381, + "grad_norm": 39196.86328125, + "learning_rate": 9.988326232414472e-06, + "loss": 4025.2633, + "step": 59790 + }, + { + "epoch": 0.12079978344921763, + "grad_norm": 1073.474365234375, + "learning_rate": 9.98830238119205e-06, + "loss": 1705.042, + "step": 59800 + }, + { + "epoch": 0.12081998408190145, + "grad_norm": 3789.409423828125, + "learning_rate": 9.988278505657247e-06, + "loss": 9402.4453, + "step": 59810 + }, + { + "epoch": 0.12084018471458526, + "grad_norm": 13202.4423828125, + "learning_rate": 9.98825460581018e-06, + "loss": 3571.2355, + "step": 59820 + }, + { + "epoch": 0.12086038534726908, + "grad_norm": 52569.58203125, + "learning_rate": 9.988230681650964e-06, + "loss": 5881.6828, + "step": 59830 + }, + { + "epoch": 0.1208805859799529, + "grad_norm": 1700.0101318359375, + "learning_rate": 9.988206733179718e-06, + "loss": 2438.3963, + "step": 59840 + }, + { + "epoch": 0.1209007866126367, + "grad_norm": 15410.599609375, + "learning_rate": 9.988182760396557e-06, + "loss": 3608.4953, + "step": 59850 + }, + { + "epoch": 0.12092098724532052, + "grad_norm": 3965.8916015625, + "learning_rate": 9.988158763301598e-06, + "loss": 6025.093, + "step": 59860 + }, + { + "epoch": 0.12094118787800434, + "grad_norm": 5461.8125, + "learning_rate": 9.988134741894959e-06, + "loss": 2365.7709, + "step": 59870 + }, + { + "epoch": 0.12096138851068816, + "grad_norm": 12240.1484375, + "learning_rate": 9.988110696176756e-06, + "loss": 5181.366, + "step": 59880 + }, + { + "epoch": 0.12098158914337197, + "grad_norm": 33628.65625, + "learning_rate": 9.988086626147107e-06, + "loss": 3529.9363, + "step": 59890 + }, + { + "epoch": 0.12100178977605579, + "grad_norm": 19441.41796875, + "learning_rate": 9.988062531806127e-06, + "loss": 2244.1873, + "step": 59900 + }, + { + "epoch": 0.12102199040873961, + "grad_norm": 3376.1904296875, + "learning_rate": 9.988038413153936e-06, + "loss": 4057.9945, + "step": 59910 + }, + { + "epoch": 0.12104219104142341, + "grad_norm": 187757.09375, + "learning_rate": 9.988014270190652e-06, + "loss": 9583.8406, + "step": 59920 + }, + { + "epoch": 0.12106239167410723, + "grad_norm": 18978.994140625, + "learning_rate": 9.98799010291639e-06, + "loss": 8825.7477, + "step": 59930 + }, + { + "epoch": 0.12108259230679105, + "grad_norm": 1648.2030029296875, + "learning_rate": 9.987965911331268e-06, + "loss": 3394.7395, + "step": 59940 + }, + { + "epoch": 0.12110279293947486, + "grad_norm": 27486.875, + "learning_rate": 9.987941695435409e-06, + "loss": 2427.6852, + "step": 59950 + }, + { + "epoch": 0.12112299357215868, + "grad_norm": 41283.7421875, + "learning_rate": 9.987917455228924e-06, + "loss": 2431.6857, + "step": 59960 + }, + { + "epoch": 0.1211431942048425, + "grad_norm": 61881.609375, + "learning_rate": 9.987893190711935e-06, + "loss": 1944.7098, + "step": 59970 + }, + { + "epoch": 0.1211633948375263, + "grad_norm": 9708.72265625, + "learning_rate": 9.987868901884558e-06, + "loss": 4014.5473, + "step": 59980 + }, + { + "epoch": 0.12118359547021013, + "grad_norm": 597.2442626953125, + "learning_rate": 9.987844588746916e-06, + "loss": 2160.8777, + "step": 59990 + }, + { + "epoch": 0.12120379610289395, + "grad_norm": 1819.9666748046875, + "learning_rate": 9.987820251299121e-06, + "loss": 6597.5711, + "step": 60000 + }, + { + "epoch": 0.12122399673557775, + "grad_norm": 37475.2890625, + "learning_rate": 9.987795889541298e-06, + "loss": 2830.4133, + "step": 60010 + }, + { + "epoch": 0.12124419736826157, + "grad_norm": 4649.2529296875, + "learning_rate": 9.987771503473562e-06, + "loss": 3509.8008, + "step": 60020 + }, + { + "epoch": 0.12126439800094539, + "grad_norm": 10950.857421875, + "learning_rate": 9.987747093096032e-06, + "loss": 5246.1965, + "step": 60030 + }, + { + "epoch": 0.12128459863362921, + "grad_norm": 29498.869140625, + "learning_rate": 9.987722658408828e-06, + "loss": 2721.4094, + "step": 60040 + }, + { + "epoch": 0.12130479926631302, + "grad_norm": 3102.439697265625, + "learning_rate": 9.98769819941207e-06, + "loss": 1281.1535, + "step": 60050 + }, + { + "epoch": 0.12132499989899684, + "grad_norm": 31018.720703125, + "learning_rate": 9.987673716105874e-06, + "loss": 4753.1773, + "step": 60060 + }, + { + "epoch": 0.12134520053168066, + "grad_norm": 16021.3759765625, + "learning_rate": 9.987649208490361e-06, + "loss": 2968.7203, + "step": 60070 + }, + { + "epoch": 0.12136540116436446, + "grad_norm": 24706.73046875, + "learning_rate": 9.987624676565652e-06, + "loss": 2598.4807, + "step": 60080 + }, + { + "epoch": 0.12138560179704828, + "grad_norm": 8583.3154296875, + "learning_rate": 9.987600120331864e-06, + "loss": 3734.4113, + "step": 60090 + }, + { + "epoch": 0.1214058024297321, + "grad_norm": 47683.671875, + "learning_rate": 9.987575539789119e-06, + "loss": 3754.5168, + "step": 60100 + }, + { + "epoch": 0.12142600306241591, + "grad_norm": 8539.1123046875, + "learning_rate": 9.987550934937536e-06, + "loss": 1694.8328, + "step": 60110 + }, + { + "epoch": 0.12144620369509973, + "grad_norm": 6147.77587890625, + "learning_rate": 9.987526305777234e-06, + "loss": 6956.1023, + "step": 60120 + }, + { + "epoch": 0.12146640432778355, + "grad_norm": 15975.6162109375, + "learning_rate": 9.987501652308333e-06, + "loss": 3512.1992, + "step": 60130 + }, + { + "epoch": 0.12148660496046736, + "grad_norm": 615.2431640625, + "learning_rate": 9.987476974530957e-06, + "loss": 5625.357, + "step": 60140 + }, + { + "epoch": 0.12150680559315118, + "grad_norm": 8473.9638671875, + "learning_rate": 9.98745227244522e-06, + "loss": 5360.4891, + "step": 60150 + }, + { + "epoch": 0.121527006225835, + "grad_norm": 118.85174560546875, + "learning_rate": 9.987427546051246e-06, + "loss": 2753.2766, + "step": 60160 + }, + { + "epoch": 0.1215472068585188, + "grad_norm": 3715.48828125, + "learning_rate": 9.987402795349154e-06, + "loss": 2973.6932, + "step": 60170 + }, + { + "epoch": 0.12156740749120262, + "grad_norm": 116063.0234375, + "learning_rate": 9.987378020339069e-06, + "loss": 4460.3863, + "step": 60180 + }, + { + "epoch": 0.12158760812388644, + "grad_norm": 54105.2421875, + "learning_rate": 9.987353221021106e-06, + "loss": 2465.7742, + "step": 60190 + }, + { + "epoch": 0.12160780875657026, + "grad_norm": 21177.365234375, + "learning_rate": 9.987328397395389e-06, + "loss": 1989.466, + "step": 60200 + }, + { + "epoch": 0.12162800938925407, + "grad_norm": 25262.10546875, + "learning_rate": 9.987303549462038e-06, + "loss": 3485.6859, + "step": 60210 + }, + { + "epoch": 0.12164821002193789, + "grad_norm": 18835.939453125, + "learning_rate": 9.987278677221174e-06, + "loss": 4039.7859, + "step": 60220 + }, + { + "epoch": 0.12166841065462171, + "grad_norm": 301.40338134765625, + "learning_rate": 9.987253780672918e-06, + "loss": 4762.5074, + "step": 60230 + }, + { + "epoch": 0.12168861128730551, + "grad_norm": 25662.8984375, + "learning_rate": 9.987228859817395e-06, + "loss": 4238.3359, + "step": 60240 + }, + { + "epoch": 0.12170881191998933, + "grad_norm": 22833.646484375, + "learning_rate": 9.987203914654721e-06, + "loss": 2290.1805, + "step": 60250 + }, + { + "epoch": 0.12172901255267315, + "grad_norm": 0.0, + "learning_rate": 9.987178945185019e-06, + "loss": 1349.8015, + "step": 60260 + }, + { + "epoch": 0.12174921318535696, + "grad_norm": 1638.138916015625, + "learning_rate": 9.987153951408414e-06, + "loss": 2188.1123, + "step": 60270 + }, + { + "epoch": 0.12176941381804078, + "grad_norm": 3247.018798828125, + "learning_rate": 9.987128933325025e-06, + "loss": 1488.0903, + "step": 60280 + }, + { + "epoch": 0.1217896144507246, + "grad_norm": 374.0785827636719, + "learning_rate": 9.987103890934974e-06, + "loss": 3494.3316, + "step": 60290 + }, + { + "epoch": 0.12180981508340841, + "grad_norm": 93491.9921875, + "learning_rate": 9.987078824238384e-06, + "loss": 5986.1066, + "step": 60300 + }, + { + "epoch": 0.12183001571609223, + "grad_norm": 175494.484375, + "learning_rate": 9.987053733235376e-06, + "loss": 7160.0453, + "step": 60310 + }, + { + "epoch": 0.12185021634877605, + "grad_norm": 7992.60546875, + "learning_rate": 9.987028617926074e-06, + "loss": 2928.798, + "step": 60320 + }, + { + "epoch": 0.12187041698145985, + "grad_norm": 2431.484619140625, + "learning_rate": 9.987003478310597e-06, + "loss": 3234.1848, + "step": 60330 + }, + { + "epoch": 0.12189061761414367, + "grad_norm": 3701.560302734375, + "learning_rate": 9.986978314389071e-06, + "loss": 3215.3904, + "step": 60340 + }, + { + "epoch": 0.1219108182468275, + "grad_norm": 15559.2841796875, + "learning_rate": 9.98695312616162e-06, + "loss": 4374.9777, + "step": 60350 + }, + { + "epoch": 0.12193101887951131, + "grad_norm": 6049.14892578125, + "learning_rate": 9.986927913628361e-06, + "loss": 4112.1145, + "step": 60360 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 32912.109375, + "learning_rate": 9.986902676789421e-06, + "loss": 2922.8801, + "step": 60370 + }, + { + "epoch": 0.12197142014487894, + "grad_norm": 2711.79296875, + "learning_rate": 9.986877415644925e-06, + "loss": 2884.708, + "step": 60380 + }, + { + "epoch": 0.12199162077756276, + "grad_norm": 66860.1328125, + "learning_rate": 9.98685213019499e-06, + "loss": 4506.2492, + "step": 60390 + }, + { + "epoch": 0.12201182141024657, + "grad_norm": 1444.9024658203125, + "learning_rate": 9.986826820439743e-06, + "loss": 3162.2543, + "step": 60400 + }, + { + "epoch": 0.12203202204293039, + "grad_norm": 5436.287109375, + "learning_rate": 9.986801486379307e-06, + "loss": 2522.5336, + "step": 60410 + }, + { + "epoch": 0.1220522226756142, + "grad_norm": 39415.15625, + "learning_rate": 9.986776128013807e-06, + "loss": 2460.6721, + "step": 60420 + }, + { + "epoch": 0.12207242330829801, + "grad_norm": 5633.19677734375, + "learning_rate": 9.986750745343363e-06, + "loss": 2872.8195, + "step": 60430 + }, + { + "epoch": 0.12209262394098183, + "grad_norm": 3547.241943359375, + "learning_rate": 9.986725338368103e-06, + "loss": 2226.7404, + "step": 60440 + }, + { + "epoch": 0.12211282457366565, + "grad_norm": 5645.6943359375, + "learning_rate": 9.986699907088147e-06, + "loss": 1443.766, + "step": 60450 + }, + { + "epoch": 0.12213302520634946, + "grad_norm": 108507.21875, + "learning_rate": 9.986674451503619e-06, + "loss": 5265.3691, + "step": 60460 + }, + { + "epoch": 0.12215322583903328, + "grad_norm": 1147.4263916015625, + "learning_rate": 9.986648971614646e-06, + "loss": 4988.5031, + "step": 60470 + }, + { + "epoch": 0.1221734264717171, + "grad_norm": 48348.21875, + "learning_rate": 9.98662346742135e-06, + "loss": 1691.8113, + "step": 60480 + }, + { + "epoch": 0.1221936271044009, + "grad_norm": 133570.59375, + "learning_rate": 9.986597938923859e-06, + "loss": 3346.493, + "step": 60490 + }, + { + "epoch": 0.12221382773708472, + "grad_norm": 22322.45703125, + "learning_rate": 9.98657238612229e-06, + "loss": 1051.4456, + "step": 60500 + }, + { + "epoch": 0.12223402836976854, + "grad_norm": 3974.955322265625, + "learning_rate": 9.986546809016775e-06, + "loss": 1840.4902, + "step": 60510 + }, + { + "epoch": 0.12225422900245236, + "grad_norm": 2259.66259765625, + "learning_rate": 9.986521207607436e-06, + "loss": 6012.7637, + "step": 60520 + }, + { + "epoch": 0.12227442963513617, + "grad_norm": 16785.4140625, + "learning_rate": 9.986495581894396e-06, + "loss": 2267.7613, + "step": 60530 + }, + { + "epoch": 0.12229463026781999, + "grad_norm": 7430.33203125, + "learning_rate": 9.986469931877781e-06, + "loss": 1274.8156, + "step": 60540 + }, + { + "epoch": 0.12231483090050381, + "grad_norm": 14519.638671875, + "learning_rate": 9.986444257557717e-06, + "loss": 1492.294, + "step": 60550 + }, + { + "epoch": 0.12233503153318762, + "grad_norm": 19959.90234375, + "learning_rate": 9.986418558934329e-06, + "loss": 1887.5316, + "step": 60560 + }, + { + "epoch": 0.12235523216587144, + "grad_norm": 2740.411376953125, + "learning_rate": 9.98639283600774e-06, + "loss": 3575.5117, + "step": 60570 + }, + { + "epoch": 0.12237543279855526, + "grad_norm": 1609.0753173828125, + "learning_rate": 9.98636708877808e-06, + "loss": 634.8992, + "step": 60580 + }, + { + "epoch": 0.12239563343123906, + "grad_norm": 3839.2265625, + "learning_rate": 9.986341317245469e-06, + "loss": 1458.4638, + "step": 60590 + }, + { + "epoch": 0.12241583406392288, + "grad_norm": 37358.7734375, + "learning_rate": 9.986315521410035e-06, + "loss": 4747.2473, + "step": 60600 + }, + { + "epoch": 0.1224360346966067, + "grad_norm": 136791.921875, + "learning_rate": 9.986289701271905e-06, + "loss": 5663.4812, + "step": 60610 + }, + { + "epoch": 0.12245623532929051, + "grad_norm": 56087.875, + "learning_rate": 9.986263856831204e-06, + "loss": 5467.8141, + "step": 60620 + }, + { + "epoch": 0.12247643596197433, + "grad_norm": 20470.822265625, + "learning_rate": 9.986237988088059e-06, + "loss": 2270.4703, + "step": 60630 + }, + { + "epoch": 0.12249663659465815, + "grad_norm": 5701.34619140625, + "learning_rate": 9.986212095042593e-06, + "loss": 4038.3324, + "step": 60640 + }, + { + "epoch": 0.12251683722734195, + "grad_norm": 83785.109375, + "learning_rate": 9.986186177694935e-06, + "loss": 3739.5246, + "step": 60650 + }, + { + "epoch": 0.12253703786002577, + "grad_norm": 17950.451171875, + "learning_rate": 9.986160236045207e-06, + "loss": 1931.4412, + "step": 60660 + }, + { + "epoch": 0.1225572384927096, + "grad_norm": 2634.085693359375, + "learning_rate": 9.986134270093542e-06, + "loss": 4464.6227, + "step": 60670 + }, + { + "epoch": 0.12257743912539341, + "grad_norm": 16320.5546875, + "learning_rate": 9.986108279840063e-06, + "loss": 3486.6512, + "step": 60680 + }, + { + "epoch": 0.12259763975807722, + "grad_norm": 4238.9580078125, + "learning_rate": 9.986082265284896e-06, + "loss": 2327.2242, + "step": 60690 + }, + { + "epoch": 0.12261784039076104, + "grad_norm": 86043.8203125, + "learning_rate": 9.98605622642817e-06, + "loss": 2232.3922, + "step": 60700 + }, + { + "epoch": 0.12263804102344486, + "grad_norm": 9666.8642578125, + "learning_rate": 9.986030163270011e-06, + "loss": 3086.3812, + "step": 60710 + }, + { + "epoch": 0.12265824165612867, + "grad_norm": 3583.93603515625, + "learning_rate": 9.986004075810543e-06, + "loss": 4001.9922, + "step": 60720 + }, + { + "epoch": 0.12267844228881249, + "grad_norm": 107197.3359375, + "learning_rate": 9.985977964049898e-06, + "loss": 7369.8766, + "step": 60730 + }, + { + "epoch": 0.1226986429214963, + "grad_norm": 10180.7099609375, + "learning_rate": 9.9859518279882e-06, + "loss": 3424.5184, + "step": 60740 + }, + { + "epoch": 0.12271884355418011, + "grad_norm": 8723.736328125, + "learning_rate": 9.985925667625581e-06, + "loss": 1418.4887, + "step": 60750 + }, + { + "epoch": 0.12273904418686393, + "grad_norm": 1492.6524658203125, + "learning_rate": 9.98589948296216e-06, + "loss": 1978.2473, + "step": 60760 + }, + { + "epoch": 0.12275924481954775, + "grad_norm": 48723.88671875, + "learning_rate": 9.985873273998072e-06, + "loss": 3060.6285, + "step": 60770 + }, + { + "epoch": 0.12277944545223156, + "grad_norm": 616.0737915039062, + "learning_rate": 9.985847040733442e-06, + "loss": 1504.8812, + "step": 60780 + }, + { + "epoch": 0.12279964608491538, + "grad_norm": 1042.358154296875, + "learning_rate": 9.9858207831684e-06, + "loss": 2671.6229, + "step": 60790 + }, + { + "epoch": 0.1228198467175992, + "grad_norm": 2034.6671142578125, + "learning_rate": 9.98579450130307e-06, + "loss": 2440.7375, + "step": 60800 + }, + { + "epoch": 0.122840047350283, + "grad_norm": 1181.489501953125, + "learning_rate": 9.985768195137585e-06, + "loss": 4421.9758, + "step": 60810 + }, + { + "epoch": 0.12286024798296682, + "grad_norm": 792.23876953125, + "learning_rate": 9.985741864672067e-06, + "loss": 3335.4059, + "step": 60820 + }, + { + "epoch": 0.12288044861565064, + "grad_norm": 764.6517333984375, + "learning_rate": 9.985715509906649e-06, + "loss": 2595.7316, + "step": 60830 + }, + { + "epoch": 0.12290064924833445, + "grad_norm": 35475.609375, + "learning_rate": 9.985689130841459e-06, + "loss": 1624.6469, + "step": 60840 + }, + { + "epoch": 0.12292084988101827, + "grad_norm": 16005.1669921875, + "learning_rate": 9.985662727476625e-06, + "loss": 4721.1012, + "step": 60850 + }, + { + "epoch": 0.12294105051370209, + "grad_norm": 22601.359375, + "learning_rate": 9.985636299812275e-06, + "loss": 1956.0451, + "step": 60860 + }, + { + "epoch": 0.12296125114638591, + "grad_norm": 2133.243408203125, + "learning_rate": 9.98560984784854e-06, + "loss": 4906.8344, + "step": 60870 + }, + { + "epoch": 0.12298145177906972, + "grad_norm": 116727.4296875, + "learning_rate": 9.985583371585544e-06, + "loss": 5515.0453, + "step": 60880 + }, + { + "epoch": 0.12300165241175354, + "grad_norm": 614.22509765625, + "learning_rate": 9.98555687102342e-06, + "loss": 1061.6944, + "step": 60890 + }, + { + "epoch": 0.12302185304443736, + "grad_norm": 4998.46533203125, + "learning_rate": 9.9855303461623e-06, + "loss": 3132.1943, + "step": 60900 + }, + { + "epoch": 0.12304205367712116, + "grad_norm": 8912.123046875, + "learning_rate": 9.985503797002307e-06, + "loss": 3404.1137, + "step": 60910 + }, + { + "epoch": 0.12306225430980498, + "grad_norm": 15758.466796875, + "learning_rate": 9.985477223543574e-06, + "loss": 3335.952, + "step": 60920 + }, + { + "epoch": 0.1230824549424888, + "grad_norm": 8990.0751953125, + "learning_rate": 9.985450625786228e-06, + "loss": 6397.4902, + "step": 60930 + }, + { + "epoch": 0.12310265557517261, + "grad_norm": 44474.015625, + "learning_rate": 9.985424003730403e-06, + "loss": 2284.1684, + "step": 60940 + }, + { + "epoch": 0.12312285620785643, + "grad_norm": 1433.2437744140625, + "learning_rate": 9.985397357376224e-06, + "loss": 4057.3012, + "step": 60950 + }, + { + "epoch": 0.12314305684054025, + "grad_norm": 36638.4140625, + "learning_rate": 9.985370686723823e-06, + "loss": 7357.7297, + "step": 60960 + }, + { + "epoch": 0.12316325747322406, + "grad_norm": 3376.573486328125, + "learning_rate": 9.985343991773331e-06, + "loss": 10370.6953, + "step": 60970 + }, + { + "epoch": 0.12318345810590788, + "grad_norm": 2783.5146484375, + "learning_rate": 9.985317272524876e-06, + "loss": 7031.5914, + "step": 60980 + }, + { + "epoch": 0.1232036587385917, + "grad_norm": 5471.44970703125, + "learning_rate": 9.98529052897859e-06, + "loss": 5365.2133, + "step": 60990 + }, + { + "epoch": 0.1232238593712755, + "grad_norm": 2312.595458984375, + "learning_rate": 9.985263761134602e-06, + "loss": 2139.3711, + "step": 61000 + }, + { + "epoch": 0.12324406000395932, + "grad_norm": 27075.439453125, + "learning_rate": 9.985236968993044e-06, + "loss": 2290.1148, + "step": 61010 + }, + { + "epoch": 0.12326426063664314, + "grad_norm": 102374.6171875, + "learning_rate": 9.985210152554045e-06, + "loss": 4262.2906, + "step": 61020 + }, + { + "epoch": 0.12328446126932696, + "grad_norm": 6475.6416015625, + "learning_rate": 9.985183311817736e-06, + "loss": 3870.0094, + "step": 61030 + }, + { + "epoch": 0.12330466190201077, + "grad_norm": 18243.189453125, + "learning_rate": 9.985156446784249e-06, + "loss": 4488.2723, + "step": 61040 + }, + { + "epoch": 0.12332486253469459, + "grad_norm": 2405.422119140625, + "learning_rate": 9.985129557453714e-06, + "loss": 1155.9815, + "step": 61050 + }, + { + "epoch": 0.12334506316737841, + "grad_norm": 63420.55078125, + "learning_rate": 9.985102643826261e-06, + "loss": 2408.9275, + "step": 61060 + }, + { + "epoch": 0.12336526380006221, + "grad_norm": 610.6967163085938, + "learning_rate": 9.985075705902024e-06, + "loss": 3012.8602, + "step": 61070 + }, + { + "epoch": 0.12338546443274603, + "grad_norm": 27073.865234375, + "learning_rate": 9.985048743681131e-06, + "loss": 2460.8801, + "step": 61080 + }, + { + "epoch": 0.12340566506542985, + "grad_norm": 10248.6845703125, + "learning_rate": 9.985021757163715e-06, + "loss": 4749.7465, + "step": 61090 + }, + { + "epoch": 0.12342586569811366, + "grad_norm": 54281.69140625, + "learning_rate": 9.98499474634991e-06, + "loss": 3743.3203, + "step": 61100 + }, + { + "epoch": 0.12344606633079748, + "grad_norm": 122.65604400634766, + "learning_rate": 9.984967711239844e-06, + "loss": 1226.1305, + "step": 61110 + }, + { + "epoch": 0.1234662669634813, + "grad_norm": 10581.466796875, + "learning_rate": 9.984940651833648e-06, + "loss": 2305.2178, + "step": 61120 + }, + { + "epoch": 0.1234864675961651, + "grad_norm": 2602.560302734375, + "learning_rate": 9.984913568131458e-06, + "loss": 5549.4664, + "step": 61130 + }, + { + "epoch": 0.12350666822884893, + "grad_norm": 23918.046875, + "learning_rate": 9.984886460133403e-06, + "loss": 1712.6, + "step": 61140 + }, + { + "epoch": 0.12352686886153275, + "grad_norm": 28862.78515625, + "learning_rate": 9.984859327839617e-06, + "loss": 3408.1914, + "step": 61150 + }, + { + "epoch": 0.12354706949421655, + "grad_norm": 1566.1912841796875, + "learning_rate": 9.98483217125023e-06, + "loss": 3963.609, + "step": 61160 + }, + { + "epoch": 0.12356727012690037, + "grad_norm": 101.78276824951172, + "learning_rate": 9.984804990365376e-06, + "loss": 3675.123, + "step": 61170 + }, + { + "epoch": 0.12358747075958419, + "grad_norm": 51943.40625, + "learning_rate": 9.984777785185188e-06, + "loss": 3871.3148, + "step": 61180 + }, + { + "epoch": 0.12360767139226801, + "grad_norm": 1059.836669921875, + "learning_rate": 9.984750555709797e-06, + "loss": 2872.5096, + "step": 61190 + }, + { + "epoch": 0.12362787202495182, + "grad_norm": 15709.03515625, + "learning_rate": 9.984723301939337e-06, + "loss": 1627.2647, + "step": 61200 + }, + { + "epoch": 0.12364807265763564, + "grad_norm": 12252.15625, + "learning_rate": 9.984696023873939e-06, + "loss": 1813.1695, + "step": 61210 + }, + { + "epoch": 0.12366827329031946, + "grad_norm": 10820.07421875, + "learning_rate": 9.984668721513737e-06, + "loss": 3176.9262, + "step": 61220 + }, + { + "epoch": 0.12368847392300326, + "grad_norm": 2889.476318359375, + "learning_rate": 9.984641394858865e-06, + "loss": 3244.4207, + "step": 61230 + }, + { + "epoch": 0.12370867455568708, + "grad_norm": 39414.546875, + "learning_rate": 9.984614043909455e-06, + "loss": 3434.925, + "step": 61240 + }, + { + "epoch": 0.1237288751883709, + "grad_norm": 19789.279296875, + "learning_rate": 9.984586668665641e-06, + "loss": 3786.8645, + "step": 61250 + }, + { + "epoch": 0.12374907582105471, + "grad_norm": 11345.666015625, + "learning_rate": 9.984559269127557e-06, + "loss": 3538.2602, + "step": 61260 + }, + { + "epoch": 0.12376927645373853, + "grad_norm": 1058.683837890625, + "learning_rate": 9.984531845295333e-06, + "loss": 9317.2172, + "step": 61270 + }, + { + "epoch": 0.12378947708642235, + "grad_norm": 11078.6728515625, + "learning_rate": 9.984504397169107e-06, + "loss": 5478.2121, + "step": 61280 + }, + { + "epoch": 0.12380967771910616, + "grad_norm": 2351.63916015625, + "learning_rate": 9.984476924749011e-06, + "loss": 5403.9602, + "step": 61290 + }, + { + "epoch": 0.12382987835178998, + "grad_norm": 48031.265625, + "learning_rate": 9.98444942803518e-06, + "loss": 3616.8152, + "step": 61300 + }, + { + "epoch": 0.1238500789844738, + "grad_norm": 11344.9404296875, + "learning_rate": 9.984421907027747e-06, + "loss": 1312.9378, + "step": 61310 + }, + { + "epoch": 0.1238702796171576, + "grad_norm": 9833.6064453125, + "learning_rate": 9.984394361726844e-06, + "loss": 4834.5441, + "step": 61320 + }, + { + "epoch": 0.12389048024984142, + "grad_norm": 1238.822509765625, + "learning_rate": 9.98436679213261e-06, + "loss": 1807.3527, + "step": 61330 + }, + { + "epoch": 0.12391068088252524, + "grad_norm": 347.987060546875, + "learning_rate": 9.984339198245175e-06, + "loss": 2784.4559, + "step": 61340 + }, + { + "epoch": 0.12393088151520906, + "grad_norm": 15995.09765625, + "learning_rate": 9.984311580064676e-06, + "loss": 4270.2027, + "step": 61350 + }, + { + "epoch": 0.12395108214789287, + "grad_norm": 487.0937805175781, + "learning_rate": 9.984283937591246e-06, + "loss": 789.9274, + "step": 61360 + }, + { + "epoch": 0.12397128278057669, + "grad_norm": 1705.3646240234375, + "learning_rate": 9.98425627082502e-06, + "loss": 3714.8711, + "step": 61370 + }, + { + "epoch": 0.12399148341326051, + "grad_norm": 10180.7197265625, + "learning_rate": 9.984228579766136e-06, + "loss": 2766.1988, + "step": 61380 + }, + { + "epoch": 0.12401168404594431, + "grad_norm": 22904.02734375, + "learning_rate": 9.984200864414726e-06, + "loss": 2088.3736, + "step": 61390 + }, + { + "epoch": 0.12403188467862813, + "grad_norm": 32179.646484375, + "learning_rate": 9.984173124770924e-06, + "loss": 2351.573, + "step": 61400 + }, + { + "epoch": 0.12405208531131195, + "grad_norm": 5436.36181640625, + "learning_rate": 9.984145360834868e-06, + "loss": 2438.043, + "step": 61410 + }, + { + "epoch": 0.12407228594399576, + "grad_norm": 24569.087890625, + "learning_rate": 9.984117572606691e-06, + "loss": 2868.0906, + "step": 61420 + }, + { + "epoch": 0.12409248657667958, + "grad_norm": 3812.29150390625, + "learning_rate": 9.984089760086531e-06, + "loss": 2891.716, + "step": 61430 + }, + { + "epoch": 0.1241126872093634, + "grad_norm": 17118.0390625, + "learning_rate": 9.98406192327452e-06, + "loss": 2448.2609, + "step": 61440 + }, + { + "epoch": 0.12413288784204721, + "grad_norm": 40577.92578125, + "learning_rate": 9.984034062170796e-06, + "loss": 3258.9582, + "step": 61450 + }, + { + "epoch": 0.12415308847473103, + "grad_norm": 1249.5191650390625, + "learning_rate": 9.984006176775496e-06, + "loss": 1229.9271, + "step": 61460 + }, + { + "epoch": 0.12417328910741485, + "grad_norm": 13416.6455078125, + "learning_rate": 9.983978267088753e-06, + "loss": 4004.3496, + "step": 61470 + }, + { + "epoch": 0.12419348974009865, + "grad_norm": 21646.43359375, + "learning_rate": 9.983950333110705e-06, + "loss": 3780.341, + "step": 61480 + }, + { + "epoch": 0.12421369037278247, + "grad_norm": 24681.271484375, + "learning_rate": 9.983922374841488e-06, + "loss": 1805.7313, + "step": 61490 + }, + { + "epoch": 0.1242338910054663, + "grad_norm": 14165.06640625, + "learning_rate": 9.983894392281237e-06, + "loss": 1230.9114, + "step": 61500 + }, + { + "epoch": 0.12425409163815011, + "grad_norm": 17080.9296875, + "learning_rate": 9.98386638543009e-06, + "loss": 2441.8227, + "step": 61510 + }, + { + "epoch": 0.12427429227083392, + "grad_norm": 12956.861328125, + "learning_rate": 9.983838354288181e-06, + "loss": 1211.3677, + "step": 61520 + }, + { + "epoch": 0.12429449290351774, + "grad_norm": 23752.548828125, + "learning_rate": 9.98381029885565e-06, + "loss": 1821.098, + "step": 61530 + }, + { + "epoch": 0.12431469353620156, + "grad_norm": 18162.412109375, + "learning_rate": 9.983782219132631e-06, + "loss": 4605.5461, + "step": 61540 + }, + { + "epoch": 0.12433489416888537, + "grad_norm": 19029.97265625, + "learning_rate": 9.983754115119262e-06, + "loss": 2285.7559, + "step": 61550 + }, + { + "epoch": 0.12435509480156919, + "grad_norm": 2400.649169921875, + "learning_rate": 9.983725986815682e-06, + "loss": 3786.9387, + "step": 61560 + }, + { + "epoch": 0.124375295434253, + "grad_norm": 10758.8828125, + "learning_rate": 9.983697834222024e-06, + "loss": 4720.9672, + "step": 61570 + }, + { + "epoch": 0.12439549606693681, + "grad_norm": 69873.1328125, + "learning_rate": 9.983669657338425e-06, + "loss": 4197.8352, + "step": 61580 + }, + { + "epoch": 0.12441569669962063, + "grad_norm": 72285.9296875, + "learning_rate": 9.98364145616503e-06, + "loss": 6467.7004, + "step": 61590 + }, + { + "epoch": 0.12443589733230445, + "grad_norm": 126204.453125, + "learning_rate": 9.983613230701967e-06, + "loss": 5601.5117, + "step": 61600 + }, + { + "epoch": 0.12445609796498826, + "grad_norm": 33351.16796875, + "learning_rate": 9.98358498094938e-06, + "loss": 1697.2125, + "step": 61610 + }, + { + "epoch": 0.12447629859767208, + "grad_norm": 32338.904296875, + "learning_rate": 9.983556706907401e-06, + "loss": 3578.8352, + "step": 61620 + }, + { + "epoch": 0.1244964992303559, + "grad_norm": 8274.4853515625, + "learning_rate": 9.983528408576173e-06, + "loss": 1209.8674, + "step": 61630 + }, + { + "epoch": 0.1245166998630397, + "grad_norm": 27178.875, + "learning_rate": 9.983500085955833e-06, + "loss": 4080.698, + "step": 61640 + }, + { + "epoch": 0.12453690049572352, + "grad_norm": 10880.5830078125, + "learning_rate": 9.983471739046515e-06, + "loss": 3382.0551, + "step": 61650 + }, + { + "epoch": 0.12455710112840734, + "grad_norm": 6436.4296875, + "learning_rate": 9.983443367848363e-06, + "loss": 2797.7182, + "step": 61660 + }, + { + "epoch": 0.12457730176109116, + "grad_norm": 39940.12890625, + "learning_rate": 9.98341497236151e-06, + "loss": 4716.9281, + "step": 61670 + }, + { + "epoch": 0.12459750239377497, + "grad_norm": 335.7183837890625, + "learning_rate": 9.9833865525861e-06, + "loss": 2821.9875, + "step": 61680 + }, + { + "epoch": 0.12461770302645879, + "grad_norm": 19210.576171875, + "learning_rate": 9.983358108522266e-06, + "loss": 2361.7398, + "step": 61690 + }, + { + "epoch": 0.12463790365914261, + "grad_norm": 15272.181640625, + "learning_rate": 9.98332964017015e-06, + "loss": 1782.7432, + "step": 61700 + }, + { + "epoch": 0.12465810429182642, + "grad_norm": 18786.265625, + "learning_rate": 9.98330114752989e-06, + "loss": 5280.5023, + "step": 61710 + }, + { + "epoch": 0.12467830492451024, + "grad_norm": 1035.959228515625, + "learning_rate": 9.983272630601624e-06, + "loss": 1728.584, + "step": 61720 + }, + { + "epoch": 0.12469850555719406, + "grad_norm": 52356.30859375, + "learning_rate": 9.983244089385491e-06, + "loss": 5185.4781, + "step": 61730 + }, + { + "epoch": 0.12471870618987786, + "grad_norm": 7691.5703125, + "learning_rate": 9.98321552388163e-06, + "loss": 5692.0395, + "step": 61740 + }, + { + "epoch": 0.12473890682256168, + "grad_norm": 18223.591796875, + "learning_rate": 9.983186934090183e-06, + "loss": 2114.6721, + "step": 61750 + }, + { + "epoch": 0.1247591074552455, + "grad_norm": 24118.63671875, + "learning_rate": 9.983158320011288e-06, + "loss": 2576.3998, + "step": 61760 + }, + { + "epoch": 0.12477930808792931, + "grad_norm": 68525.703125, + "learning_rate": 9.983129681645082e-06, + "loss": 6035.2109, + "step": 61770 + }, + { + "epoch": 0.12479950872061313, + "grad_norm": 26051.22265625, + "learning_rate": 9.983101018991706e-06, + "loss": 2999.2516, + "step": 61780 + }, + { + "epoch": 0.12481970935329695, + "grad_norm": 3271.053466796875, + "learning_rate": 9.9830723320513e-06, + "loss": 1510.718, + "step": 61790 + }, + { + "epoch": 0.12483990998598075, + "grad_norm": 32751.5859375, + "learning_rate": 9.983043620824005e-06, + "loss": 1366.1689, + "step": 61800 + }, + { + "epoch": 0.12486011061866457, + "grad_norm": 67832.7109375, + "learning_rate": 9.983014885309959e-06, + "loss": 4362.7055, + "step": 61810 + }, + { + "epoch": 0.1248803112513484, + "grad_norm": 2669.682861328125, + "learning_rate": 9.982986125509303e-06, + "loss": 2748.5615, + "step": 61820 + }, + { + "epoch": 0.12490051188403221, + "grad_norm": 10914.2822265625, + "learning_rate": 9.982957341422177e-06, + "loss": 2452.9396, + "step": 61830 + }, + { + "epoch": 0.12492071251671602, + "grad_norm": 3991.28515625, + "learning_rate": 9.982928533048722e-06, + "loss": 3062.4666, + "step": 61840 + }, + { + "epoch": 0.12494091314939984, + "grad_norm": 2218.591552734375, + "learning_rate": 9.982899700389077e-06, + "loss": 1982.0381, + "step": 61850 + }, + { + "epoch": 0.12496111378208366, + "grad_norm": 36165.9921875, + "learning_rate": 9.982870843443381e-06, + "loss": 1223.049, + "step": 61860 + }, + { + "epoch": 0.12498131441476747, + "grad_norm": 129831.078125, + "learning_rate": 9.98284196221178e-06, + "loss": 9423.7219, + "step": 61870 + }, + { + "epoch": 0.12500151504745127, + "grad_norm": 11810.9501953125, + "learning_rate": 9.982813056694411e-06, + "loss": 2066.0961, + "step": 61880 + }, + { + "epoch": 0.1250217156801351, + "grad_norm": 3571.320556640625, + "learning_rate": 9.982784126891416e-06, + "loss": 2259.3084, + "step": 61890 + }, + { + "epoch": 0.1250419163128189, + "grad_norm": 778.0187377929688, + "learning_rate": 9.982755172802933e-06, + "loss": 1109.7163, + "step": 61900 + }, + { + "epoch": 0.12506211694550273, + "grad_norm": 29975.5703125, + "learning_rate": 9.98272619442911e-06, + "loss": 3055.2934, + "step": 61910 + }, + { + "epoch": 0.12508231757818655, + "grad_norm": 28437.4453125, + "learning_rate": 9.982697191770079e-06, + "loss": 3026.0465, + "step": 61920 + }, + { + "epoch": 0.12510251821087037, + "grad_norm": 1028.0411376953125, + "learning_rate": 9.982668164825989e-06, + "loss": 3642.1988, + "step": 61930 + }, + { + "epoch": 0.1251227188435542, + "grad_norm": 27965.00390625, + "learning_rate": 9.982639113596978e-06, + "loss": 1667.8297, + "step": 61940 + }, + { + "epoch": 0.12514291947623798, + "grad_norm": 43192.15234375, + "learning_rate": 9.982610038083188e-06, + "loss": 2940.9844, + "step": 61950 + }, + { + "epoch": 0.1251631201089218, + "grad_norm": 75727.5703125, + "learning_rate": 9.98258093828476e-06, + "loss": 2528.1342, + "step": 61960 + }, + { + "epoch": 0.12518332074160562, + "grad_norm": 15823.5107421875, + "learning_rate": 9.98255181420184e-06, + "loss": 1762.6414, + "step": 61970 + }, + { + "epoch": 0.12520352137428944, + "grad_norm": 3360.115234375, + "learning_rate": 9.982522665834565e-06, + "loss": 3086.784, + "step": 61980 + }, + { + "epoch": 0.12522372200697326, + "grad_norm": 2892.57275390625, + "learning_rate": 9.982493493183079e-06, + "loss": 2698.251, + "step": 61990 + }, + { + "epoch": 0.12524392263965708, + "grad_norm": 5355.1005859375, + "learning_rate": 9.982464296247523e-06, + "loss": 2559.6438, + "step": 62000 + }, + { + "epoch": 0.12526412327234088, + "grad_norm": 6822.35546875, + "learning_rate": 9.98243507502804e-06, + "loss": 1812.6957, + "step": 62010 + }, + { + "epoch": 0.1252843239050247, + "grad_norm": 2101.65380859375, + "learning_rate": 9.982405829524774e-06, + "loss": 1177.5099, + "step": 62020 + }, + { + "epoch": 0.12530452453770852, + "grad_norm": 64419.7578125, + "learning_rate": 9.982376559737866e-06, + "loss": 2806.4215, + "step": 62030 + }, + { + "epoch": 0.12532472517039234, + "grad_norm": 5028.44677734375, + "learning_rate": 9.982347265667459e-06, + "loss": 5786.2672, + "step": 62040 + }, + { + "epoch": 0.12534492580307616, + "grad_norm": 23970.2421875, + "learning_rate": 9.982317947313695e-06, + "loss": 2165.3168, + "step": 62050 + }, + { + "epoch": 0.12536512643575998, + "grad_norm": 1358.4024658203125, + "learning_rate": 9.982288604676719e-06, + "loss": 5030.7492, + "step": 62060 + }, + { + "epoch": 0.1253853270684438, + "grad_norm": 42074.49609375, + "learning_rate": 9.982259237756674e-06, + "loss": 1517.4367, + "step": 62070 + }, + { + "epoch": 0.1254055277011276, + "grad_norm": 247812.5625, + "learning_rate": 9.982229846553698e-06, + "loss": 5386.4906, + "step": 62080 + }, + { + "epoch": 0.1254257283338114, + "grad_norm": 20906.892578125, + "learning_rate": 9.982200431067939e-06, + "loss": 4158.1617, + "step": 62090 + }, + { + "epoch": 0.12544592896649523, + "grad_norm": 3861.146484375, + "learning_rate": 9.98217099129954e-06, + "loss": 1323.4672, + "step": 62100 + }, + { + "epoch": 0.12546612959917905, + "grad_norm": 5814.69091796875, + "learning_rate": 9.982141527248646e-06, + "loss": 1977.2148, + "step": 62110 + }, + { + "epoch": 0.12548633023186287, + "grad_norm": 8721.91796875, + "learning_rate": 9.982112038915394e-06, + "loss": 6637.7102, + "step": 62120 + }, + { + "epoch": 0.1255065308645467, + "grad_norm": 32432.56640625, + "learning_rate": 9.982082526299935e-06, + "loss": 2837.1928, + "step": 62130 + }, + { + "epoch": 0.12552673149723048, + "grad_norm": 65284.82421875, + "learning_rate": 9.98205298940241e-06, + "loss": 2654.5619, + "step": 62140 + }, + { + "epoch": 0.1255469321299143, + "grad_norm": 1833.07080078125, + "learning_rate": 9.982023428222963e-06, + "loss": 1224.4909, + "step": 62150 + }, + { + "epoch": 0.12556713276259812, + "grad_norm": 38951.7421875, + "learning_rate": 9.981993842761737e-06, + "loss": 3538.6055, + "step": 62160 + }, + { + "epoch": 0.12558733339528194, + "grad_norm": 4687.1494140625, + "learning_rate": 9.981964233018877e-06, + "loss": 2079.7035, + "step": 62170 + }, + { + "epoch": 0.12560753402796576, + "grad_norm": 25593.5, + "learning_rate": 9.981934598994529e-06, + "loss": 4018.5363, + "step": 62180 + }, + { + "epoch": 0.12562773466064958, + "grad_norm": 38221.8359375, + "learning_rate": 9.981904940688836e-06, + "loss": 2304.8551, + "step": 62190 + }, + { + "epoch": 0.12564793529333337, + "grad_norm": 11973.7431640625, + "learning_rate": 9.981875258101944e-06, + "loss": 3211.0031, + "step": 62200 + }, + { + "epoch": 0.1256681359260172, + "grad_norm": 66498.3671875, + "learning_rate": 9.981845551233993e-06, + "loss": 3398.8234, + "step": 62210 + }, + { + "epoch": 0.125688336558701, + "grad_norm": 92880.2421875, + "learning_rate": 9.981815820085132e-06, + "loss": 5137.1246, + "step": 62220 + }, + { + "epoch": 0.12570853719138483, + "grad_norm": 2685.471923828125, + "learning_rate": 9.981786064655505e-06, + "loss": 2587.184, + "step": 62230 + }, + { + "epoch": 0.12572873782406865, + "grad_norm": 33271.33203125, + "learning_rate": 9.981756284945256e-06, + "loss": 2872.927, + "step": 62240 + }, + { + "epoch": 0.12574893845675247, + "grad_norm": 31085.828125, + "learning_rate": 9.981726480954532e-06, + "loss": 3225.3936, + "step": 62250 + }, + { + "epoch": 0.1257691390894363, + "grad_norm": 278.3596496582031, + "learning_rate": 9.981696652683479e-06, + "loss": 3039.7127, + "step": 62260 + }, + { + "epoch": 0.12578933972212009, + "grad_norm": 3066.088623046875, + "learning_rate": 9.98166680013224e-06, + "loss": 2847.4523, + "step": 62270 + }, + { + "epoch": 0.1258095403548039, + "grad_norm": 11280.197265625, + "learning_rate": 9.981636923300959e-06, + "loss": 2112.1246, + "step": 62280 + }, + { + "epoch": 0.12582974098748773, + "grad_norm": 24973.353515625, + "learning_rate": 9.981607022189785e-06, + "loss": 2470.8715, + "step": 62290 + }, + { + "epoch": 0.12584994162017155, + "grad_norm": 2963.62353515625, + "learning_rate": 9.981577096798864e-06, + "loss": 2784.1832, + "step": 62300 + }, + { + "epoch": 0.12587014225285537, + "grad_norm": 7638.2666015625, + "learning_rate": 9.981547147128338e-06, + "loss": 2343.5334, + "step": 62310 + }, + { + "epoch": 0.12589034288553919, + "grad_norm": 13815.076171875, + "learning_rate": 9.981517173178357e-06, + "loss": 4965.3895, + "step": 62320 + }, + { + "epoch": 0.12591054351822298, + "grad_norm": 204339.421875, + "learning_rate": 9.981487174949065e-06, + "loss": 4401.6039, + "step": 62330 + }, + { + "epoch": 0.1259307441509068, + "grad_norm": 23629.3125, + "learning_rate": 9.98145715244061e-06, + "loss": 2950.967, + "step": 62340 + }, + { + "epoch": 0.12595094478359062, + "grad_norm": 67510.359375, + "learning_rate": 9.981427105653135e-06, + "loss": 1081.7263, + "step": 62350 + }, + { + "epoch": 0.12597114541627444, + "grad_norm": 4033.1005859375, + "learning_rate": 9.981397034586789e-06, + "loss": 1952.825, + "step": 62360 + }, + { + "epoch": 0.12599134604895826, + "grad_norm": 17030.771484375, + "learning_rate": 9.981366939241719e-06, + "loss": 1723.3547, + "step": 62370 + }, + { + "epoch": 0.12601154668164208, + "grad_norm": 744.0303955078125, + "learning_rate": 9.98133681961807e-06, + "loss": 1009.0185, + "step": 62380 + }, + { + "epoch": 0.1260317473143259, + "grad_norm": 877.2825317382812, + "learning_rate": 9.981306675715989e-06, + "loss": 4113.1406, + "step": 62390 + }, + { + "epoch": 0.1260519479470097, + "grad_norm": 16017.84375, + "learning_rate": 9.981276507535625e-06, + "loss": 2659.2074, + "step": 62400 + }, + { + "epoch": 0.1260721485796935, + "grad_norm": 3844.106201171875, + "learning_rate": 9.981246315077123e-06, + "loss": 2414.7855, + "step": 62410 + }, + { + "epoch": 0.12609234921237733, + "grad_norm": 2036.12646484375, + "learning_rate": 9.98121609834063e-06, + "loss": 2328.4176, + "step": 62420 + }, + { + "epoch": 0.12611254984506115, + "grad_norm": 4356.30322265625, + "learning_rate": 9.981185857326292e-06, + "loss": 1905.0504, + "step": 62430 + }, + { + "epoch": 0.12613275047774497, + "grad_norm": 95650.9609375, + "learning_rate": 9.98115559203426e-06, + "loss": 4566.9523, + "step": 62440 + }, + { + "epoch": 0.1261529511104288, + "grad_norm": 3926.173095703125, + "learning_rate": 9.981125302464681e-06, + "loss": 6850.8, + "step": 62450 + }, + { + "epoch": 0.12617315174311258, + "grad_norm": 11986.4912109375, + "learning_rate": 9.9810949886177e-06, + "loss": 2186.8301, + "step": 62460 + }, + { + "epoch": 0.1261933523757964, + "grad_norm": 52400.22265625, + "learning_rate": 9.981064650493466e-06, + "loss": 3957.0016, + "step": 62470 + }, + { + "epoch": 0.12621355300848022, + "grad_norm": 2332.295654296875, + "learning_rate": 9.981034288092129e-06, + "loss": 1442.7105, + "step": 62480 + }, + { + "epoch": 0.12623375364116404, + "grad_norm": 3816.990966796875, + "learning_rate": 9.981003901413833e-06, + "loss": 1551.7761, + "step": 62490 + }, + { + "epoch": 0.12625395427384786, + "grad_norm": 119256.234375, + "learning_rate": 9.980973490458728e-06, + "loss": 5124.593, + "step": 62500 + }, + { + "epoch": 0.12627415490653168, + "grad_norm": 23680.5390625, + "learning_rate": 9.980943055226964e-06, + "loss": 6331.6648, + "step": 62510 + }, + { + "epoch": 0.12629435553921547, + "grad_norm": 4324.0361328125, + "learning_rate": 9.980912595718686e-06, + "loss": 1948.1896, + "step": 62520 + }, + { + "epoch": 0.1263145561718993, + "grad_norm": 9733.3466796875, + "learning_rate": 9.980882111934046e-06, + "loss": 2218.0752, + "step": 62530 + }, + { + "epoch": 0.12633475680458311, + "grad_norm": 4115.40087890625, + "learning_rate": 9.980851603873189e-06, + "loss": 1774.3684, + "step": 62540 + }, + { + "epoch": 0.12635495743726693, + "grad_norm": 10020.798828125, + "learning_rate": 9.980821071536266e-06, + "loss": 1090.5064, + "step": 62550 + }, + { + "epoch": 0.12637515806995075, + "grad_norm": 1849.2899169921875, + "learning_rate": 9.980790514923425e-06, + "loss": 1482.3883, + "step": 62560 + }, + { + "epoch": 0.12639535870263457, + "grad_norm": 2065.262939453125, + "learning_rate": 9.980759934034816e-06, + "loss": 1920.4672, + "step": 62570 + }, + { + "epoch": 0.1264155593353184, + "grad_norm": 17703.974609375, + "learning_rate": 9.980729328870586e-06, + "loss": 3891.1926, + "step": 62580 + }, + { + "epoch": 0.1264357599680022, + "grad_norm": 105666.546875, + "learning_rate": 9.980698699430884e-06, + "loss": 4418.9902, + "step": 62590 + }, + { + "epoch": 0.126455960600686, + "grad_norm": 3034.750732421875, + "learning_rate": 9.980668045715864e-06, + "loss": 1318.2733, + "step": 62600 + }, + { + "epoch": 0.12647616123336983, + "grad_norm": 2040.3475341796875, + "learning_rate": 9.98063736772567e-06, + "loss": 2145.1705, + "step": 62610 + }, + { + "epoch": 0.12649636186605365, + "grad_norm": 59094.46875, + "learning_rate": 9.980606665460453e-06, + "loss": 4065.8941, + "step": 62620 + }, + { + "epoch": 0.12651656249873747, + "grad_norm": 954.151611328125, + "learning_rate": 9.980575938920364e-06, + "loss": 1787.9746, + "step": 62630 + }, + { + "epoch": 0.1265367631314213, + "grad_norm": 5503.26220703125, + "learning_rate": 9.980545188105553e-06, + "loss": 5175.7551, + "step": 62640 + }, + { + "epoch": 0.12655696376410508, + "grad_norm": 11088.873046875, + "learning_rate": 9.980514413016167e-06, + "loss": 1276.9973, + "step": 62650 + }, + { + "epoch": 0.1265771643967889, + "grad_norm": 50069.375, + "learning_rate": 9.980483613652359e-06, + "loss": 1457.1762, + "step": 62660 + }, + { + "epoch": 0.12659736502947272, + "grad_norm": 23233.353515625, + "learning_rate": 9.980452790014278e-06, + "loss": 2169.0893, + "step": 62670 + }, + { + "epoch": 0.12661756566215654, + "grad_norm": 24591.349609375, + "learning_rate": 9.980421942102075e-06, + "loss": 2714.7863, + "step": 62680 + }, + { + "epoch": 0.12663776629484036, + "grad_norm": 75074.5625, + "learning_rate": 9.980391069915897e-06, + "loss": 1827.2434, + "step": 62690 + }, + { + "epoch": 0.12665796692752418, + "grad_norm": 2688.216796875, + "learning_rate": 9.980360173455899e-06, + "loss": 2390.4236, + "step": 62700 + }, + { + "epoch": 0.126678167560208, + "grad_norm": 1868.887451171875, + "learning_rate": 9.980329252722227e-06, + "loss": 869.9669, + "step": 62710 + }, + { + "epoch": 0.1266983681928918, + "grad_norm": 6861.8486328125, + "learning_rate": 9.980298307715038e-06, + "loss": 2339.8367, + "step": 62720 + }, + { + "epoch": 0.1267185688255756, + "grad_norm": 30674.958984375, + "learning_rate": 9.980267338434477e-06, + "loss": 3905.4125, + "step": 62730 + }, + { + "epoch": 0.12673876945825943, + "grad_norm": 853.1932373046875, + "learning_rate": 9.980236344880696e-06, + "loss": 1871.0574, + "step": 62740 + }, + { + "epoch": 0.12675897009094325, + "grad_norm": 114980.296875, + "learning_rate": 9.98020532705385e-06, + "loss": 4524.268, + "step": 62750 + }, + { + "epoch": 0.12677917072362707, + "grad_norm": 19668.44140625, + "learning_rate": 9.980174284954084e-06, + "loss": 5807.9812, + "step": 62760 + }, + { + "epoch": 0.1267993713563109, + "grad_norm": 27790.59765625, + "learning_rate": 9.980143218581555e-06, + "loss": 3492.5914, + "step": 62770 + }, + { + "epoch": 0.12681957198899468, + "grad_norm": 100012.6328125, + "learning_rate": 9.98011212793641e-06, + "loss": 4355.7133, + "step": 62780 + }, + { + "epoch": 0.1268397726216785, + "grad_norm": 4076.001708984375, + "learning_rate": 9.980081013018804e-06, + "loss": 2542.2318, + "step": 62790 + }, + { + "epoch": 0.12685997325436232, + "grad_norm": 695.8663330078125, + "learning_rate": 9.980049873828887e-06, + "loss": 881.7999, + "step": 62800 + }, + { + "epoch": 0.12688017388704614, + "grad_norm": 36559.95703125, + "learning_rate": 9.98001871036681e-06, + "loss": 4250.768, + "step": 62810 + }, + { + "epoch": 0.12690037451972996, + "grad_norm": 145186.609375, + "learning_rate": 9.979987522632727e-06, + "loss": 4401.65, + "step": 62820 + }, + { + "epoch": 0.12692057515241378, + "grad_norm": 182249.671875, + "learning_rate": 9.979956310626788e-06, + "loss": 5106.2773, + "step": 62830 + }, + { + "epoch": 0.12694077578509758, + "grad_norm": 4580.15625, + "learning_rate": 9.979925074349146e-06, + "loss": 2141.9121, + "step": 62840 + }, + { + "epoch": 0.1269609764177814, + "grad_norm": 35162.64453125, + "learning_rate": 9.979893813799953e-06, + "loss": 889.8377, + "step": 62850 + }, + { + "epoch": 0.12698117705046522, + "grad_norm": 865.6868896484375, + "learning_rate": 9.979862528979362e-06, + "loss": 6152.8711, + "step": 62860 + }, + { + "epoch": 0.12700137768314904, + "grad_norm": 10300.4990234375, + "learning_rate": 9.979831219887526e-06, + "loss": 1036.2176, + "step": 62870 + }, + { + "epoch": 0.12702157831583286, + "grad_norm": 12004.69140625, + "learning_rate": 9.979799886524594e-06, + "loss": 2064.9313, + "step": 62880 + }, + { + "epoch": 0.12704177894851668, + "grad_norm": 38325.71484375, + "learning_rate": 9.979768528890725e-06, + "loss": 2667.541, + "step": 62890 + }, + { + "epoch": 0.1270619795812005, + "grad_norm": 68372.6328125, + "learning_rate": 9.979737146986064e-06, + "loss": 4438.7133, + "step": 62900 + }, + { + "epoch": 0.1270821802138843, + "grad_norm": 1461.884521484375, + "learning_rate": 9.979705740810771e-06, + "loss": 3341.7246, + "step": 62910 + }, + { + "epoch": 0.1271023808465681, + "grad_norm": 26410.826171875, + "learning_rate": 9.979674310364996e-06, + "loss": 2642.9949, + "step": 62920 + }, + { + "epoch": 0.12712258147925193, + "grad_norm": 30113.119140625, + "learning_rate": 9.979642855648892e-06, + "loss": 2812.4133, + "step": 62930 + }, + { + "epoch": 0.12714278211193575, + "grad_norm": 12032.669921875, + "learning_rate": 9.979611376662613e-06, + "loss": 1534.284, + "step": 62940 + }, + { + "epoch": 0.12716298274461957, + "grad_norm": 8419.0849609375, + "learning_rate": 9.97957987340631e-06, + "loss": 2283.1408, + "step": 62950 + }, + { + "epoch": 0.1271831833773034, + "grad_norm": 54104.53125, + "learning_rate": 9.979548345880142e-06, + "loss": 3073.3035, + "step": 62960 + }, + { + "epoch": 0.12720338400998718, + "grad_norm": 122663.4140625, + "learning_rate": 9.979516794084256e-06, + "loss": 3363.9773, + "step": 62970 + }, + { + "epoch": 0.127223584642671, + "grad_norm": 21340.0546875, + "learning_rate": 9.97948521801881e-06, + "loss": 4709.9309, + "step": 62980 + }, + { + "epoch": 0.12724378527535482, + "grad_norm": 11609.9599609375, + "learning_rate": 9.979453617683958e-06, + "loss": 1559.7121, + "step": 62990 + }, + { + "epoch": 0.12726398590803864, + "grad_norm": 63880.6640625, + "learning_rate": 9.979421993079853e-06, + "loss": 2164.5975, + "step": 63000 + }, + { + "epoch": 0.12728418654072246, + "grad_norm": 38814.07421875, + "learning_rate": 9.979390344206648e-06, + "loss": 5304.109, + "step": 63010 + }, + { + "epoch": 0.12730438717340628, + "grad_norm": 6298.359375, + "learning_rate": 9.9793586710645e-06, + "loss": 2942.8705, + "step": 63020 + }, + { + "epoch": 0.1273245878060901, + "grad_norm": 129985.4921875, + "learning_rate": 9.97932697365356e-06, + "loss": 4531.5773, + "step": 63030 + }, + { + "epoch": 0.1273447884387739, + "grad_norm": 82100.921875, + "learning_rate": 9.979295251973986e-06, + "loss": 1831.5662, + "step": 63040 + }, + { + "epoch": 0.1273649890714577, + "grad_norm": 31669.087890625, + "learning_rate": 9.97926350602593e-06, + "loss": 5128.0547, + "step": 63050 + }, + { + "epoch": 0.12738518970414153, + "grad_norm": 19026.61328125, + "learning_rate": 9.979231735809546e-06, + "loss": 1181.9695, + "step": 63060 + }, + { + "epoch": 0.12740539033682535, + "grad_norm": 33658.1328125, + "learning_rate": 9.979199941324994e-06, + "loss": 1897.748, + "step": 63070 + }, + { + "epoch": 0.12742559096950917, + "grad_norm": 7901.150390625, + "learning_rate": 9.979168122572422e-06, + "loss": 2207.1893, + "step": 63080 + }, + { + "epoch": 0.127445791602193, + "grad_norm": 8037.83984375, + "learning_rate": 9.97913627955199e-06, + "loss": 5247.9578, + "step": 63090 + }, + { + "epoch": 0.12746599223487678, + "grad_norm": 20712.984375, + "learning_rate": 9.979104412263851e-06, + "loss": 1421.5471, + "step": 63100 + }, + { + "epoch": 0.1274861928675606, + "grad_norm": 126520.328125, + "learning_rate": 9.979072520708162e-06, + "loss": 8626.5086, + "step": 63110 + }, + { + "epoch": 0.12750639350024442, + "grad_norm": 2201.874267578125, + "learning_rate": 9.979040604885077e-06, + "loss": 2028.8447, + "step": 63120 + }, + { + "epoch": 0.12752659413292824, + "grad_norm": 15572.6494140625, + "learning_rate": 9.979008664794751e-06, + "loss": 4111.325, + "step": 63130 + }, + { + "epoch": 0.12754679476561206, + "grad_norm": 26447.1015625, + "learning_rate": 9.978976700437341e-06, + "loss": 3007.0824, + "step": 63140 + }, + { + "epoch": 0.12756699539829588, + "grad_norm": 60909.52734375, + "learning_rate": 9.978944711813003e-06, + "loss": 1667.1457, + "step": 63150 + }, + { + "epoch": 0.12758719603097968, + "grad_norm": 40889.24609375, + "learning_rate": 9.978912698921892e-06, + "loss": 2441.2701, + "step": 63160 + }, + { + "epoch": 0.1276073966636635, + "grad_norm": 28721.546875, + "learning_rate": 9.978880661764166e-06, + "loss": 2652.4434, + "step": 63170 + }, + { + "epoch": 0.12762759729634732, + "grad_norm": 23716.6640625, + "learning_rate": 9.978848600339978e-06, + "loss": 1259.7803, + "step": 63180 + }, + { + "epoch": 0.12764779792903114, + "grad_norm": 1476.09521484375, + "learning_rate": 9.978816514649486e-06, + "loss": 5344.0773, + "step": 63190 + }, + { + "epoch": 0.12766799856171496, + "grad_norm": 25555.109375, + "learning_rate": 9.978784404692847e-06, + "loss": 1398.203, + "step": 63200 + }, + { + "epoch": 0.12768819919439878, + "grad_norm": 39693.4375, + "learning_rate": 9.978752270470216e-06, + "loss": 3498.093, + "step": 63210 + }, + { + "epoch": 0.1277083998270826, + "grad_norm": 1638.7998046875, + "learning_rate": 9.97872011198175e-06, + "loss": 1039.6909, + "step": 63220 + }, + { + "epoch": 0.1277286004597664, + "grad_norm": 2893.328125, + "learning_rate": 9.978687929227606e-06, + "loss": 1157.7198, + "step": 63230 + }, + { + "epoch": 0.1277488010924502, + "grad_norm": 3604.14306640625, + "learning_rate": 9.97865572220794e-06, + "loss": 4506.9148, + "step": 63240 + }, + { + "epoch": 0.12776900172513403, + "grad_norm": 16422.54296875, + "learning_rate": 9.978623490922913e-06, + "loss": 2929.7033, + "step": 63250 + }, + { + "epoch": 0.12778920235781785, + "grad_norm": 14273.3212890625, + "learning_rate": 9.978591235372675e-06, + "loss": 2178.7293, + "step": 63260 + }, + { + "epoch": 0.12780940299050167, + "grad_norm": 4348.13623046875, + "learning_rate": 9.97855895555739e-06, + "loss": 4632.8195, + "step": 63270 + }, + { + "epoch": 0.1278296036231855, + "grad_norm": 76460.7265625, + "learning_rate": 9.978526651477211e-06, + "loss": 3147.9854, + "step": 63280 + }, + { + "epoch": 0.12784980425586928, + "grad_norm": 19734.716796875, + "learning_rate": 9.978494323132296e-06, + "loss": 1867.6691, + "step": 63290 + }, + { + "epoch": 0.1278700048885531, + "grad_norm": 17386.66796875, + "learning_rate": 9.978461970522807e-06, + "loss": 5477.0129, + "step": 63300 + }, + { + "epoch": 0.12789020552123692, + "grad_norm": 6190.21533203125, + "learning_rate": 9.978429593648894e-06, + "loss": 4407.482, + "step": 63310 + }, + { + "epoch": 0.12791040615392074, + "grad_norm": 113804.7890625, + "learning_rate": 9.978397192510722e-06, + "loss": 5275.0207, + "step": 63320 + }, + { + "epoch": 0.12793060678660456, + "grad_norm": 28182.15625, + "learning_rate": 9.978364767108444e-06, + "loss": 1979.566, + "step": 63330 + }, + { + "epoch": 0.12795080741928838, + "grad_norm": 11143.021484375, + "learning_rate": 9.97833231744222e-06, + "loss": 4590.1996, + "step": 63340 + }, + { + "epoch": 0.1279710080519722, + "grad_norm": 25824.80859375, + "learning_rate": 9.97829984351221e-06, + "loss": 2468.9561, + "step": 63350 + }, + { + "epoch": 0.127991208684656, + "grad_norm": 24831.3046875, + "learning_rate": 9.978267345318569e-06, + "loss": 1133.8786, + "step": 63360 + }, + { + "epoch": 0.1280114093173398, + "grad_norm": 1344.269287109375, + "learning_rate": 9.978234822861456e-06, + "loss": 533.8485, + "step": 63370 + }, + { + "epoch": 0.12803160995002363, + "grad_norm": 68939.6484375, + "learning_rate": 9.978202276141032e-06, + "loss": 4984.7273, + "step": 63380 + }, + { + "epoch": 0.12805181058270745, + "grad_norm": 7077.2041015625, + "learning_rate": 9.978169705157455e-06, + "loss": 4231.709, + "step": 63390 + }, + { + "epoch": 0.12807201121539127, + "grad_norm": 437.13458251953125, + "learning_rate": 9.97813710991088e-06, + "loss": 4050.3602, + "step": 63400 + }, + { + "epoch": 0.1280922118480751, + "grad_norm": 30245.0859375, + "learning_rate": 9.978104490401468e-06, + "loss": 4351.4961, + "step": 63410 + }, + { + "epoch": 0.12811241248075889, + "grad_norm": 55845.5625, + "learning_rate": 9.978071846629381e-06, + "loss": 1514.4932, + "step": 63420 + }, + { + "epoch": 0.1281326131134427, + "grad_norm": 64212.7109375, + "learning_rate": 9.978039178594774e-06, + "loss": 3542.0805, + "step": 63430 + }, + { + "epoch": 0.12815281374612653, + "grad_norm": 24060.005859375, + "learning_rate": 9.978006486297808e-06, + "loss": 3149.535, + "step": 63440 + }, + { + "epoch": 0.12817301437881035, + "grad_norm": 10488.1181640625, + "learning_rate": 9.977973769738642e-06, + "loss": 1226.0568, + "step": 63450 + }, + { + "epoch": 0.12819321501149417, + "grad_norm": 55024.1640625, + "learning_rate": 9.977941028917436e-06, + "loss": 2956.9629, + "step": 63460 + }, + { + "epoch": 0.12821341564417799, + "grad_norm": 6870.41796875, + "learning_rate": 9.977908263834348e-06, + "loss": 4536.4848, + "step": 63470 + }, + { + "epoch": 0.12823361627686178, + "grad_norm": 8983.7001953125, + "learning_rate": 9.97787547448954e-06, + "loss": 2383.0645, + "step": 63480 + }, + { + "epoch": 0.1282538169095456, + "grad_norm": 1458.4915771484375, + "learning_rate": 9.977842660883172e-06, + "loss": 972.6456, + "step": 63490 + }, + { + "epoch": 0.12827401754222942, + "grad_norm": 9160.509765625, + "learning_rate": 9.9778098230154e-06, + "loss": 4005.9387, + "step": 63500 + }, + { + "epoch": 0.12829421817491324, + "grad_norm": 41480.29296875, + "learning_rate": 9.97777696088639e-06, + "loss": 2684.8033, + "step": 63510 + }, + { + "epoch": 0.12831441880759706, + "grad_norm": 184235.453125, + "learning_rate": 9.977744074496297e-06, + "loss": 3153.0746, + "step": 63520 + }, + { + "epoch": 0.12833461944028088, + "grad_norm": 2904.7431640625, + "learning_rate": 9.97771116384528e-06, + "loss": 1981.6627, + "step": 63530 + }, + { + "epoch": 0.1283548200729647, + "grad_norm": 18930.93359375, + "learning_rate": 9.977678228933508e-06, + "loss": 4065.7406, + "step": 63540 + }, + { + "epoch": 0.1283750207056485, + "grad_norm": 7118.13623046875, + "learning_rate": 9.977645269761131e-06, + "loss": 4582.3285, + "step": 63550 + }, + { + "epoch": 0.1283952213383323, + "grad_norm": 5565.62255859375, + "learning_rate": 9.977612286328317e-06, + "loss": 1711.6658, + "step": 63560 + }, + { + "epoch": 0.12841542197101613, + "grad_norm": 5067.41943359375, + "learning_rate": 9.977579278635225e-06, + "loss": 1838.884, + "step": 63570 + }, + { + "epoch": 0.12843562260369995, + "grad_norm": 491.17431640625, + "learning_rate": 9.977546246682015e-06, + "loss": 3727.4703, + "step": 63580 + }, + { + "epoch": 0.12845582323638377, + "grad_norm": 13071.5537109375, + "learning_rate": 9.977513190468848e-06, + "loss": 5566.2516, + "step": 63590 + }, + { + "epoch": 0.1284760238690676, + "grad_norm": 8728.4384765625, + "learning_rate": 9.977480109995886e-06, + "loss": 919.6779, + "step": 63600 + }, + { + "epoch": 0.12849622450175138, + "grad_norm": 53.26127624511719, + "learning_rate": 9.977447005263289e-06, + "loss": 3083.0018, + "step": 63610 + }, + { + "epoch": 0.1285164251344352, + "grad_norm": 274280.125, + "learning_rate": 9.97741387627122e-06, + "loss": 5614.9441, + "step": 63620 + }, + { + "epoch": 0.12853662576711902, + "grad_norm": 6025.552734375, + "learning_rate": 9.977380723019838e-06, + "loss": 1908.124, + "step": 63630 + }, + { + "epoch": 0.12855682639980284, + "grad_norm": 5061.34228515625, + "learning_rate": 9.977347545509307e-06, + "loss": 2650.6166, + "step": 63640 + }, + { + "epoch": 0.12857702703248666, + "grad_norm": 23026.091796875, + "learning_rate": 9.977314343739785e-06, + "loss": 4119.5926, + "step": 63650 + }, + { + "epoch": 0.12859722766517048, + "grad_norm": 18555.87109375, + "learning_rate": 9.97728111771144e-06, + "loss": 1702.5568, + "step": 63660 + }, + { + "epoch": 0.1286174282978543, + "grad_norm": 142102.78125, + "learning_rate": 9.97724786742443e-06, + "loss": 4861.541, + "step": 63670 + }, + { + "epoch": 0.1286376289305381, + "grad_norm": 30826.853515625, + "learning_rate": 9.977214592878917e-06, + "loss": 1412.852, + "step": 63680 + }, + { + "epoch": 0.12865782956322191, + "grad_norm": 10596.541015625, + "learning_rate": 9.977181294075063e-06, + "loss": 3344.2016, + "step": 63690 + }, + { + "epoch": 0.12867803019590573, + "grad_norm": 6517.9013671875, + "learning_rate": 9.977147971013033e-06, + "loss": 5063.3199, + "step": 63700 + }, + { + "epoch": 0.12869823082858955, + "grad_norm": 3083.8056640625, + "learning_rate": 9.977114623692985e-06, + "loss": 1331.8885, + "step": 63710 + }, + { + "epoch": 0.12871843146127337, + "grad_norm": 77212.6484375, + "learning_rate": 9.977081252115085e-06, + "loss": 3888.9863, + "step": 63720 + }, + { + "epoch": 0.1287386320939572, + "grad_norm": 843.1259765625, + "learning_rate": 9.977047856279496e-06, + "loss": 4264.3121, + "step": 63730 + }, + { + "epoch": 0.128758832726641, + "grad_norm": 7768.43212890625, + "learning_rate": 9.977014436186377e-06, + "loss": 2375.1289, + "step": 63740 + }, + { + "epoch": 0.1287790333593248, + "grad_norm": 6297.2412109375, + "learning_rate": 9.976980991835896e-06, + "loss": 4666.7824, + "step": 63750 + }, + { + "epoch": 0.12879923399200863, + "grad_norm": 5593.09326171875, + "learning_rate": 9.97694752322821e-06, + "loss": 1013.5552, + "step": 63760 + }, + { + "epoch": 0.12881943462469245, + "grad_norm": 1252.6927490234375, + "learning_rate": 9.976914030363488e-06, + "loss": 1890.8791, + "step": 63770 + }, + { + "epoch": 0.12883963525737627, + "grad_norm": 11118.8046875, + "learning_rate": 9.976880513241889e-06, + "loss": 2453.5514, + "step": 63780 + }, + { + "epoch": 0.1288598358900601, + "grad_norm": 828.9957885742188, + "learning_rate": 9.976846971863579e-06, + "loss": 2248.4904, + "step": 63790 + }, + { + "epoch": 0.12888003652274388, + "grad_norm": 14985.3671875, + "learning_rate": 9.97681340622872e-06, + "loss": 4467.7652, + "step": 63800 + }, + { + "epoch": 0.1289002371554277, + "grad_norm": 1855.6517333984375, + "learning_rate": 9.976779816337476e-06, + "loss": 1038.6627, + "step": 63810 + }, + { + "epoch": 0.12892043778811152, + "grad_norm": 2713.68798828125, + "learning_rate": 9.976746202190012e-06, + "loss": 1436.8436, + "step": 63820 + }, + { + "epoch": 0.12894063842079534, + "grad_norm": 1505.5250244140625, + "learning_rate": 9.97671256378649e-06, + "loss": 1040.495, + "step": 63830 + }, + { + "epoch": 0.12896083905347916, + "grad_norm": 4587.3349609375, + "learning_rate": 9.976678901127074e-06, + "loss": 5989.168, + "step": 63840 + }, + { + "epoch": 0.12898103968616298, + "grad_norm": 26205.15625, + "learning_rate": 9.976645214211929e-06, + "loss": 2372.4021, + "step": 63850 + }, + { + "epoch": 0.1290012403188468, + "grad_norm": 12474.99609375, + "learning_rate": 9.976611503041218e-06, + "loss": 4669.2949, + "step": 63860 + }, + { + "epoch": 0.1290214409515306, + "grad_norm": 27403.896484375, + "learning_rate": 9.976577767615108e-06, + "loss": 4259.4305, + "step": 63870 + }, + { + "epoch": 0.1290416415842144, + "grad_norm": 6940.59326171875, + "learning_rate": 9.97654400793376e-06, + "loss": 1622.7104, + "step": 63880 + }, + { + "epoch": 0.12906184221689823, + "grad_norm": 28823.861328125, + "learning_rate": 9.97651022399734e-06, + "loss": 2762.7703, + "step": 63890 + }, + { + "epoch": 0.12908204284958205, + "grad_norm": 19745.578125, + "learning_rate": 9.976476415806013e-06, + "loss": 2398.057, + "step": 63900 + }, + { + "epoch": 0.12910224348226587, + "grad_norm": 6161.71728515625, + "learning_rate": 9.976442583359944e-06, + "loss": 1777.7076, + "step": 63910 + }, + { + "epoch": 0.1291224441149497, + "grad_norm": 27024.52734375, + "learning_rate": 9.976408726659296e-06, + "loss": 923.275, + "step": 63920 + }, + { + "epoch": 0.12914264474763348, + "grad_norm": 2027.0347900390625, + "learning_rate": 9.976374845704238e-06, + "loss": 1930.483, + "step": 63930 + }, + { + "epoch": 0.1291628453803173, + "grad_norm": 1216.120849609375, + "learning_rate": 9.976340940494931e-06, + "loss": 1662.1131, + "step": 63940 + }, + { + "epoch": 0.12918304601300112, + "grad_norm": 40171.7421875, + "learning_rate": 9.976307011031542e-06, + "loss": 2612.2768, + "step": 63950 + }, + { + "epoch": 0.12920324664568494, + "grad_norm": 530.2669067382812, + "learning_rate": 9.976273057314236e-06, + "loss": 912.8248, + "step": 63960 + }, + { + "epoch": 0.12922344727836876, + "grad_norm": 13466.5546875, + "learning_rate": 9.97623907934318e-06, + "loss": 3343.7773, + "step": 63970 + }, + { + "epoch": 0.12924364791105258, + "grad_norm": 62984.33984375, + "learning_rate": 9.976205077118536e-06, + "loss": 3698.9434, + "step": 63980 + }, + { + "epoch": 0.12926384854373638, + "grad_norm": 9567.955078125, + "learning_rate": 9.976171050640473e-06, + "loss": 2414.8361, + "step": 63990 + }, + { + "epoch": 0.1292840491764202, + "grad_norm": 71562.3125, + "learning_rate": 9.976136999909156e-06, + "loss": 4267.4266, + "step": 64000 + }, + { + "epoch": 0.12930424980910402, + "grad_norm": 4217.81689453125, + "learning_rate": 9.976102924924752e-06, + "loss": 1336.0041, + "step": 64010 + }, + { + "epoch": 0.12932445044178784, + "grad_norm": 7480.00244140625, + "learning_rate": 9.976068825687424e-06, + "loss": 4976.8793, + "step": 64020 + }, + { + "epoch": 0.12934465107447166, + "grad_norm": 294762.40625, + "learning_rate": 9.97603470219734e-06, + "loss": 2065.9422, + "step": 64030 + }, + { + "epoch": 0.12936485170715548, + "grad_norm": 66362.453125, + "learning_rate": 9.976000554454668e-06, + "loss": 3573.3418, + "step": 64040 + }, + { + "epoch": 0.1293850523398393, + "grad_norm": 4505.86572265625, + "learning_rate": 9.975966382459571e-06, + "loss": 2426.7295, + "step": 64050 + }, + { + "epoch": 0.1294052529725231, + "grad_norm": 7666.0673828125, + "learning_rate": 9.975932186212217e-06, + "loss": 3052.1811, + "step": 64060 + }, + { + "epoch": 0.1294254536052069, + "grad_norm": 26307.21875, + "learning_rate": 9.975897965712777e-06, + "loss": 3694.5926, + "step": 64070 + }, + { + "epoch": 0.12944565423789073, + "grad_norm": 12197.3037109375, + "learning_rate": 9.975863720961411e-06, + "loss": 2547.6697, + "step": 64080 + }, + { + "epoch": 0.12946585487057455, + "grad_norm": 4642.69091796875, + "learning_rate": 9.975829451958288e-06, + "loss": 2256.5926, + "step": 64090 + }, + { + "epoch": 0.12948605550325837, + "grad_norm": 126881.0703125, + "learning_rate": 9.975795158703576e-06, + "loss": 2043.974, + "step": 64100 + }, + { + "epoch": 0.1295062561359422, + "grad_norm": 20698.861328125, + "learning_rate": 9.975760841197443e-06, + "loss": 1165.8029, + "step": 64110 + }, + { + "epoch": 0.12952645676862598, + "grad_norm": 180233.34375, + "learning_rate": 9.975726499440055e-06, + "loss": 6011.6527, + "step": 64120 + }, + { + "epoch": 0.1295466574013098, + "grad_norm": 5611.91943359375, + "learning_rate": 9.975692133431579e-06, + "loss": 2761.5938, + "step": 64130 + }, + { + "epoch": 0.12956685803399362, + "grad_norm": 79411.9375, + "learning_rate": 9.975657743172182e-06, + "loss": 3836.2961, + "step": 64140 + }, + { + "epoch": 0.12958705866667744, + "grad_norm": 53833.68359375, + "learning_rate": 9.975623328662036e-06, + "loss": 6764.1906, + "step": 64150 + }, + { + "epoch": 0.12960725929936126, + "grad_norm": 324.3952941894531, + "learning_rate": 9.975588889901302e-06, + "loss": 4759.5602, + "step": 64160 + }, + { + "epoch": 0.12962745993204508, + "grad_norm": 8504.4765625, + "learning_rate": 9.975554426890152e-06, + "loss": 3129.3918, + "step": 64170 + }, + { + "epoch": 0.1296476605647289, + "grad_norm": 43079.1640625, + "learning_rate": 9.975519939628754e-06, + "loss": 1806.1391, + "step": 64180 + }, + { + "epoch": 0.1296678611974127, + "grad_norm": 207997.09375, + "learning_rate": 9.975485428117276e-06, + "loss": 3375.9086, + "step": 64190 + }, + { + "epoch": 0.1296880618300965, + "grad_norm": 114654.890625, + "learning_rate": 9.975450892355882e-06, + "loss": 2389.8355, + "step": 64200 + }, + { + "epoch": 0.12970826246278033, + "grad_norm": 21207.751953125, + "learning_rate": 9.975416332344747e-06, + "loss": 1050.6431, + "step": 64210 + }, + { + "epoch": 0.12972846309546415, + "grad_norm": 6463.09619140625, + "learning_rate": 9.975381748084035e-06, + "loss": 2789.2914, + "step": 64220 + }, + { + "epoch": 0.12974866372814797, + "grad_norm": 26537.73828125, + "learning_rate": 9.975347139573917e-06, + "loss": 1642.8031, + "step": 64230 + }, + { + "epoch": 0.1297688643608318, + "grad_norm": 17153.39453125, + "learning_rate": 9.97531250681456e-06, + "loss": 3942.4199, + "step": 64240 + }, + { + "epoch": 0.12978906499351558, + "grad_norm": 3087.972412109375, + "learning_rate": 9.975277849806133e-06, + "loss": 2137.732, + "step": 64250 + }, + { + "epoch": 0.1298092656261994, + "grad_norm": 15229.23046875, + "learning_rate": 9.975243168548804e-06, + "loss": 2437.4187, + "step": 64260 + }, + { + "epoch": 0.12982946625888322, + "grad_norm": 5923.77587890625, + "learning_rate": 9.975208463042745e-06, + "loss": 2064.6451, + "step": 64270 + }, + { + "epoch": 0.12984966689156704, + "grad_norm": 939.7130126953125, + "learning_rate": 9.975173733288122e-06, + "loss": 1276.5072, + "step": 64280 + }, + { + "epoch": 0.12986986752425086, + "grad_norm": 4875.93505859375, + "learning_rate": 9.975138979285107e-06, + "loss": 1200.392, + "step": 64290 + }, + { + "epoch": 0.12989006815693468, + "grad_norm": 991.8863525390625, + "learning_rate": 9.975104201033868e-06, + "loss": 2002.6971, + "step": 64300 + }, + { + "epoch": 0.12991026878961848, + "grad_norm": 11711.6181640625, + "learning_rate": 9.975069398534574e-06, + "loss": 1886.3381, + "step": 64310 + }, + { + "epoch": 0.1299304694223023, + "grad_norm": 22065.51953125, + "learning_rate": 9.975034571787394e-06, + "loss": 4643.9492, + "step": 64320 + }, + { + "epoch": 0.12995067005498612, + "grad_norm": 51932.06640625, + "learning_rate": 9.9749997207925e-06, + "loss": 5444.1426, + "step": 64330 + }, + { + "epoch": 0.12997087068766994, + "grad_norm": 161526.21875, + "learning_rate": 9.974964845550062e-06, + "loss": 3289.3742, + "step": 64340 + }, + { + "epoch": 0.12999107132035376, + "grad_norm": 4055.110107421875, + "learning_rate": 9.974929946060246e-06, + "loss": 2481.9947, + "step": 64350 + }, + { + "epoch": 0.13001127195303758, + "grad_norm": 8362.03515625, + "learning_rate": 9.974895022323226e-06, + "loss": 2026.509, + "step": 64360 + }, + { + "epoch": 0.1300314725857214, + "grad_norm": 672.4752197265625, + "learning_rate": 9.974860074339173e-06, + "loss": 1910.0838, + "step": 64370 + }, + { + "epoch": 0.1300516732184052, + "grad_norm": 52359.484375, + "learning_rate": 9.974825102108251e-06, + "loss": 2177.8373, + "step": 64380 + }, + { + "epoch": 0.130071873851089, + "grad_norm": 47475.76171875, + "learning_rate": 9.974790105630639e-06, + "loss": 2515.4187, + "step": 64390 + }, + { + "epoch": 0.13009207448377283, + "grad_norm": 8088.29052734375, + "learning_rate": 9.974755084906503e-06, + "loss": 2845.2111, + "step": 64400 + }, + { + "epoch": 0.13011227511645665, + "grad_norm": 14988.259765625, + "learning_rate": 9.974720039936012e-06, + "loss": 2348.6148, + "step": 64410 + }, + { + "epoch": 0.13013247574914047, + "grad_norm": 6075.46826171875, + "learning_rate": 9.97468497071934e-06, + "loss": 1520.6032, + "step": 64420 + }, + { + "epoch": 0.1301526763818243, + "grad_norm": 13632.0966796875, + "learning_rate": 9.974649877256657e-06, + "loss": 2529.2344, + "step": 64430 + }, + { + "epoch": 0.13017287701450808, + "grad_norm": 26855.490234375, + "learning_rate": 9.974614759548133e-06, + "loss": 2528.2201, + "step": 64440 + }, + { + "epoch": 0.1301930776471919, + "grad_norm": 1808.3297119140625, + "learning_rate": 9.97457961759394e-06, + "loss": 1216.5012, + "step": 64450 + }, + { + "epoch": 0.13021327827987572, + "grad_norm": 5676.43310546875, + "learning_rate": 9.97454445139425e-06, + "loss": 1219.1083, + "step": 64460 + }, + { + "epoch": 0.13023347891255954, + "grad_norm": 1006.4689331054688, + "learning_rate": 9.974509260949233e-06, + "loss": 1690.5287, + "step": 64470 + }, + { + "epoch": 0.13025367954524336, + "grad_norm": 45321.61328125, + "learning_rate": 9.97447404625906e-06, + "loss": 3233.2988, + "step": 64480 + }, + { + "epoch": 0.13027388017792718, + "grad_norm": 84006.4765625, + "learning_rate": 9.974438807323907e-06, + "loss": 4786.0477, + "step": 64490 + }, + { + "epoch": 0.130294080810611, + "grad_norm": 5051.1171875, + "learning_rate": 9.974403544143942e-06, + "loss": 2148.1578, + "step": 64500 + }, + { + "epoch": 0.1303142814432948, + "grad_norm": 59650.16796875, + "learning_rate": 9.974368256719335e-06, + "loss": 1767.3502, + "step": 64510 + }, + { + "epoch": 0.1303344820759786, + "grad_norm": 143627.25, + "learning_rate": 9.974332945050263e-06, + "loss": 1687.6334, + "step": 64520 + }, + { + "epoch": 0.13035468270866243, + "grad_norm": 49012.8203125, + "learning_rate": 9.974297609136895e-06, + "loss": 1217.7883, + "step": 64530 + }, + { + "epoch": 0.13037488334134625, + "grad_norm": 200701.78125, + "learning_rate": 9.974262248979402e-06, + "loss": 1831.5871, + "step": 64540 + }, + { + "epoch": 0.13039508397403007, + "grad_norm": 35230.4453125, + "learning_rate": 9.97422686457796e-06, + "loss": 2021.3734, + "step": 64550 + }, + { + "epoch": 0.1304152846067139, + "grad_norm": 26513.40234375, + "learning_rate": 9.97419145593274e-06, + "loss": 2495.3789, + "step": 64560 + }, + { + "epoch": 0.13043548523939769, + "grad_norm": 1232.1143798828125, + "learning_rate": 9.974156023043912e-06, + "loss": 4008.4984, + "step": 64570 + }, + { + "epoch": 0.1304556858720815, + "grad_norm": 110172.25, + "learning_rate": 9.974120565911653e-06, + "loss": 3422.2883, + "step": 64580 + }, + { + "epoch": 0.13047588650476533, + "grad_norm": 2313.545654296875, + "learning_rate": 9.974085084536132e-06, + "loss": 782.2841, + "step": 64590 + }, + { + "epoch": 0.13049608713744915, + "grad_norm": 171.2788848876953, + "learning_rate": 9.974049578917524e-06, + "loss": 1298.16, + "step": 64600 + }, + { + "epoch": 0.13051628777013297, + "grad_norm": 39361.32421875, + "learning_rate": 9.974014049056003e-06, + "loss": 2053.8646, + "step": 64610 + }, + { + "epoch": 0.13053648840281679, + "grad_norm": 87885.171875, + "learning_rate": 9.973978494951739e-06, + "loss": 2505.1096, + "step": 64620 + }, + { + "epoch": 0.13055668903550058, + "grad_norm": 5306.14892578125, + "learning_rate": 9.973942916604907e-06, + "loss": 1863.583, + "step": 64630 + }, + { + "epoch": 0.1305768896681844, + "grad_norm": 20678.888671875, + "learning_rate": 9.973907314015682e-06, + "loss": 2905.0863, + "step": 64640 + }, + { + "epoch": 0.13059709030086822, + "grad_norm": 12246.994140625, + "learning_rate": 9.973871687184234e-06, + "loss": 2588.927, + "step": 64650 + }, + { + "epoch": 0.13061729093355204, + "grad_norm": 39762.19140625, + "learning_rate": 9.97383603611074e-06, + "loss": 2815.2031, + "step": 64660 + }, + { + "epoch": 0.13063749156623586, + "grad_norm": 26631.4453125, + "learning_rate": 9.973800360795372e-06, + "loss": 2715.9822, + "step": 64670 + }, + { + "epoch": 0.13065769219891968, + "grad_norm": 7777.4248046875, + "learning_rate": 9.973764661238306e-06, + "loss": 1442.762, + "step": 64680 + }, + { + "epoch": 0.1306778928316035, + "grad_norm": 29113.42578125, + "learning_rate": 9.973728937439714e-06, + "loss": 768.8608, + "step": 64690 + }, + { + "epoch": 0.1306980934642873, + "grad_norm": 192956.71875, + "learning_rate": 9.973693189399767e-06, + "loss": 4220.6102, + "step": 64700 + }, + { + "epoch": 0.1307182940969711, + "grad_norm": 17539.4453125, + "learning_rate": 9.973657417118646e-06, + "loss": 3072.9896, + "step": 64710 + }, + { + "epoch": 0.13073849472965493, + "grad_norm": 0.0, + "learning_rate": 9.97362162059652e-06, + "loss": 5431.6637, + "step": 64720 + }, + { + "epoch": 0.13075869536233875, + "grad_norm": 599.9068603515625, + "learning_rate": 9.973585799833567e-06, + "loss": 1568.8847, + "step": 64730 + }, + { + "epoch": 0.13077889599502257, + "grad_norm": 11218.955078125, + "learning_rate": 9.97354995482996e-06, + "loss": 2429.9951, + "step": 64740 + }, + { + "epoch": 0.1307990966277064, + "grad_norm": 12439.2900390625, + "learning_rate": 9.973514085585871e-06, + "loss": 5172.0027, + "step": 64750 + }, + { + "epoch": 0.13081929726039018, + "grad_norm": 27227.96875, + "learning_rate": 9.97347819210148e-06, + "loss": 2165.9168, + "step": 64760 + }, + { + "epoch": 0.130839497893074, + "grad_norm": 33531.109375, + "learning_rate": 9.973442274376958e-06, + "loss": 1697.0758, + "step": 64770 + }, + { + "epoch": 0.13085969852575782, + "grad_norm": 98797.015625, + "learning_rate": 9.973406332412484e-06, + "loss": 3385.5051, + "step": 64780 + }, + { + "epoch": 0.13087989915844164, + "grad_norm": 26370.13671875, + "learning_rate": 9.97337036620823e-06, + "loss": 5280.6559, + "step": 64790 + }, + { + "epoch": 0.13090009979112546, + "grad_norm": 5837.884765625, + "learning_rate": 9.973334375764372e-06, + "loss": 2926.8828, + "step": 64800 + }, + { + "epoch": 0.13092030042380928, + "grad_norm": 47750.15625, + "learning_rate": 9.973298361081083e-06, + "loss": 2374.4871, + "step": 64810 + }, + { + "epoch": 0.1309405010564931, + "grad_norm": 137785.625, + "learning_rate": 9.973262322158544e-06, + "loss": 3436.4387, + "step": 64820 + }, + { + "epoch": 0.1309607016891769, + "grad_norm": 167863.734375, + "learning_rate": 9.973226258996926e-06, + "loss": 2182.7566, + "step": 64830 + }, + { + "epoch": 0.13098090232186071, + "grad_norm": 13753.3583984375, + "learning_rate": 9.973190171596407e-06, + "loss": 1907.907, + "step": 64840 + }, + { + "epoch": 0.13100110295454453, + "grad_norm": 54717.9375, + "learning_rate": 9.973154059957162e-06, + "loss": 2225.3934, + "step": 64850 + }, + { + "epoch": 0.13102130358722835, + "grad_norm": 21185.12109375, + "learning_rate": 9.973117924079367e-06, + "loss": 5178.309, + "step": 64860 + }, + { + "epoch": 0.13104150421991217, + "grad_norm": 50602.953125, + "learning_rate": 9.973081763963199e-06, + "loss": 2405.7311, + "step": 64870 + }, + { + "epoch": 0.131061704852596, + "grad_norm": 13226.3818359375, + "learning_rate": 9.973045579608834e-06, + "loss": 4503.9223, + "step": 64880 + }, + { + "epoch": 0.1310819054852798, + "grad_norm": 5988.15234375, + "learning_rate": 9.973009371016447e-06, + "loss": 2286.4721, + "step": 64890 + }, + { + "epoch": 0.1311021061179636, + "grad_norm": 3791.581787109375, + "learning_rate": 9.972973138186217e-06, + "loss": 1611.0245, + "step": 64900 + }, + { + "epoch": 0.13112230675064743, + "grad_norm": 16970.681640625, + "learning_rate": 9.972936881118318e-06, + "loss": 3807.216, + "step": 64910 + }, + { + "epoch": 0.13114250738333125, + "grad_norm": 49481.11328125, + "learning_rate": 9.972900599812928e-06, + "loss": 3698.8602, + "step": 64920 + }, + { + "epoch": 0.13116270801601507, + "grad_norm": 27413.634765625, + "learning_rate": 9.972864294270224e-06, + "loss": 4006.2387, + "step": 64930 + }, + { + "epoch": 0.1311829086486989, + "grad_norm": 16785.373046875, + "learning_rate": 9.972827964490382e-06, + "loss": 2337.4867, + "step": 64940 + }, + { + "epoch": 0.13120310928138268, + "grad_norm": 485.88214111328125, + "learning_rate": 9.972791610473578e-06, + "loss": 2151.7771, + "step": 64950 + }, + { + "epoch": 0.1312233099140665, + "grad_norm": 20278.32421875, + "learning_rate": 9.972755232219992e-06, + "loss": 2196.5266, + "step": 64960 + }, + { + "epoch": 0.13124351054675032, + "grad_norm": 45976.55859375, + "learning_rate": 9.972718829729802e-06, + "loss": 2518.2346, + "step": 64970 + }, + { + "epoch": 0.13126371117943414, + "grad_norm": 50977.5859375, + "learning_rate": 9.972682403003182e-06, + "loss": 3501.9641, + "step": 64980 + }, + { + "epoch": 0.13128391181211796, + "grad_norm": 139211.703125, + "learning_rate": 9.972645952040311e-06, + "loss": 4016.0203, + "step": 64990 + }, + { + "epoch": 0.13130411244480178, + "grad_norm": 116659.7109375, + "learning_rate": 9.972609476841368e-06, + "loss": 2303.5418, + "step": 65000 + }, + { + "epoch": 0.1313243130774856, + "grad_norm": 56922.98828125, + "learning_rate": 9.972572977406527e-06, + "loss": 1266.5348, + "step": 65010 + }, + { + "epoch": 0.1313445137101694, + "grad_norm": 72872.171875, + "learning_rate": 9.97253645373597e-06, + "loss": 6867.9297, + "step": 65020 + }, + { + "epoch": 0.1313647143428532, + "grad_norm": 18156.228515625, + "learning_rate": 9.972499905829874e-06, + "loss": 2448.3898, + "step": 65030 + }, + { + "epoch": 0.13138491497553703, + "grad_norm": 35489.15234375, + "learning_rate": 9.972463333688416e-06, + "loss": 1961.7061, + "step": 65040 + }, + { + "epoch": 0.13140511560822085, + "grad_norm": 51864.3046875, + "learning_rate": 9.972426737311775e-06, + "loss": 6297.3555, + "step": 65050 + }, + { + "epoch": 0.13142531624090467, + "grad_norm": 52599.4609375, + "learning_rate": 9.972390116700128e-06, + "loss": 3344.0074, + "step": 65060 + }, + { + "epoch": 0.1314455168735885, + "grad_norm": 22404.029296875, + "learning_rate": 9.972353471853655e-06, + "loss": 2094.8039, + "step": 65070 + }, + { + "epoch": 0.13146571750627228, + "grad_norm": 14338.4453125, + "learning_rate": 9.972316802772536e-06, + "loss": 2700.4656, + "step": 65080 + }, + { + "epoch": 0.1314859181389561, + "grad_norm": 6200.29345703125, + "learning_rate": 9.972280109456946e-06, + "loss": 1676.4377, + "step": 65090 + }, + { + "epoch": 0.13150611877163992, + "grad_norm": 94949.4921875, + "learning_rate": 9.972243391907068e-06, + "loss": 2051.7139, + "step": 65100 + }, + { + "epoch": 0.13152631940432374, + "grad_norm": 1208.706787109375, + "learning_rate": 9.972206650123077e-06, + "loss": 2113.4652, + "step": 65110 + }, + { + "epoch": 0.13154652003700756, + "grad_norm": 61480.6328125, + "learning_rate": 9.972169884105155e-06, + "loss": 4691.2285, + "step": 65120 + }, + { + "epoch": 0.13156672066969138, + "grad_norm": 11946.73046875, + "learning_rate": 9.972133093853477e-06, + "loss": 3075.7063, + "step": 65130 + }, + { + "epoch": 0.1315869213023752, + "grad_norm": 11355.40625, + "learning_rate": 9.972096279368228e-06, + "loss": 1501.1763, + "step": 65140 + }, + { + "epoch": 0.131607121935059, + "grad_norm": 60359.3984375, + "learning_rate": 9.972059440649584e-06, + "loss": 1371.5871, + "step": 65150 + }, + { + "epoch": 0.13162732256774282, + "grad_norm": 5033.60595703125, + "learning_rate": 9.972022577697726e-06, + "loss": 1750.7576, + "step": 65160 + }, + { + "epoch": 0.13164752320042664, + "grad_norm": 1990.4803466796875, + "learning_rate": 9.971985690512834e-06, + "loss": 7558.9242, + "step": 65170 + }, + { + "epoch": 0.13166772383311046, + "grad_norm": 12667.4638671875, + "learning_rate": 9.971948779095084e-06, + "loss": 1479.6084, + "step": 65180 + }, + { + "epoch": 0.13168792446579428, + "grad_norm": 12890.77734375, + "learning_rate": 9.97191184344466e-06, + "loss": 1823.2109, + "step": 65190 + }, + { + "epoch": 0.1317081250984781, + "grad_norm": 22821.46484375, + "learning_rate": 9.97187488356174e-06, + "loss": 4885.6547, + "step": 65200 + }, + { + "epoch": 0.1317283257311619, + "grad_norm": 19160.474609375, + "learning_rate": 9.971837899446505e-06, + "loss": 1339.8656, + "step": 65210 + }, + { + "epoch": 0.1317485263638457, + "grad_norm": 3814.5712890625, + "learning_rate": 9.971800891099137e-06, + "loss": 5301.582, + "step": 65220 + }, + { + "epoch": 0.13176872699652953, + "grad_norm": 4084.857421875, + "learning_rate": 9.971763858519812e-06, + "loss": 1860.1555, + "step": 65230 + }, + { + "epoch": 0.13178892762921335, + "grad_norm": 43213.88671875, + "learning_rate": 9.971726801708715e-06, + "loss": 5188.3438, + "step": 65240 + }, + { + "epoch": 0.13180912826189717, + "grad_norm": 2955.465087890625, + "learning_rate": 9.971689720666024e-06, + "loss": 1982.3709, + "step": 65250 + }, + { + "epoch": 0.131829328894581, + "grad_norm": 2200.270751953125, + "learning_rate": 9.97165261539192e-06, + "loss": 4321.8637, + "step": 65260 + }, + { + "epoch": 0.13184952952726478, + "grad_norm": 27063.791015625, + "learning_rate": 9.971615485886583e-06, + "loss": 4238.1828, + "step": 65270 + }, + { + "epoch": 0.1318697301599486, + "grad_norm": 1397.6290283203125, + "learning_rate": 9.971578332150197e-06, + "loss": 4034.5062, + "step": 65280 + }, + { + "epoch": 0.13188993079263242, + "grad_norm": 16451.435546875, + "learning_rate": 9.97154115418294e-06, + "loss": 2585.8121, + "step": 65290 + }, + { + "epoch": 0.13191013142531624, + "grad_norm": 23451.14453125, + "learning_rate": 9.971503951984996e-06, + "loss": 2187.573, + "step": 65300 + }, + { + "epoch": 0.13193033205800006, + "grad_norm": 51826.4296875, + "learning_rate": 9.971466725556542e-06, + "loss": 2383.0141, + "step": 65310 + }, + { + "epoch": 0.13195053269068388, + "grad_norm": 170789.96875, + "learning_rate": 9.971429474897765e-06, + "loss": 4343.8195, + "step": 65320 + }, + { + "epoch": 0.1319707333233677, + "grad_norm": 18488.53515625, + "learning_rate": 9.971392200008842e-06, + "loss": 935.8298, + "step": 65330 + }, + { + "epoch": 0.1319909339560515, + "grad_norm": 16091.279296875, + "learning_rate": 9.971354900889955e-06, + "loss": 2905.8098, + "step": 65340 + }, + { + "epoch": 0.1320111345887353, + "grad_norm": 14632.6591796875, + "learning_rate": 9.97131757754129e-06, + "loss": 847.9492, + "step": 65350 + }, + { + "epoch": 0.13203133522141913, + "grad_norm": 10979.392578125, + "learning_rate": 9.971280229963026e-06, + "loss": 3024.8707, + "step": 65360 + }, + { + "epoch": 0.13205153585410295, + "grad_norm": 69372.15625, + "learning_rate": 9.971242858155344e-06, + "loss": 2055.816, + "step": 65370 + }, + { + "epoch": 0.13207173648678677, + "grad_norm": 19253.625, + "learning_rate": 9.971205462118427e-06, + "loss": 1896.4375, + "step": 65380 + }, + { + "epoch": 0.1320919371194706, + "grad_norm": 9669.15234375, + "learning_rate": 9.971168041852456e-06, + "loss": 2759.235, + "step": 65390 + }, + { + "epoch": 0.13211213775215438, + "grad_norm": 56177.96484375, + "learning_rate": 9.971130597357618e-06, + "loss": 2729.5316, + "step": 65400 + }, + { + "epoch": 0.1321323383848382, + "grad_norm": 34078.859375, + "learning_rate": 9.97109312863409e-06, + "loss": 1338.7204, + "step": 65410 + }, + { + "epoch": 0.13215253901752202, + "grad_norm": 106194.5234375, + "learning_rate": 9.971055635682059e-06, + "loss": 5281.6687, + "step": 65420 + }, + { + "epoch": 0.13217273965020584, + "grad_norm": 3657.4052734375, + "learning_rate": 9.971018118501706e-06, + "loss": 4577.9496, + "step": 65430 + }, + { + "epoch": 0.13219294028288966, + "grad_norm": 119542.3125, + "learning_rate": 9.970980577093212e-06, + "loss": 3373.1328, + "step": 65440 + }, + { + "epoch": 0.13221314091557348, + "grad_norm": 1278.7138671875, + "learning_rate": 9.970943011456762e-06, + "loss": 594.6092, + "step": 65450 + }, + { + "epoch": 0.1322333415482573, + "grad_norm": 1393.1549072265625, + "learning_rate": 9.970905421592538e-06, + "loss": 2709.0109, + "step": 65460 + }, + { + "epoch": 0.1322535421809411, + "grad_norm": 14861.0, + "learning_rate": 9.970867807500725e-06, + "loss": 1438.9153, + "step": 65470 + }, + { + "epoch": 0.13227374281362492, + "grad_norm": 7935.81201171875, + "learning_rate": 9.970830169181504e-06, + "loss": 3693.5109, + "step": 65480 + }, + { + "epoch": 0.13229394344630874, + "grad_norm": 9424.6064453125, + "learning_rate": 9.97079250663506e-06, + "loss": 1272.3051, + "step": 65490 + }, + { + "epoch": 0.13231414407899256, + "grad_norm": 22424.05078125, + "learning_rate": 9.970754819861577e-06, + "loss": 3378.909, + "step": 65500 + }, + { + "epoch": 0.13233434471167638, + "grad_norm": 6740.126953125, + "learning_rate": 9.97071710886124e-06, + "loss": 6484.3898, + "step": 65510 + }, + { + "epoch": 0.1323545453443602, + "grad_norm": 485.5788269042969, + "learning_rate": 9.970679373634227e-06, + "loss": 2918.2961, + "step": 65520 + }, + { + "epoch": 0.132374745977044, + "grad_norm": 14254.7919921875, + "learning_rate": 9.970641614180727e-06, + "loss": 3366.3383, + "step": 65530 + }, + { + "epoch": 0.1323949466097278, + "grad_norm": 52865.1328125, + "learning_rate": 9.970603830500923e-06, + "loss": 1559.3297, + "step": 65540 + }, + { + "epoch": 0.13241514724241163, + "grad_norm": 2616.91796875, + "learning_rate": 9.970566022594996e-06, + "loss": 752.9297, + "step": 65550 + }, + { + "epoch": 0.13243534787509545, + "grad_norm": 43435.01171875, + "learning_rate": 9.970528190463136e-06, + "loss": 1238.7918, + "step": 65560 + }, + { + "epoch": 0.13245554850777927, + "grad_norm": 1285.1409912109375, + "learning_rate": 9.970490334105525e-06, + "loss": 2785.7162, + "step": 65570 + }, + { + "epoch": 0.1324757491404631, + "grad_norm": 50664.83203125, + "learning_rate": 9.970452453522344e-06, + "loss": 3290.1625, + "step": 65580 + }, + { + "epoch": 0.13249594977314688, + "grad_norm": 9793.30859375, + "learning_rate": 9.970414548713783e-06, + "loss": 1984.2744, + "step": 65590 + }, + { + "epoch": 0.1325161504058307, + "grad_norm": 3490.42578125, + "learning_rate": 9.970376619680024e-06, + "loss": 4211.9465, + "step": 65600 + }, + { + "epoch": 0.13253635103851452, + "grad_norm": 54616.5234375, + "learning_rate": 9.970338666421251e-06, + "loss": 1664.343, + "step": 65610 + }, + { + "epoch": 0.13255655167119834, + "grad_norm": 65816.21875, + "learning_rate": 9.970300688937651e-06, + "loss": 3297.2543, + "step": 65620 + }, + { + "epoch": 0.13257675230388216, + "grad_norm": 125661.59375, + "learning_rate": 9.970262687229409e-06, + "loss": 2859.0156, + "step": 65630 + }, + { + "epoch": 0.13259695293656598, + "grad_norm": 2715.15625, + "learning_rate": 9.970224661296708e-06, + "loss": 1491.3369, + "step": 65640 + }, + { + "epoch": 0.1326171535692498, + "grad_norm": 19137.2890625, + "learning_rate": 9.970186611139736e-06, + "loss": 1318.5415, + "step": 65650 + }, + { + "epoch": 0.1326373542019336, + "grad_norm": 1153.0400390625, + "learning_rate": 9.970148536758678e-06, + "loss": 4272.6621, + "step": 65660 + }, + { + "epoch": 0.1326575548346174, + "grad_norm": 19677.7578125, + "learning_rate": 9.970110438153717e-06, + "loss": 2801.4656, + "step": 65670 + }, + { + "epoch": 0.13267775546730123, + "grad_norm": 15324.80078125, + "learning_rate": 9.970072315325041e-06, + "loss": 5252.9488, + "step": 65680 + }, + { + "epoch": 0.13269795609998505, + "grad_norm": 29354.02734375, + "learning_rate": 9.970034168272835e-06, + "loss": 4870.1914, + "step": 65690 + }, + { + "epoch": 0.13271815673266887, + "grad_norm": 6029.83935546875, + "learning_rate": 9.969995996997285e-06, + "loss": 4395.7871, + "step": 65700 + }, + { + "epoch": 0.1327383573653527, + "grad_norm": 16494.841796875, + "learning_rate": 9.96995780149858e-06, + "loss": 1244.3723, + "step": 65710 + }, + { + "epoch": 0.13275855799803649, + "grad_norm": 6047.4814453125, + "learning_rate": 9.969919581776902e-06, + "loss": 3938.8098, + "step": 65720 + }, + { + "epoch": 0.1327787586307203, + "grad_norm": 110325.46875, + "learning_rate": 9.969881337832437e-06, + "loss": 5160.6781, + "step": 65730 + }, + { + "epoch": 0.13279895926340413, + "grad_norm": 7794.96240234375, + "learning_rate": 9.969843069665375e-06, + "loss": 1941.5877, + "step": 65740 + }, + { + "epoch": 0.13281915989608795, + "grad_norm": 27115.77734375, + "learning_rate": 9.9698047772759e-06, + "loss": 2692.6828, + "step": 65750 + }, + { + "epoch": 0.13283936052877177, + "grad_norm": 66407.59375, + "learning_rate": 9.969766460664199e-06, + "loss": 947.2372, + "step": 65760 + }, + { + "epoch": 0.13285956116145559, + "grad_norm": 9366.52734375, + "learning_rate": 9.96972811983046e-06, + "loss": 2095.8187, + "step": 65770 + }, + { + "epoch": 0.1328797617941394, + "grad_norm": 78526.34375, + "learning_rate": 9.969689754774868e-06, + "loss": 3912.9098, + "step": 65780 + }, + { + "epoch": 0.1328999624268232, + "grad_norm": 735.8782958984375, + "learning_rate": 9.96965136549761e-06, + "loss": 3220.2094, + "step": 65790 + }, + { + "epoch": 0.13292016305950702, + "grad_norm": 1258.4195556640625, + "learning_rate": 9.969612951998874e-06, + "loss": 727.0463, + "step": 65800 + }, + { + "epoch": 0.13294036369219084, + "grad_norm": 4831.25927734375, + "learning_rate": 9.96957451427885e-06, + "loss": 1145.871, + "step": 65810 + }, + { + "epoch": 0.13296056432487466, + "grad_norm": 64193.99609375, + "learning_rate": 9.96953605233772e-06, + "loss": 2594.85, + "step": 65820 + }, + { + "epoch": 0.13298076495755848, + "grad_norm": 11087.169921875, + "learning_rate": 9.969497566175675e-06, + "loss": 3688.3539, + "step": 65830 + }, + { + "epoch": 0.1330009655902423, + "grad_norm": 64120.66015625, + "learning_rate": 9.969459055792903e-06, + "loss": 4022.9227, + "step": 65840 + }, + { + "epoch": 0.1330211662229261, + "grad_norm": 169163.96875, + "learning_rate": 9.969420521189587e-06, + "loss": 5136.7105, + "step": 65850 + }, + { + "epoch": 0.1330413668556099, + "grad_norm": 66031.2265625, + "learning_rate": 9.96938196236592e-06, + "loss": 1843.9346, + "step": 65860 + }, + { + "epoch": 0.13306156748829373, + "grad_norm": 74616.3515625, + "learning_rate": 9.96934337932209e-06, + "loss": 2972.1703, + "step": 65870 + }, + { + "epoch": 0.13308176812097755, + "grad_norm": 5944.8759765625, + "learning_rate": 9.969304772058279e-06, + "loss": 2928.0559, + "step": 65880 + }, + { + "epoch": 0.13310196875366137, + "grad_norm": 12762.9423828125, + "learning_rate": 9.969266140574682e-06, + "loss": 1455.3915, + "step": 65890 + }, + { + "epoch": 0.1331221693863452, + "grad_norm": 864.548583984375, + "learning_rate": 9.969227484871485e-06, + "loss": 2963.6184, + "step": 65900 + }, + { + "epoch": 0.13314237001902898, + "grad_norm": 3683.837158203125, + "learning_rate": 9.969188804948872e-06, + "loss": 3427.7836, + "step": 65910 + }, + { + "epoch": 0.1331625706517128, + "grad_norm": 11194.5556640625, + "learning_rate": 9.969150100807039e-06, + "loss": 2052.2699, + "step": 65920 + }, + { + "epoch": 0.13318277128439662, + "grad_norm": 3065.552490234375, + "learning_rate": 9.969111372446171e-06, + "loss": 2397.8455, + "step": 65930 + }, + { + "epoch": 0.13320297191708044, + "grad_norm": 834.9616088867188, + "learning_rate": 9.969072619866455e-06, + "loss": 1846.1893, + "step": 65940 + }, + { + "epoch": 0.13322317254976426, + "grad_norm": 14684.4169921875, + "learning_rate": 9.969033843068083e-06, + "loss": 1381.1769, + "step": 65950 + }, + { + "epoch": 0.13324337318244808, + "grad_norm": 1433.0623779296875, + "learning_rate": 9.968995042051244e-06, + "loss": 3004.3334, + "step": 65960 + }, + { + "epoch": 0.1332635738151319, + "grad_norm": 1545.1988525390625, + "learning_rate": 9.968956216816123e-06, + "loss": 1658.6561, + "step": 65970 + }, + { + "epoch": 0.1332837744478157, + "grad_norm": 1611.6064453125, + "learning_rate": 9.968917367362914e-06, + "loss": 1337.4755, + "step": 65980 + }, + { + "epoch": 0.13330397508049951, + "grad_norm": 5697.39013671875, + "learning_rate": 9.968878493691803e-06, + "loss": 1942.7684, + "step": 65990 + }, + { + "epoch": 0.13332417571318333, + "grad_norm": 43449.6640625, + "learning_rate": 9.968839595802982e-06, + "loss": 4174.4289, + "step": 66000 + }, + { + "epoch": 0.13334437634586715, + "grad_norm": 7455.92529296875, + "learning_rate": 9.968800673696638e-06, + "loss": 3407.1746, + "step": 66010 + }, + { + "epoch": 0.13336457697855097, + "grad_norm": 28112.3671875, + "learning_rate": 9.968761727372965e-06, + "loss": 1674.8473, + "step": 66020 + }, + { + "epoch": 0.1333847776112348, + "grad_norm": 77361.3515625, + "learning_rate": 9.968722756832148e-06, + "loss": 1637.7129, + "step": 66030 + }, + { + "epoch": 0.1334049782439186, + "grad_norm": 3657.040283203125, + "learning_rate": 9.96868376207438e-06, + "loss": 1901.8385, + "step": 66040 + }, + { + "epoch": 0.1334251788766024, + "grad_norm": 25290.97265625, + "learning_rate": 9.968644743099848e-06, + "loss": 1038.8201, + "step": 66050 + }, + { + "epoch": 0.13344537950928623, + "grad_norm": 48819.43359375, + "learning_rate": 9.968605699908747e-06, + "loss": 1708.5666, + "step": 66060 + }, + { + "epoch": 0.13346558014197005, + "grad_norm": 20080.138671875, + "learning_rate": 9.968566632501262e-06, + "loss": 3807.0945, + "step": 66070 + }, + { + "epoch": 0.13348578077465387, + "grad_norm": 1742.374755859375, + "learning_rate": 9.968527540877586e-06, + "loss": 3905.1578, + "step": 66080 + }, + { + "epoch": 0.1335059814073377, + "grad_norm": 16548.076171875, + "learning_rate": 9.96848842503791e-06, + "loss": 3306.0523, + "step": 66090 + }, + { + "epoch": 0.1335261820400215, + "grad_norm": 1570.756591796875, + "learning_rate": 9.968449284982424e-06, + "loss": 2566.0229, + "step": 66100 + }, + { + "epoch": 0.1335463826727053, + "grad_norm": 8805.0390625, + "learning_rate": 9.968410120711321e-06, + "loss": 1939.1428, + "step": 66110 + }, + { + "epoch": 0.13356658330538912, + "grad_norm": 24401.287109375, + "learning_rate": 9.968370932224787e-06, + "loss": 3453.8613, + "step": 66120 + }, + { + "epoch": 0.13358678393807294, + "grad_norm": 29151.431640625, + "learning_rate": 9.968331719523015e-06, + "loss": 2647.4518, + "step": 66130 + }, + { + "epoch": 0.13360698457075676, + "grad_norm": 31403.390625, + "learning_rate": 9.968292482606199e-06, + "loss": 4065.848, + "step": 66140 + }, + { + "epoch": 0.13362718520344058, + "grad_norm": 158052.125, + "learning_rate": 9.968253221474527e-06, + "loss": 4712.9438, + "step": 66150 + }, + { + "epoch": 0.1336473858361244, + "grad_norm": 68750.28125, + "learning_rate": 9.96821393612819e-06, + "loss": 2893.4164, + "step": 66160 + }, + { + "epoch": 0.1336675864688082, + "grad_norm": 163.60858154296875, + "learning_rate": 9.968174626567382e-06, + "loss": 1408.9127, + "step": 66170 + }, + { + "epoch": 0.133687787101492, + "grad_norm": 4946.51220703125, + "learning_rate": 9.968135292792294e-06, + "loss": 4526.1598, + "step": 66180 + }, + { + "epoch": 0.13370798773417583, + "grad_norm": 73706.8046875, + "learning_rate": 9.968095934803116e-06, + "loss": 4199.4172, + "step": 66190 + }, + { + "epoch": 0.13372818836685965, + "grad_norm": 4173.47705078125, + "learning_rate": 9.968056552600043e-06, + "loss": 3434.4766, + "step": 66200 + }, + { + "epoch": 0.13374838899954347, + "grad_norm": 3934.320068359375, + "learning_rate": 9.968017146183263e-06, + "loss": 2341.3805, + "step": 66210 + }, + { + "epoch": 0.1337685896322273, + "grad_norm": 7395.8681640625, + "learning_rate": 9.967977715552972e-06, + "loss": 5341.0457, + "step": 66220 + }, + { + "epoch": 0.13378879026491108, + "grad_norm": 48391.9765625, + "learning_rate": 9.967938260709357e-06, + "loss": 2725.14, + "step": 66230 + }, + { + "epoch": 0.1338089908975949, + "grad_norm": 33323.23828125, + "learning_rate": 9.967898781652616e-06, + "loss": 1119.0224, + "step": 66240 + }, + { + "epoch": 0.13382919153027872, + "grad_norm": 229530.125, + "learning_rate": 9.967859278382939e-06, + "loss": 2318.8086, + "step": 66250 + }, + { + "epoch": 0.13384939216296254, + "grad_norm": 103.32881927490234, + "learning_rate": 9.967819750900517e-06, + "loss": 3603.3137, + "step": 66260 + }, + { + "epoch": 0.13386959279564636, + "grad_norm": 7078.35693359375, + "learning_rate": 9.967780199205544e-06, + "loss": 2998.9408, + "step": 66270 + }, + { + "epoch": 0.13388979342833018, + "grad_norm": 3100.1103515625, + "learning_rate": 9.967740623298214e-06, + "loss": 1555.4058, + "step": 66280 + }, + { + "epoch": 0.133909994061014, + "grad_norm": 3923.3662109375, + "learning_rate": 9.967701023178717e-06, + "loss": 1543.2463, + "step": 66290 + }, + { + "epoch": 0.1339301946936978, + "grad_norm": 57273.0625, + "learning_rate": 9.96766139884725e-06, + "loss": 3477.6133, + "step": 66300 + }, + { + "epoch": 0.13395039532638162, + "grad_norm": 18039.623046875, + "learning_rate": 9.967621750304002e-06, + "loss": 3493.8738, + "step": 66310 + }, + { + "epoch": 0.13397059595906544, + "grad_norm": 8752.935546875, + "learning_rate": 9.96758207754917e-06, + "loss": 3617.3738, + "step": 66320 + }, + { + "epoch": 0.13399079659174926, + "grad_norm": 4229.01904296875, + "learning_rate": 9.967542380582944e-06, + "loss": 3082.0377, + "step": 66330 + }, + { + "epoch": 0.13401099722443308, + "grad_norm": 64590.7734375, + "learning_rate": 9.96750265940552e-06, + "loss": 2301.9176, + "step": 66340 + }, + { + "epoch": 0.1340311978571169, + "grad_norm": 28530.212890625, + "learning_rate": 9.967462914017087e-06, + "loss": 1556.3838, + "step": 66350 + }, + { + "epoch": 0.1340513984898007, + "grad_norm": 12326.900390625, + "learning_rate": 9.967423144417847e-06, + "loss": 1496.4629, + "step": 66360 + }, + { + "epoch": 0.1340715991224845, + "grad_norm": 12714.7275390625, + "learning_rate": 9.967383350607986e-06, + "loss": 3108.8436, + "step": 66370 + }, + { + "epoch": 0.13409179975516833, + "grad_norm": 17933.6484375, + "learning_rate": 9.967343532587701e-06, + "loss": 1642.2686, + "step": 66380 + }, + { + "epoch": 0.13411200038785215, + "grad_norm": 1000.1652221679688, + "learning_rate": 9.967303690357189e-06, + "loss": 2946.1475, + "step": 66390 + }, + { + "epoch": 0.13413220102053597, + "grad_norm": 2357.86669921875, + "learning_rate": 9.967263823916638e-06, + "loss": 1939.9082, + "step": 66400 + }, + { + "epoch": 0.1341524016532198, + "grad_norm": 15925.1259765625, + "learning_rate": 9.967223933266247e-06, + "loss": 1643.5834, + "step": 66410 + }, + { + "epoch": 0.1341726022859036, + "grad_norm": 11095.462890625, + "learning_rate": 9.96718401840621e-06, + "loss": 3007.0984, + "step": 66420 + }, + { + "epoch": 0.1341928029185874, + "grad_norm": 879.4083862304688, + "learning_rate": 9.96714407933672e-06, + "loss": 1675.4484, + "step": 66430 + }, + { + "epoch": 0.13421300355127122, + "grad_norm": 3302.322021484375, + "learning_rate": 9.96710411605797e-06, + "loss": 1129.0674, + "step": 66440 + }, + { + "epoch": 0.13423320418395504, + "grad_norm": 39198.55859375, + "learning_rate": 9.96706412857016e-06, + "loss": 2711.2701, + "step": 66450 + }, + { + "epoch": 0.13425340481663886, + "grad_norm": 5339.18798828125, + "learning_rate": 9.967024116873481e-06, + "loss": 1102.7547, + "step": 66460 + }, + { + "epoch": 0.13427360544932268, + "grad_norm": 32565.607421875, + "learning_rate": 9.966984080968128e-06, + "loss": 2745.083, + "step": 66470 + }, + { + "epoch": 0.1342938060820065, + "grad_norm": 18434.65234375, + "learning_rate": 9.966944020854297e-06, + "loss": 2932.7707, + "step": 66480 + }, + { + "epoch": 0.1343140067146903, + "grad_norm": 10549.0556640625, + "learning_rate": 9.966903936532184e-06, + "loss": 1637.0139, + "step": 66490 + }, + { + "epoch": 0.1343342073473741, + "grad_norm": 56912.0625, + "learning_rate": 9.966863828001982e-06, + "loss": 1911.116, + "step": 66500 + }, + { + "epoch": 0.13435440798005793, + "grad_norm": 1184.3336181640625, + "learning_rate": 9.96682369526389e-06, + "loss": 1146.7786, + "step": 66510 + }, + { + "epoch": 0.13437460861274175, + "grad_norm": 144315.921875, + "learning_rate": 9.966783538318101e-06, + "loss": 2899.1734, + "step": 66520 + }, + { + "epoch": 0.13439480924542557, + "grad_norm": 7167.36376953125, + "learning_rate": 9.966743357164812e-06, + "loss": 2691.0363, + "step": 66530 + }, + { + "epoch": 0.1344150098781094, + "grad_norm": 22900.841796875, + "learning_rate": 9.966703151804219e-06, + "loss": 1568.5037, + "step": 66540 + }, + { + "epoch": 0.13443521051079318, + "grad_norm": 20654.150390625, + "learning_rate": 9.966662922236515e-06, + "loss": 3148.3232, + "step": 66550 + }, + { + "epoch": 0.134455411143477, + "grad_norm": 12038.8603515625, + "learning_rate": 9.966622668461899e-06, + "loss": 4218.1434, + "step": 66560 + }, + { + "epoch": 0.13447561177616082, + "grad_norm": 43434.2890625, + "learning_rate": 9.966582390480567e-06, + "loss": 1376.4655, + "step": 66570 + }, + { + "epoch": 0.13449581240884464, + "grad_norm": 10727.068359375, + "learning_rate": 9.966542088292714e-06, + "loss": 1413.4643, + "step": 66580 + }, + { + "epoch": 0.13451601304152846, + "grad_norm": 4452.59375, + "learning_rate": 9.96650176189854e-06, + "loss": 2000.4424, + "step": 66590 + }, + { + "epoch": 0.13453621367421228, + "grad_norm": 9202.685546875, + "learning_rate": 9.966461411298235e-06, + "loss": 2946.9045, + "step": 66600 + }, + { + "epoch": 0.1345564143068961, + "grad_norm": 4676.5361328125, + "learning_rate": 9.966421036492003e-06, + "loss": 1715.9553, + "step": 66610 + }, + { + "epoch": 0.1345766149395799, + "grad_norm": 145504.109375, + "learning_rate": 9.966380637480034e-06, + "loss": 4069.1609, + "step": 66620 + }, + { + "epoch": 0.13459681557226372, + "grad_norm": 9494.8798828125, + "learning_rate": 9.96634021426253e-06, + "loss": 2744.658, + "step": 66630 + }, + { + "epoch": 0.13461701620494754, + "grad_norm": 10051.6416015625, + "learning_rate": 9.966299766839685e-06, + "loss": 4734.5617, + "step": 66640 + }, + { + "epoch": 0.13463721683763136, + "grad_norm": 66854.3671875, + "learning_rate": 9.966259295211698e-06, + "loss": 1575.0996, + "step": 66650 + }, + { + "epoch": 0.13465741747031518, + "grad_norm": 2932.129150390625, + "learning_rate": 9.966218799378766e-06, + "loss": 1516.6342, + "step": 66660 + }, + { + "epoch": 0.134677618102999, + "grad_norm": 17521.2109375, + "learning_rate": 9.966178279341084e-06, + "loss": 6100.5449, + "step": 66670 + }, + { + "epoch": 0.1346978187356828, + "grad_norm": 20610.96875, + "learning_rate": 9.966137735098853e-06, + "loss": 3625.1613, + "step": 66680 + }, + { + "epoch": 0.1347180193683666, + "grad_norm": 21070.796875, + "learning_rate": 9.966097166652268e-06, + "loss": 2761.2289, + "step": 66690 + }, + { + "epoch": 0.13473822000105043, + "grad_norm": 2616.2060546875, + "learning_rate": 9.966056574001528e-06, + "loss": 1500.2562, + "step": 66700 + }, + { + "epoch": 0.13475842063373425, + "grad_norm": 1659.3812255859375, + "learning_rate": 9.966015957146832e-06, + "loss": 2901.9004, + "step": 66710 + }, + { + "epoch": 0.13477862126641807, + "grad_norm": 311.388671875, + "learning_rate": 9.965975316088377e-06, + "loss": 1712.0043, + "step": 66720 + }, + { + "epoch": 0.1347988218991019, + "grad_norm": 1224.322021484375, + "learning_rate": 9.96593465082636e-06, + "loss": 1522.7856, + "step": 66730 + }, + { + "epoch": 0.1348190225317857, + "grad_norm": 847.7900390625, + "learning_rate": 9.965893961360977e-06, + "loss": 778.6143, + "step": 66740 + }, + { + "epoch": 0.1348392231644695, + "grad_norm": 1850.9443359375, + "learning_rate": 9.965853247692433e-06, + "loss": 1464.8082, + "step": 66750 + }, + { + "epoch": 0.13485942379715332, + "grad_norm": 9535.5263671875, + "learning_rate": 9.965812509820918e-06, + "loss": 835.3194, + "step": 66760 + }, + { + "epoch": 0.13487962442983714, + "grad_norm": 18832.212890625, + "learning_rate": 9.965771747746638e-06, + "loss": 3017.5707, + "step": 66770 + }, + { + "epoch": 0.13489982506252096, + "grad_norm": 8230.154296875, + "learning_rate": 9.96573096146979e-06, + "loss": 978.8585, + "step": 66780 + }, + { + "epoch": 0.13492002569520478, + "grad_norm": 4412.689453125, + "learning_rate": 9.96569015099057e-06, + "loss": 1280.2822, + "step": 66790 + }, + { + "epoch": 0.1349402263278886, + "grad_norm": 47851.8046875, + "learning_rate": 9.965649316309178e-06, + "loss": 1637.8658, + "step": 66800 + }, + { + "epoch": 0.1349604269605724, + "grad_norm": 18578.267578125, + "learning_rate": 9.965608457425813e-06, + "loss": 1884.0754, + "step": 66810 + }, + { + "epoch": 0.1349806275932562, + "grad_norm": 8433.8916015625, + "learning_rate": 9.965567574340676e-06, + "loss": 2553.1822, + "step": 66820 + }, + { + "epoch": 0.13500082822594003, + "grad_norm": 54491.06640625, + "learning_rate": 9.965526667053964e-06, + "loss": 2141.2705, + "step": 66830 + }, + { + "epoch": 0.13502102885862385, + "grad_norm": 96430.8125, + "learning_rate": 9.965485735565878e-06, + "loss": 2988.6799, + "step": 66840 + }, + { + "epoch": 0.13504122949130767, + "grad_norm": 115219.4453125, + "learning_rate": 9.965444779876618e-06, + "loss": 3037.208, + "step": 66850 + }, + { + "epoch": 0.1350614301239915, + "grad_norm": 6534.33349609375, + "learning_rate": 9.96540379998638e-06, + "loss": 5190.8195, + "step": 66860 + }, + { + "epoch": 0.13508163075667529, + "grad_norm": 5753.01953125, + "learning_rate": 9.965362795895368e-06, + "loss": 2876.4896, + "step": 66870 + }, + { + "epoch": 0.1351018313893591, + "grad_norm": 7636.8896484375, + "learning_rate": 9.965321767603778e-06, + "loss": 2273.9293, + "step": 66880 + }, + { + "epoch": 0.13512203202204293, + "grad_norm": 8437.900390625, + "learning_rate": 9.965280715111814e-06, + "loss": 2147.9609, + "step": 66890 + }, + { + "epoch": 0.13514223265472675, + "grad_norm": 9251.5615234375, + "learning_rate": 9.965239638419673e-06, + "loss": 4211.8582, + "step": 66900 + }, + { + "epoch": 0.13516243328741057, + "grad_norm": 24733.06640625, + "learning_rate": 9.965198537527556e-06, + "loss": 2581.0795, + "step": 66910 + }, + { + "epoch": 0.13518263392009439, + "grad_norm": 131688.5625, + "learning_rate": 9.965157412435663e-06, + "loss": 2989.2301, + "step": 66920 + }, + { + "epoch": 0.1352028345527782, + "grad_norm": 12168.419921875, + "learning_rate": 9.965116263144196e-06, + "loss": 2342.3881, + "step": 66930 + }, + { + "epoch": 0.135223035185462, + "grad_norm": 217550.9375, + "learning_rate": 9.965075089653354e-06, + "loss": 4378.1215, + "step": 66940 + }, + { + "epoch": 0.13524323581814582, + "grad_norm": 53600.24609375, + "learning_rate": 9.965033891963338e-06, + "loss": 1963.8207, + "step": 66950 + }, + { + "epoch": 0.13526343645082964, + "grad_norm": 264.78582763671875, + "learning_rate": 9.96499267007435e-06, + "loss": 1790.2197, + "step": 66960 + }, + { + "epoch": 0.13528363708351346, + "grad_norm": 7055.23583984375, + "learning_rate": 9.964951423986588e-06, + "loss": 2078.2443, + "step": 66970 + }, + { + "epoch": 0.13530383771619728, + "grad_norm": 6084.53662109375, + "learning_rate": 9.964910153700258e-06, + "loss": 3573.4742, + "step": 66980 + }, + { + "epoch": 0.1353240383488811, + "grad_norm": 46747.95703125, + "learning_rate": 9.964868859215555e-06, + "loss": 1559.0271, + "step": 66990 + }, + { + "epoch": 0.1353442389815649, + "grad_norm": 9112.369140625, + "learning_rate": 9.964827540532685e-06, + "loss": 1499.2458, + "step": 67000 + }, + { + "epoch": 0.1353644396142487, + "grad_norm": 7811.853515625, + "learning_rate": 9.964786197651848e-06, + "loss": 4529.2133, + "step": 67010 + }, + { + "epoch": 0.13538464024693253, + "grad_norm": 12790.685546875, + "learning_rate": 9.964744830573245e-06, + "loss": 1980.3721, + "step": 67020 + }, + { + "epoch": 0.13540484087961635, + "grad_norm": 3377.149658203125, + "learning_rate": 9.964703439297076e-06, + "loss": 3241.1934, + "step": 67030 + }, + { + "epoch": 0.13542504151230017, + "grad_norm": 6777.029296875, + "learning_rate": 9.964662023823548e-06, + "loss": 4003.0047, + "step": 67040 + }, + { + "epoch": 0.135445242144984, + "grad_norm": 51315.9140625, + "learning_rate": 9.964620584152858e-06, + "loss": 2422.1635, + "step": 67050 + }, + { + "epoch": 0.13546544277766778, + "grad_norm": 153085.078125, + "learning_rate": 9.964579120285208e-06, + "loss": 1881.0, + "step": 67060 + }, + { + "epoch": 0.1354856434103516, + "grad_norm": 30928.5, + "learning_rate": 9.964537632220801e-06, + "loss": 1622.876, + "step": 67070 + }, + { + "epoch": 0.13550584404303542, + "grad_norm": 18163.888671875, + "learning_rate": 9.964496119959842e-06, + "loss": 2093.8568, + "step": 67080 + }, + { + "epoch": 0.13552604467571924, + "grad_norm": 110543.9375, + "learning_rate": 9.96445458350253e-06, + "loss": 2074.7613, + "step": 67090 + }, + { + "epoch": 0.13554624530840306, + "grad_norm": 13131.958984375, + "learning_rate": 9.964413022849069e-06, + "loss": 1455.7802, + "step": 67100 + }, + { + "epoch": 0.13556644594108688, + "grad_norm": 13398.8662109375, + "learning_rate": 9.964371437999661e-06, + "loss": 2730.2654, + "step": 67110 + }, + { + "epoch": 0.1355866465737707, + "grad_norm": 35797.55078125, + "learning_rate": 9.96432982895451e-06, + "loss": 3359.8234, + "step": 67120 + }, + { + "epoch": 0.1356068472064545, + "grad_norm": 12399.587890625, + "learning_rate": 9.964288195713814e-06, + "loss": 1353.8707, + "step": 67130 + }, + { + "epoch": 0.13562704783913831, + "grad_norm": 2231.640625, + "learning_rate": 9.964246538277782e-06, + "loss": 1210.6621, + "step": 67140 + }, + { + "epoch": 0.13564724847182213, + "grad_norm": 11137.0908203125, + "learning_rate": 9.964204856646613e-06, + "loss": 4209.8648, + "step": 67150 + }, + { + "epoch": 0.13566744910450595, + "grad_norm": 32675.7734375, + "learning_rate": 9.964163150820512e-06, + "loss": 2520.6623, + "step": 67160 + }, + { + "epoch": 0.13568764973718977, + "grad_norm": 43791.21484375, + "learning_rate": 9.964121420799682e-06, + "loss": 2246.8223, + "step": 67170 + }, + { + "epoch": 0.1357078503698736, + "grad_norm": 850.1107788085938, + "learning_rate": 9.964079666584327e-06, + "loss": 1977.7295, + "step": 67180 + }, + { + "epoch": 0.1357280510025574, + "grad_norm": 93544.4375, + "learning_rate": 9.96403788817465e-06, + "loss": 3945.4633, + "step": 67190 + }, + { + "epoch": 0.1357482516352412, + "grad_norm": 13993.791015625, + "learning_rate": 9.963996085570854e-06, + "loss": 5037.884, + "step": 67200 + }, + { + "epoch": 0.13576845226792503, + "grad_norm": 8686.8017578125, + "learning_rate": 9.963954258773143e-06, + "loss": 1341.7041, + "step": 67210 + }, + { + "epoch": 0.13578865290060885, + "grad_norm": 65899.46875, + "learning_rate": 9.963912407781721e-06, + "loss": 4311.9086, + "step": 67220 + }, + { + "epoch": 0.13580885353329267, + "grad_norm": 80648.890625, + "learning_rate": 9.963870532596791e-06, + "loss": 2614.3139, + "step": 67230 + }, + { + "epoch": 0.1358290541659765, + "grad_norm": 22668.61328125, + "learning_rate": 9.96382863321856e-06, + "loss": 2512.5023, + "step": 67240 + }, + { + "epoch": 0.1358492547986603, + "grad_norm": 3664.954833984375, + "learning_rate": 9.963786709647228e-06, + "loss": 2387.2639, + "step": 67250 + }, + { + "epoch": 0.1358694554313441, + "grad_norm": 1981.769775390625, + "learning_rate": 9.963744761883003e-06, + "loss": 1569.4985, + "step": 67260 + }, + { + "epoch": 0.13588965606402792, + "grad_norm": 9840.1953125, + "learning_rate": 9.963702789926089e-06, + "loss": 2212.0189, + "step": 67270 + }, + { + "epoch": 0.13590985669671174, + "grad_norm": 209908.890625, + "learning_rate": 9.963660793776689e-06, + "loss": 5281.441, + "step": 67280 + }, + { + "epoch": 0.13593005732939556, + "grad_norm": 112042.390625, + "learning_rate": 9.963618773435006e-06, + "loss": 2570.8084, + "step": 67290 + }, + { + "epoch": 0.13595025796207938, + "grad_norm": 18219.736328125, + "learning_rate": 9.96357672890125e-06, + "loss": 3284.8117, + "step": 67300 + }, + { + "epoch": 0.1359704585947632, + "grad_norm": 12468.5009765625, + "learning_rate": 9.963534660175622e-06, + "loss": 1584.0218, + "step": 67310 + }, + { + "epoch": 0.135990659227447, + "grad_norm": 15145.49609375, + "learning_rate": 9.963492567258327e-06, + "loss": 2219.4057, + "step": 67320 + }, + { + "epoch": 0.1360108598601308, + "grad_norm": 30797.0703125, + "learning_rate": 9.963450450149572e-06, + "loss": 2767.7311, + "step": 67330 + }, + { + "epoch": 0.13603106049281463, + "grad_norm": 134213.859375, + "learning_rate": 9.963408308849563e-06, + "loss": 3336.7258, + "step": 67340 + }, + { + "epoch": 0.13605126112549845, + "grad_norm": 39638.01171875, + "learning_rate": 9.963366143358502e-06, + "loss": 3354.0656, + "step": 67350 + }, + { + "epoch": 0.13607146175818227, + "grad_norm": 76441.140625, + "learning_rate": 9.963323953676599e-06, + "loss": 4345.8102, + "step": 67360 + }, + { + "epoch": 0.1360916623908661, + "grad_norm": 93.91429901123047, + "learning_rate": 9.963281739804054e-06, + "loss": 4120.7617, + "step": 67370 + }, + { + "epoch": 0.13611186302354988, + "grad_norm": 3456.750244140625, + "learning_rate": 9.963239501741076e-06, + "loss": 2243.8861, + "step": 67380 + }, + { + "epoch": 0.1361320636562337, + "grad_norm": 17278.638671875, + "learning_rate": 9.963197239487871e-06, + "loss": 1511.7691, + "step": 67390 + }, + { + "epoch": 0.13615226428891752, + "grad_norm": 18680.767578125, + "learning_rate": 9.963154953044646e-06, + "loss": 2958.0572, + "step": 67400 + }, + { + "epoch": 0.13617246492160134, + "grad_norm": 15332.21875, + "learning_rate": 9.963112642411606e-06, + "loss": 1730.7779, + "step": 67410 + }, + { + "epoch": 0.13619266555428516, + "grad_norm": 9142.3427734375, + "learning_rate": 9.963070307588955e-06, + "loss": 1736.8973, + "step": 67420 + }, + { + "epoch": 0.13621286618696898, + "grad_norm": 54561.5078125, + "learning_rate": 9.963027948576902e-06, + "loss": 3645.0687, + "step": 67430 + }, + { + "epoch": 0.1362330668196528, + "grad_norm": 4749.7880859375, + "learning_rate": 9.96298556537565e-06, + "loss": 3133.107, + "step": 67440 + }, + { + "epoch": 0.1362532674523366, + "grad_norm": 32320.62109375, + "learning_rate": 9.962943157985412e-06, + "loss": 3933.6223, + "step": 67450 + }, + { + "epoch": 0.13627346808502042, + "grad_norm": 31164.787109375, + "learning_rate": 9.96290072640639e-06, + "loss": 2024.0855, + "step": 67460 + }, + { + "epoch": 0.13629366871770424, + "grad_norm": 9359.859375, + "learning_rate": 9.962858270638793e-06, + "loss": 2281.2781, + "step": 67470 + }, + { + "epoch": 0.13631386935038806, + "grad_norm": 52349.453125, + "learning_rate": 9.962815790682825e-06, + "loss": 1444.3185, + "step": 67480 + }, + { + "epoch": 0.13633406998307188, + "grad_norm": 547.71484375, + "learning_rate": 9.962773286538696e-06, + "loss": 905.7214, + "step": 67490 + }, + { + "epoch": 0.1363542706157557, + "grad_norm": 14744.7646484375, + "learning_rate": 9.962730758206612e-06, + "loss": 2030.4793, + "step": 67500 + }, + { + "epoch": 0.1363744712484395, + "grad_norm": 345.1991271972656, + "learning_rate": 9.962688205686778e-06, + "loss": 2524.2561, + "step": 67510 + }, + { + "epoch": 0.1363946718811233, + "grad_norm": 4671.14111328125, + "learning_rate": 9.962645628979406e-06, + "loss": 1489.5399, + "step": 67520 + }, + { + "epoch": 0.13641487251380713, + "grad_norm": 21353.541015625, + "learning_rate": 9.962603028084699e-06, + "loss": 1577.2139, + "step": 67530 + }, + { + "epoch": 0.13643507314649095, + "grad_norm": 367002.25, + "learning_rate": 9.962560403002868e-06, + "loss": 2753.0066, + "step": 67540 + }, + { + "epoch": 0.13645527377917477, + "grad_norm": 1125.194580078125, + "learning_rate": 9.96251775373412e-06, + "loss": 5288.9152, + "step": 67550 + }, + { + "epoch": 0.1364754744118586, + "grad_norm": 10338.615234375, + "learning_rate": 9.962475080278662e-06, + "loss": 3107.0383, + "step": 67560 + }, + { + "epoch": 0.1364956750445424, + "grad_norm": 8052.32080078125, + "learning_rate": 9.9624323826367e-06, + "loss": 2249.7596, + "step": 67570 + }, + { + "epoch": 0.1365158756772262, + "grad_norm": 4771.77294921875, + "learning_rate": 9.962389660808447e-06, + "loss": 2143.6537, + "step": 67580 + }, + { + "epoch": 0.13653607630991002, + "grad_norm": 76704.296875, + "learning_rate": 9.96234691479411e-06, + "loss": 4249.4211, + "step": 67590 + }, + { + "epoch": 0.13655627694259384, + "grad_norm": 2630.283447265625, + "learning_rate": 9.962304144593893e-06, + "loss": 1656.9691, + "step": 67600 + }, + { + "epoch": 0.13657647757527766, + "grad_norm": 137491.828125, + "learning_rate": 9.962261350208008e-06, + "loss": 3012.2273, + "step": 67610 + }, + { + "epoch": 0.13659667820796148, + "grad_norm": 10222.4189453125, + "learning_rate": 9.962218531636664e-06, + "loss": 5627.8012, + "step": 67620 + }, + { + "epoch": 0.1366168788406453, + "grad_norm": 167801.328125, + "learning_rate": 9.962175688880067e-06, + "loss": 3959.7688, + "step": 67630 + }, + { + "epoch": 0.1366370794733291, + "grad_norm": 13132.267578125, + "learning_rate": 9.96213282193843e-06, + "loss": 1655.4734, + "step": 67640 + }, + { + "epoch": 0.1366572801060129, + "grad_norm": 2522.949951171875, + "learning_rate": 9.962089930811959e-06, + "loss": 2755.542, + "step": 67650 + }, + { + "epoch": 0.13667748073869673, + "grad_norm": 16409.87109375, + "learning_rate": 9.962047015500861e-06, + "loss": 3621.116, + "step": 67660 + }, + { + "epoch": 0.13669768137138055, + "grad_norm": 146030.1875, + "learning_rate": 9.96200407600535e-06, + "loss": 3392.1164, + "step": 67670 + }, + { + "epoch": 0.13671788200406437, + "grad_norm": 11502.9951171875, + "learning_rate": 9.961961112325633e-06, + "loss": 2184.0238, + "step": 67680 + }, + { + "epoch": 0.1367380826367482, + "grad_norm": 18281.2109375, + "learning_rate": 9.961918124461918e-06, + "loss": 1506.1508, + "step": 67690 + }, + { + "epoch": 0.13675828326943198, + "grad_norm": 96383.859375, + "learning_rate": 9.961875112414417e-06, + "loss": 2133.0832, + "step": 67700 + }, + { + "epoch": 0.1367784839021158, + "grad_norm": 523.5892944335938, + "learning_rate": 9.961832076183337e-06, + "loss": 2201.4871, + "step": 67710 + }, + { + "epoch": 0.13679868453479962, + "grad_norm": 100735.1953125, + "learning_rate": 9.96178901576889e-06, + "loss": 2593.5854, + "step": 67720 + }, + { + "epoch": 0.13681888516748344, + "grad_norm": 67337.21875, + "learning_rate": 9.961745931171288e-06, + "loss": 2537.3725, + "step": 67730 + }, + { + "epoch": 0.13683908580016726, + "grad_norm": 5126.77685546875, + "learning_rate": 9.961702822390735e-06, + "loss": 1853.9645, + "step": 67740 + }, + { + "epoch": 0.13685928643285108, + "grad_norm": 747.7778930664062, + "learning_rate": 9.961659689427444e-06, + "loss": 3229.7027, + "step": 67750 + }, + { + "epoch": 0.1368794870655349, + "grad_norm": 1070.2283935546875, + "learning_rate": 9.961616532281626e-06, + "loss": 8126.4633, + "step": 67760 + }, + { + "epoch": 0.1368996876982187, + "grad_norm": 845.4146118164062, + "learning_rate": 9.961573350953491e-06, + "loss": 2497.7029, + "step": 67770 + }, + { + "epoch": 0.13691988833090252, + "grad_norm": 48362.15234375, + "learning_rate": 9.96153014544325e-06, + "loss": 4442.1184, + "step": 67780 + }, + { + "epoch": 0.13694008896358634, + "grad_norm": 47824.703125, + "learning_rate": 9.961486915751114e-06, + "loss": 2750.0891, + "step": 67790 + }, + { + "epoch": 0.13696028959627016, + "grad_norm": 8227.0068359375, + "learning_rate": 9.96144366187729e-06, + "loss": 2794.4453, + "step": 67800 + }, + { + "epoch": 0.13698049022895398, + "grad_norm": 5046.001953125, + "learning_rate": 9.961400383821992e-06, + "loss": 1443.286, + "step": 67810 + }, + { + "epoch": 0.1370006908616378, + "grad_norm": 24321.68359375, + "learning_rate": 9.96135708158543e-06, + "loss": 1958.1016, + "step": 67820 + }, + { + "epoch": 0.1370208914943216, + "grad_norm": 48315.1953125, + "learning_rate": 9.961313755167816e-06, + "loss": 3227.4244, + "step": 67830 + }, + { + "epoch": 0.1370410921270054, + "grad_norm": 22514.109375, + "learning_rate": 9.961270404569358e-06, + "loss": 3900.7926, + "step": 67840 + }, + { + "epoch": 0.13706129275968923, + "grad_norm": 6834.59765625, + "learning_rate": 9.961227029790272e-06, + "loss": 1696.7318, + "step": 67850 + }, + { + "epoch": 0.13708149339237305, + "grad_norm": 30401.0234375, + "learning_rate": 9.961183630830768e-06, + "loss": 1746.8273, + "step": 67860 + }, + { + "epoch": 0.13710169402505687, + "grad_norm": 10765.513671875, + "learning_rate": 9.961140207691055e-06, + "loss": 2208.851, + "step": 67870 + }, + { + "epoch": 0.1371218946577407, + "grad_norm": 9956.58984375, + "learning_rate": 9.961096760371349e-06, + "loss": 3025.5189, + "step": 67880 + }, + { + "epoch": 0.1371420952904245, + "grad_norm": 7355.84521484375, + "learning_rate": 9.961053288871855e-06, + "loss": 1147.3713, + "step": 67890 + }, + { + "epoch": 0.1371622959231083, + "grad_norm": 16806.876953125, + "learning_rate": 9.961009793192793e-06, + "loss": 996.5599, + "step": 67900 + }, + { + "epoch": 0.13718249655579212, + "grad_norm": 39381.953125, + "learning_rate": 9.96096627333437e-06, + "loss": 2894.6791, + "step": 67910 + }, + { + "epoch": 0.13720269718847594, + "grad_norm": 2986.8193359375, + "learning_rate": 9.960922729296797e-06, + "loss": 2009.6461, + "step": 67920 + }, + { + "epoch": 0.13722289782115976, + "grad_norm": 36104.53515625, + "learning_rate": 9.96087916108029e-06, + "loss": 1613.2924, + "step": 67930 + }, + { + "epoch": 0.13724309845384358, + "grad_norm": 15638.890625, + "learning_rate": 9.960835568685058e-06, + "loss": 2251.577, + "step": 67940 + }, + { + "epoch": 0.1372632990865274, + "grad_norm": 219025.4375, + "learning_rate": 9.960791952111318e-06, + "loss": 2744.9703, + "step": 67950 + }, + { + "epoch": 0.1372834997192112, + "grad_norm": 16748.65625, + "learning_rate": 9.960748311359278e-06, + "loss": 2274.8455, + "step": 67960 + }, + { + "epoch": 0.137303700351895, + "grad_norm": 1532.0299072265625, + "learning_rate": 9.96070464642915e-06, + "loss": 2893.0645, + "step": 67970 + }, + { + "epoch": 0.13732390098457883, + "grad_norm": 10176.7666015625, + "learning_rate": 9.960660957321153e-06, + "loss": 2313.1676, + "step": 67980 + }, + { + "epoch": 0.13734410161726265, + "grad_norm": 3893.560546875, + "learning_rate": 9.960617244035495e-06, + "loss": 3187.7943, + "step": 67990 + }, + { + "epoch": 0.13736430224994647, + "grad_norm": 24309.310546875, + "learning_rate": 9.960573506572391e-06, + "loss": 3001.5984, + "step": 68000 + }, + { + "epoch": 0.1373845028826303, + "grad_norm": 27068.89453125, + "learning_rate": 9.960529744932051e-06, + "loss": 2090.0881, + "step": 68010 + }, + { + "epoch": 0.13740470351531409, + "grad_norm": 351.0208435058594, + "learning_rate": 9.960485959114693e-06, + "loss": 759.1414, + "step": 68020 + }, + { + "epoch": 0.1374249041479979, + "grad_norm": 9055.30078125, + "learning_rate": 9.960442149120527e-06, + "loss": 2096.7674, + "step": 68030 + }, + { + "epoch": 0.13744510478068173, + "grad_norm": 2490.149169921875, + "learning_rate": 9.960398314949767e-06, + "loss": 2850.3184, + "step": 68040 + }, + { + "epoch": 0.13746530541336555, + "grad_norm": 1419.6868896484375, + "learning_rate": 9.960354456602627e-06, + "loss": 1016.2286, + "step": 68050 + }, + { + "epoch": 0.13748550604604937, + "grad_norm": 14241.8505859375, + "learning_rate": 9.960310574079324e-06, + "loss": 2646.8961, + "step": 68060 + }, + { + "epoch": 0.13750570667873319, + "grad_norm": 513.03125, + "learning_rate": 9.960266667380065e-06, + "loss": 609.7435, + "step": 68070 + }, + { + "epoch": 0.137525907311417, + "grad_norm": 87059.703125, + "learning_rate": 9.96022273650507e-06, + "loss": 1020.492, + "step": 68080 + }, + { + "epoch": 0.1375461079441008, + "grad_norm": 41093.609375, + "learning_rate": 9.96017878145455e-06, + "loss": 3708.7035, + "step": 68090 + }, + { + "epoch": 0.13756630857678462, + "grad_norm": 990.798828125, + "learning_rate": 9.960134802228722e-06, + "loss": 1300.1492, + "step": 68100 + }, + { + "epoch": 0.13758650920946844, + "grad_norm": 11106.37890625, + "learning_rate": 9.960090798827798e-06, + "loss": 3851.7469, + "step": 68110 + }, + { + "epoch": 0.13760670984215226, + "grad_norm": 14118.0517578125, + "learning_rate": 9.960046771251991e-06, + "loss": 4094.916, + "step": 68120 + }, + { + "epoch": 0.13762691047483608, + "grad_norm": 0.0, + "learning_rate": 9.96000271950152e-06, + "loss": 2059.3334, + "step": 68130 + }, + { + "epoch": 0.1376471111075199, + "grad_norm": 9659.8505859375, + "learning_rate": 9.959958643576597e-06, + "loss": 1001.7901, + "step": 68140 + }, + { + "epoch": 0.1376673117402037, + "grad_norm": 46992.0859375, + "learning_rate": 9.959914543477436e-06, + "loss": 1644.6969, + "step": 68150 + }, + { + "epoch": 0.1376875123728875, + "grad_norm": 2731.925048828125, + "learning_rate": 9.959870419204253e-06, + "loss": 1723.8406, + "step": 68160 + }, + { + "epoch": 0.13770771300557133, + "grad_norm": 17587.814453125, + "learning_rate": 9.959826270757265e-06, + "loss": 1405.5155, + "step": 68170 + }, + { + "epoch": 0.13772791363825515, + "grad_norm": 12664.6357421875, + "learning_rate": 9.959782098136683e-06, + "loss": 2316.91, + "step": 68180 + }, + { + "epoch": 0.13774811427093897, + "grad_norm": 13312.9111328125, + "learning_rate": 9.959737901342725e-06, + "loss": 1345.8937, + "step": 68190 + }, + { + "epoch": 0.1377683149036228, + "grad_norm": 29240.01953125, + "learning_rate": 9.959693680375608e-06, + "loss": 1983.1789, + "step": 68200 + }, + { + "epoch": 0.1377885155363066, + "grad_norm": 600.8543701171875, + "learning_rate": 9.959649435235543e-06, + "loss": 1925.8848, + "step": 68210 + }, + { + "epoch": 0.1378087161689904, + "grad_norm": 27805.634765625, + "learning_rate": 9.95960516592275e-06, + "loss": 2915.4738, + "step": 68220 + }, + { + "epoch": 0.13782891680167422, + "grad_norm": 349.16900634765625, + "learning_rate": 9.959560872437443e-06, + "loss": 3703.441, + "step": 68230 + }, + { + "epoch": 0.13784911743435804, + "grad_norm": 216602.8125, + "learning_rate": 9.959516554779838e-06, + "loss": 3071.0547, + "step": 68240 + }, + { + "epoch": 0.13786931806704186, + "grad_norm": 10905.447265625, + "learning_rate": 9.95947221295015e-06, + "loss": 2539.1623, + "step": 68250 + }, + { + "epoch": 0.13788951869972568, + "grad_norm": 22685.58203125, + "learning_rate": 9.959427846948595e-06, + "loss": 1094.4707, + "step": 68260 + }, + { + "epoch": 0.1379097193324095, + "grad_norm": 41065.7421875, + "learning_rate": 9.959383456775392e-06, + "loss": 3846.6484, + "step": 68270 + }, + { + "epoch": 0.1379299199650933, + "grad_norm": 186891.796875, + "learning_rate": 9.959339042430753e-06, + "loss": 4359.043, + "step": 68280 + }, + { + "epoch": 0.13795012059777711, + "grad_norm": 19323.216796875, + "learning_rate": 9.9592946039149e-06, + "loss": 2706.5732, + "step": 68290 + }, + { + "epoch": 0.13797032123046093, + "grad_norm": 13721.1201171875, + "learning_rate": 9.959250141228046e-06, + "loss": 1007.5845, + "step": 68300 + }, + { + "epoch": 0.13799052186314475, + "grad_norm": 24333.361328125, + "learning_rate": 9.959205654370406e-06, + "loss": 4701.7379, + "step": 68310 + }, + { + "epoch": 0.13801072249582857, + "grad_norm": 24144.0703125, + "learning_rate": 9.959161143342201e-06, + "loss": 2197.2414, + "step": 68320 + }, + { + "epoch": 0.1380309231285124, + "grad_norm": 18958.177734375, + "learning_rate": 9.959116608143647e-06, + "loss": 2495.5359, + "step": 68330 + }, + { + "epoch": 0.1380511237611962, + "grad_norm": 8158.8818359375, + "learning_rate": 9.959072048774958e-06, + "loss": 3190.4566, + "step": 68340 + }, + { + "epoch": 0.13807132439388, + "grad_norm": 1540.2279052734375, + "learning_rate": 9.959027465236354e-06, + "loss": 2534.5945, + "step": 68350 + }, + { + "epoch": 0.13809152502656383, + "grad_norm": 821.962158203125, + "learning_rate": 9.958982857528053e-06, + "loss": 1572.6336, + "step": 68360 + }, + { + "epoch": 0.13811172565924765, + "grad_norm": 355.7807922363281, + "learning_rate": 9.958938225650268e-06, + "loss": 4210.1059, + "step": 68370 + }, + { + "epoch": 0.13813192629193147, + "grad_norm": 32457.283203125, + "learning_rate": 9.958893569603222e-06, + "loss": 3711.4133, + "step": 68380 + }, + { + "epoch": 0.1381521269246153, + "grad_norm": 127738.4765625, + "learning_rate": 9.958848889387129e-06, + "loss": 2564.1711, + "step": 68390 + }, + { + "epoch": 0.1381723275572991, + "grad_norm": 49224.66015625, + "learning_rate": 9.958804185002209e-06, + "loss": 1990.2352, + "step": 68400 + }, + { + "epoch": 0.1381925281899829, + "grad_norm": 26026.50390625, + "learning_rate": 9.958759456448677e-06, + "loss": 2091.3439, + "step": 68410 + }, + { + "epoch": 0.13821272882266672, + "grad_norm": 54232.5703125, + "learning_rate": 9.958714703726755e-06, + "loss": 3903.0188, + "step": 68420 + }, + { + "epoch": 0.13823292945535054, + "grad_norm": 14108.271484375, + "learning_rate": 9.958669926836658e-06, + "loss": 1941.0285, + "step": 68430 + }, + { + "epoch": 0.13825313008803436, + "grad_norm": 15261.2685546875, + "learning_rate": 9.958625125778606e-06, + "loss": 2028.3348, + "step": 68440 + }, + { + "epoch": 0.13827333072071818, + "grad_norm": 237.34014892578125, + "learning_rate": 9.958580300552816e-06, + "loss": 3749.7238, + "step": 68450 + }, + { + "epoch": 0.138293531353402, + "grad_norm": 9321.74609375, + "learning_rate": 9.958535451159506e-06, + "loss": 1762.4838, + "step": 68460 + }, + { + "epoch": 0.1383137319860858, + "grad_norm": 49.38560485839844, + "learning_rate": 9.958490577598896e-06, + "loss": 1669.8242, + "step": 68470 + }, + { + "epoch": 0.1383339326187696, + "grad_norm": 4443.22607421875, + "learning_rate": 9.958445679871204e-06, + "loss": 1578.2394, + "step": 68480 + }, + { + "epoch": 0.13835413325145343, + "grad_norm": 113192.125, + "learning_rate": 9.958400757976651e-06, + "loss": 2199.1512, + "step": 68490 + }, + { + "epoch": 0.13837433388413725, + "grad_norm": 132479.515625, + "learning_rate": 9.958355811915452e-06, + "loss": 3708.8941, + "step": 68500 + }, + { + "epoch": 0.13839453451682107, + "grad_norm": 88287.09375, + "learning_rate": 9.95831084168783e-06, + "loss": 4976.6988, + "step": 68510 + }, + { + "epoch": 0.1384147351495049, + "grad_norm": 13947.53125, + "learning_rate": 9.958265847294001e-06, + "loss": 1425.5903, + "step": 68520 + }, + { + "epoch": 0.1384349357821887, + "grad_norm": 7910.4130859375, + "learning_rate": 9.958220828734187e-06, + "loss": 3208.0492, + "step": 68530 + }, + { + "epoch": 0.1384551364148725, + "grad_norm": 2270.76416015625, + "learning_rate": 9.958175786008605e-06, + "loss": 1015.6818, + "step": 68540 + }, + { + "epoch": 0.13847533704755632, + "grad_norm": 34738.2421875, + "learning_rate": 9.958130719117476e-06, + "loss": 1561.2709, + "step": 68550 + }, + { + "epoch": 0.13849553768024014, + "grad_norm": 742.0711059570312, + "learning_rate": 9.958085628061018e-06, + "loss": 2214.9047, + "step": 68560 + }, + { + "epoch": 0.13851573831292396, + "grad_norm": 7353.81689453125, + "learning_rate": 9.958040512839453e-06, + "loss": 1819.5205, + "step": 68570 + }, + { + "epoch": 0.13853593894560778, + "grad_norm": 39529.57421875, + "learning_rate": 9.957995373453e-06, + "loss": 3861.6305, + "step": 68580 + }, + { + "epoch": 0.1385561395782916, + "grad_norm": 8088.83935546875, + "learning_rate": 9.95795020990188e-06, + "loss": 2544.2004, + "step": 68590 + }, + { + "epoch": 0.1385763402109754, + "grad_norm": 10666.7763671875, + "learning_rate": 9.957905022186309e-06, + "loss": 1464.8115, + "step": 68600 + }, + { + "epoch": 0.13859654084365922, + "grad_norm": 579.4666137695312, + "learning_rate": 9.957859810306511e-06, + "loss": 2338.7682, + "step": 68610 + }, + { + "epoch": 0.13861674147634304, + "grad_norm": 67770.375, + "learning_rate": 9.957814574262707e-06, + "loss": 4027.7934, + "step": 68620 + }, + { + "epoch": 0.13863694210902686, + "grad_norm": 19119.927734375, + "learning_rate": 9.957769314055117e-06, + "loss": 1921.2, + "step": 68630 + }, + { + "epoch": 0.13865714274171068, + "grad_norm": 3843.26318359375, + "learning_rate": 9.957724029683958e-06, + "loss": 716.1168, + "step": 68640 + }, + { + "epoch": 0.1386773433743945, + "grad_norm": 135965.734375, + "learning_rate": 9.957678721149454e-06, + "loss": 1971.5879, + "step": 68650 + }, + { + "epoch": 0.1386975440070783, + "grad_norm": 53683.86328125, + "learning_rate": 9.957633388451827e-06, + "loss": 3070.2844, + "step": 68660 + }, + { + "epoch": 0.1387177446397621, + "grad_norm": 52801.96484375, + "learning_rate": 9.957588031591295e-06, + "loss": 1845.7232, + "step": 68670 + }, + { + "epoch": 0.13873794527244593, + "grad_norm": 1904.1265869140625, + "learning_rate": 9.957542650568079e-06, + "loss": 2335.1762, + "step": 68680 + }, + { + "epoch": 0.13875814590512975, + "grad_norm": 11446.666015625, + "learning_rate": 9.957497245382403e-06, + "loss": 752.4816, + "step": 68690 + }, + { + "epoch": 0.13877834653781357, + "grad_norm": 2120.017822265625, + "learning_rate": 9.957451816034487e-06, + "loss": 1925.7713, + "step": 68700 + }, + { + "epoch": 0.1387985471704974, + "grad_norm": 13111.1064453125, + "learning_rate": 9.95740636252455e-06, + "loss": 2345.0871, + "step": 68710 + }, + { + "epoch": 0.1388187478031812, + "grad_norm": 1207.4775390625, + "learning_rate": 9.957360884852819e-06, + "loss": 1509.1257, + "step": 68720 + }, + { + "epoch": 0.138838948435865, + "grad_norm": 79823.3828125, + "learning_rate": 9.95731538301951e-06, + "loss": 3104.1262, + "step": 68730 + }, + { + "epoch": 0.13885914906854882, + "grad_norm": 8863.8935546875, + "learning_rate": 9.957269857024847e-06, + "loss": 1580.6643, + "step": 68740 + }, + { + "epoch": 0.13887934970123264, + "grad_norm": 4954.45654296875, + "learning_rate": 9.957224306869053e-06, + "loss": 1671.7434, + "step": 68750 + }, + { + "epoch": 0.13889955033391646, + "grad_norm": 33334.90625, + "learning_rate": 9.957178732552348e-06, + "loss": 1932.4971, + "step": 68760 + }, + { + "epoch": 0.13891975096660028, + "grad_norm": 53108.265625, + "learning_rate": 9.957133134074955e-06, + "loss": 1965.7496, + "step": 68770 + }, + { + "epoch": 0.1389399515992841, + "grad_norm": 623.8773193359375, + "learning_rate": 9.957087511437099e-06, + "loss": 3973.3566, + "step": 68780 + }, + { + "epoch": 0.1389601522319679, + "grad_norm": 4841.29638671875, + "learning_rate": 9.957041864638997e-06, + "loss": 1149.1394, + "step": 68790 + }, + { + "epoch": 0.1389803528646517, + "grad_norm": 19323.826171875, + "learning_rate": 9.956996193680874e-06, + "loss": 2451.3506, + "step": 68800 + }, + { + "epoch": 0.13900055349733553, + "grad_norm": 13300.6220703125, + "learning_rate": 9.956950498562954e-06, + "loss": 1524.326, + "step": 68810 + }, + { + "epoch": 0.13902075413001935, + "grad_norm": 4437.96044921875, + "learning_rate": 9.956904779285457e-06, + "loss": 1716.6172, + "step": 68820 + }, + { + "epoch": 0.13904095476270317, + "grad_norm": 148662.46875, + "learning_rate": 9.956859035848608e-06, + "loss": 2963.55, + "step": 68830 + }, + { + "epoch": 0.139061155395387, + "grad_norm": 1885.716064453125, + "learning_rate": 9.95681326825263e-06, + "loss": 4084.2805, + "step": 68840 + }, + { + "epoch": 0.1390813560280708, + "grad_norm": 818.1267700195312, + "learning_rate": 9.956767476497745e-06, + "loss": 1991.8213, + "step": 68850 + }, + { + "epoch": 0.1391015566607546, + "grad_norm": 7981.06494140625, + "learning_rate": 9.956721660584175e-06, + "loss": 5366.3207, + "step": 68860 + }, + { + "epoch": 0.13912175729343843, + "grad_norm": 11696.43359375, + "learning_rate": 9.956675820512146e-06, + "loss": 4017.5605, + "step": 68870 + }, + { + "epoch": 0.13914195792612225, + "grad_norm": 57819.3671875, + "learning_rate": 9.956629956281881e-06, + "loss": 3102.2656, + "step": 68880 + }, + { + "epoch": 0.13916215855880607, + "grad_norm": 23230.20703125, + "learning_rate": 9.956584067893602e-06, + "loss": 1038.7036, + "step": 68890 + }, + { + "epoch": 0.13918235919148988, + "grad_norm": 4199.20263671875, + "learning_rate": 9.956538155347534e-06, + "loss": 1749.0758, + "step": 68900 + }, + { + "epoch": 0.1392025598241737, + "grad_norm": 62125.9296875, + "learning_rate": 9.9564922186439e-06, + "loss": 1487.9796, + "step": 68910 + }, + { + "epoch": 0.1392227604568575, + "grad_norm": 52059.4140625, + "learning_rate": 9.956446257782923e-06, + "loss": 2792.3256, + "step": 68920 + }, + { + "epoch": 0.13924296108954132, + "grad_norm": 31364.490234375, + "learning_rate": 9.95640027276483e-06, + "loss": 3631.9398, + "step": 68930 + }, + { + "epoch": 0.13926316172222514, + "grad_norm": 53677.91015625, + "learning_rate": 9.95635426358984e-06, + "loss": 3265.6953, + "step": 68940 + }, + { + "epoch": 0.13928336235490896, + "grad_norm": 51096.734375, + "learning_rate": 9.956308230258182e-06, + "loss": 2115.9979, + "step": 68950 + }, + { + "epoch": 0.13930356298759278, + "grad_norm": 9368.435546875, + "learning_rate": 9.956262172770082e-06, + "loss": 2768.7508, + "step": 68960 + }, + { + "epoch": 0.1393237636202766, + "grad_norm": 1912.8411865234375, + "learning_rate": 9.956216091125756e-06, + "loss": 2586.5062, + "step": 68970 + }, + { + "epoch": 0.1393439642529604, + "grad_norm": 12742.029296875, + "learning_rate": 9.956169985325438e-06, + "loss": 2711.4055, + "step": 68980 + }, + { + "epoch": 0.1393641648856442, + "grad_norm": 31140.951171875, + "learning_rate": 9.956123855369346e-06, + "loss": 3798.3906, + "step": 68990 + }, + { + "epoch": 0.13938436551832803, + "grad_norm": 1024.71142578125, + "learning_rate": 9.95607770125771e-06, + "loss": 1179.8317, + "step": 69000 + }, + { + "epoch": 0.13940456615101185, + "grad_norm": 2113.6025390625, + "learning_rate": 9.95603152299075e-06, + "loss": 3791.7914, + "step": 69010 + }, + { + "epoch": 0.13942476678369567, + "grad_norm": 980.563232421875, + "learning_rate": 9.955985320568696e-06, + "loss": 1013.4892, + "step": 69020 + }, + { + "epoch": 0.1394449674163795, + "grad_norm": 19478.775390625, + "learning_rate": 9.955939093991767e-06, + "loss": 1160.7498, + "step": 69030 + }, + { + "epoch": 0.1394651680490633, + "grad_norm": 12438.55078125, + "learning_rate": 9.955892843260195e-06, + "loss": 4889.2996, + "step": 69040 + }, + { + "epoch": 0.1394853686817471, + "grad_norm": 65893.734375, + "learning_rate": 9.955846568374201e-06, + "loss": 1599.1963, + "step": 69050 + }, + { + "epoch": 0.13950556931443092, + "grad_norm": 172992.8125, + "learning_rate": 9.955800269334013e-06, + "loss": 3518.4668, + "step": 69060 + }, + { + "epoch": 0.13952576994711474, + "grad_norm": 2807.778076171875, + "learning_rate": 9.955753946139855e-06, + "loss": 3001.167, + "step": 69070 + }, + { + "epoch": 0.13954597057979856, + "grad_norm": 12492.32421875, + "learning_rate": 9.955707598791952e-06, + "loss": 1761.6381, + "step": 69080 + }, + { + "epoch": 0.13956617121248238, + "grad_norm": 10845.85546875, + "learning_rate": 9.955661227290531e-06, + "loss": 4035.2738, + "step": 69090 + }, + { + "epoch": 0.1395863718451662, + "grad_norm": 6585.8984375, + "learning_rate": 9.95561483163582e-06, + "loss": 2639.4336, + "step": 69100 + }, + { + "epoch": 0.13960657247785, + "grad_norm": 13106.185546875, + "learning_rate": 9.955568411828043e-06, + "loss": 3225.624, + "step": 69110 + }, + { + "epoch": 0.13962677311053381, + "grad_norm": 12225.1279296875, + "learning_rate": 9.955521967867427e-06, + "loss": 3643.7859, + "step": 69120 + }, + { + "epoch": 0.13964697374321763, + "grad_norm": 19742.841796875, + "learning_rate": 9.955475499754197e-06, + "loss": 1092.8798, + "step": 69130 + }, + { + "epoch": 0.13966717437590145, + "grad_norm": 515.8137817382812, + "learning_rate": 9.955429007488582e-06, + "loss": 1648.0127, + "step": 69140 + }, + { + "epoch": 0.13968737500858527, + "grad_norm": 9688.05078125, + "learning_rate": 9.955382491070806e-06, + "loss": 1648.1813, + "step": 69150 + }, + { + "epoch": 0.1397075756412691, + "grad_norm": 3142.03271484375, + "learning_rate": 9.955335950501097e-06, + "loss": 959.8227, + "step": 69160 + }, + { + "epoch": 0.13972777627395291, + "grad_norm": 113779.3984375, + "learning_rate": 9.955289385779681e-06, + "loss": 3858.0113, + "step": 69170 + }, + { + "epoch": 0.1397479769066367, + "grad_norm": 20972.37890625, + "learning_rate": 9.955242796906785e-06, + "loss": 1304.8413, + "step": 69180 + }, + { + "epoch": 0.13976817753932053, + "grad_norm": 10405.8525390625, + "learning_rate": 9.955196183882637e-06, + "loss": 3221.4807, + "step": 69190 + }, + { + "epoch": 0.13978837817200435, + "grad_norm": 10964.546875, + "learning_rate": 9.955149546707465e-06, + "loss": 1626.7233, + "step": 69200 + }, + { + "epoch": 0.13980857880468817, + "grad_norm": 4395.56640625, + "learning_rate": 9.955102885381494e-06, + "loss": 2365.5354, + "step": 69210 + }, + { + "epoch": 0.13982877943737199, + "grad_norm": 38090.42578125, + "learning_rate": 9.955056199904953e-06, + "loss": 2628.4609, + "step": 69220 + }, + { + "epoch": 0.1398489800700558, + "grad_norm": 3504.706787109375, + "learning_rate": 9.955009490278069e-06, + "loss": 1875.3377, + "step": 69230 + }, + { + "epoch": 0.1398691807027396, + "grad_norm": 9158.7734375, + "learning_rate": 9.95496275650107e-06, + "loss": 2722.6604, + "step": 69240 + }, + { + "epoch": 0.13988938133542342, + "grad_norm": 3321.257080078125, + "learning_rate": 9.954915998574182e-06, + "loss": 712.6105, + "step": 69250 + }, + { + "epoch": 0.13990958196810724, + "grad_norm": 75.98146057128906, + "learning_rate": 9.954869216497636e-06, + "loss": 2145.8889, + "step": 69260 + }, + { + "epoch": 0.13992978260079106, + "grad_norm": 360.027587890625, + "learning_rate": 9.954822410271657e-06, + "loss": 1938.0309, + "step": 69270 + }, + { + "epoch": 0.13994998323347488, + "grad_norm": 33696.875, + "learning_rate": 9.954775579896476e-06, + "loss": 2238.0484, + "step": 69280 + }, + { + "epoch": 0.1399701838661587, + "grad_norm": 16479.19140625, + "learning_rate": 9.954728725372319e-06, + "loss": 1392.9764, + "step": 69290 + }, + { + "epoch": 0.1399903844988425, + "grad_norm": 12845.580078125, + "learning_rate": 9.954681846699414e-06, + "loss": 965.2918, + "step": 69300 + }, + { + "epoch": 0.1400105851315263, + "grad_norm": 27853.337890625, + "learning_rate": 9.954634943877993e-06, + "loss": 2478.8225, + "step": 69310 + }, + { + "epoch": 0.14003078576421013, + "grad_norm": 871.0928955078125, + "learning_rate": 9.95458801690828e-06, + "loss": 2890.5328, + "step": 69320 + }, + { + "epoch": 0.14005098639689395, + "grad_norm": 6409.82666015625, + "learning_rate": 9.954541065790509e-06, + "loss": 1886.2547, + "step": 69330 + }, + { + "epoch": 0.14007118702957777, + "grad_norm": 2357.6123046875, + "learning_rate": 9.954494090524903e-06, + "loss": 4784.7922, + "step": 69340 + }, + { + "epoch": 0.1400913876622616, + "grad_norm": 38990.40234375, + "learning_rate": 9.954447091111695e-06, + "loss": 1073.3432, + "step": 69350 + }, + { + "epoch": 0.1401115882949454, + "grad_norm": 55644.6015625, + "learning_rate": 9.95440006755111e-06, + "loss": 2994.4816, + "step": 69360 + }, + { + "epoch": 0.1401317889276292, + "grad_norm": 3510.14599609375, + "learning_rate": 9.954353019843384e-06, + "loss": 2686.1084, + "step": 69370 + }, + { + "epoch": 0.14015198956031302, + "grad_norm": 75368.4921875, + "learning_rate": 9.95430594798874e-06, + "loss": 2471.7186, + "step": 69380 + }, + { + "epoch": 0.14017219019299684, + "grad_norm": 167460.75, + "learning_rate": 9.954258851987411e-06, + "loss": 4623.6027, + "step": 69390 + }, + { + "epoch": 0.14019239082568066, + "grad_norm": 37070.328125, + "learning_rate": 9.954211731839623e-06, + "loss": 3842.4918, + "step": 69400 + }, + { + "epoch": 0.14021259145836448, + "grad_norm": 36382.578125, + "learning_rate": 9.95416458754561e-06, + "loss": 1842.9156, + "step": 69410 + }, + { + "epoch": 0.1402327920910483, + "grad_norm": 145881.953125, + "learning_rate": 9.954117419105599e-06, + "loss": 2297.4539, + "step": 69420 + }, + { + "epoch": 0.1402529927237321, + "grad_norm": 822.9708251953125, + "learning_rate": 9.95407022651982e-06, + "loss": 1934.5053, + "step": 69430 + }, + { + "epoch": 0.14027319335641592, + "grad_norm": 2285.23486328125, + "learning_rate": 9.954023009788505e-06, + "loss": 766.3743, + "step": 69440 + }, + { + "epoch": 0.14029339398909974, + "grad_norm": 31608.46484375, + "learning_rate": 9.953975768911881e-06, + "loss": 3033.2354, + "step": 69450 + }, + { + "epoch": 0.14031359462178356, + "grad_norm": 72925.171875, + "learning_rate": 9.953928503890181e-06, + "loss": 2169.7477, + "step": 69460 + }, + { + "epoch": 0.14033379525446738, + "grad_norm": 732.8397827148438, + "learning_rate": 9.953881214723636e-06, + "loss": 1404.0478, + "step": 69470 + }, + { + "epoch": 0.1403539958871512, + "grad_norm": 2096.177001953125, + "learning_rate": 9.95383390141247e-06, + "loss": 1865.9408, + "step": 69480 + }, + { + "epoch": 0.14037419651983502, + "grad_norm": 276.8294982910156, + "learning_rate": 9.953786563956923e-06, + "loss": 1565.0736, + "step": 69490 + }, + { + "epoch": 0.1403943971525188, + "grad_norm": 48867.76953125, + "learning_rate": 9.953739202357219e-06, + "loss": 4003.3449, + "step": 69500 + }, + { + "epoch": 0.14041459778520263, + "grad_norm": 161492.015625, + "learning_rate": 9.953691816613592e-06, + "loss": 2666.7904, + "step": 69510 + }, + { + "epoch": 0.14043479841788645, + "grad_norm": 33384.39453125, + "learning_rate": 9.95364440672627e-06, + "loss": 5451.0105, + "step": 69520 + }, + { + "epoch": 0.14045499905057027, + "grad_norm": 12843.0966796875, + "learning_rate": 9.953596972695487e-06, + "loss": 1568.2848, + "step": 69530 + }, + { + "epoch": 0.1404751996832541, + "grad_norm": 21490.759765625, + "learning_rate": 9.953549514521474e-06, + "loss": 2258.2441, + "step": 69540 + }, + { + "epoch": 0.1404954003159379, + "grad_norm": 983.0607299804688, + "learning_rate": 9.953502032204461e-06, + "loss": 856.244, + "step": 69550 + }, + { + "epoch": 0.1405156009486217, + "grad_norm": 0.0, + "learning_rate": 9.95345452574468e-06, + "loss": 3009.2172, + "step": 69560 + }, + { + "epoch": 0.14053580158130552, + "grad_norm": 1053.0111083984375, + "learning_rate": 9.95340699514236e-06, + "loss": 1687.9924, + "step": 69570 + }, + { + "epoch": 0.14055600221398934, + "grad_norm": 48434.9296875, + "learning_rate": 9.953359440397738e-06, + "loss": 2410.5949, + "step": 69580 + }, + { + "epoch": 0.14057620284667316, + "grad_norm": 42850.2890625, + "learning_rate": 9.953311861511043e-06, + "loss": 1277.2543, + "step": 69590 + }, + { + "epoch": 0.14059640347935698, + "grad_norm": 5607.14111328125, + "learning_rate": 9.953264258482505e-06, + "loss": 2324.727, + "step": 69600 + }, + { + "epoch": 0.1406166041120408, + "grad_norm": 7981.78857421875, + "learning_rate": 9.953216631312358e-06, + "loss": 824.2006, + "step": 69610 + }, + { + "epoch": 0.1406368047447246, + "grad_norm": 6064.63330078125, + "learning_rate": 9.953168980000836e-06, + "loss": 3807.9477, + "step": 69620 + }, + { + "epoch": 0.1406570053774084, + "grad_norm": 25910.037109375, + "learning_rate": 9.953121304548167e-06, + "loss": 1431.759, + "step": 69630 + }, + { + "epoch": 0.14067720601009223, + "grad_norm": 19596.12109375, + "learning_rate": 9.953073604954586e-06, + "loss": 3299.4582, + "step": 69640 + }, + { + "epoch": 0.14069740664277605, + "grad_norm": 3510.617919921875, + "learning_rate": 9.953025881220325e-06, + "loss": 2016.227, + "step": 69650 + }, + { + "epoch": 0.14071760727545987, + "grad_norm": 56819.25, + "learning_rate": 9.952978133345616e-06, + "loss": 1585.5008, + "step": 69660 + }, + { + "epoch": 0.1407378079081437, + "grad_norm": 5375.89208984375, + "learning_rate": 9.952930361330694e-06, + "loss": 2049.8463, + "step": 69670 + }, + { + "epoch": 0.1407580085408275, + "grad_norm": 1153.7623291015625, + "learning_rate": 9.952882565175788e-06, + "loss": 1980.5605, + "step": 69680 + }, + { + "epoch": 0.1407782091735113, + "grad_norm": 104714.2734375, + "learning_rate": 9.952834744881135e-06, + "loss": 1846.9887, + "step": 69690 + }, + { + "epoch": 0.14079840980619512, + "grad_norm": 55156.66015625, + "learning_rate": 9.952786900446964e-06, + "loss": 2151.4273, + "step": 69700 + }, + { + "epoch": 0.14081861043887894, + "grad_norm": 102140.2109375, + "learning_rate": 9.952739031873513e-06, + "loss": 2352.0797, + "step": 69710 + }, + { + "epoch": 0.14083881107156276, + "grad_norm": 73974.59375, + "learning_rate": 9.952691139161012e-06, + "loss": 3265.9297, + "step": 69720 + }, + { + "epoch": 0.14085901170424658, + "grad_norm": 24660.21484375, + "learning_rate": 9.952643222309694e-06, + "loss": 887.9188, + "step": 69730 + }, + { + "epoch": 0.1408792123369304, + "grad_norm": 4628.75732421875, + "learning_rate": 9.952595281319794e-06, + "loss": 3079.9293, + "step": 69740 + }, + { + "epoch": 0.1408994129696142, + "grad_norm": 3579.981201171875, + "learning_rate": 9.952547316191545e-06, + "loss": 767.4115, + "step": 69750 + }, + { + "epoch": 0.14091961360229802, + "grad_norm": 39978.86328125, + "learning_rate": 9.95249932692518e-06, + "loss": 2872.5848, + "step": 69760 + }, + { + "epoch": 0.14093981423498184, + "grad_norm": 7324.0234375, + "learning_rate": 9.952451313520937e-06, + "loss": 3840.1637, + "step": 69770 + }, + { + "epoch": 0.14096001486766566, + "grad_norm": 12607.6044921875, + "learning_rate": 9.952403275979046e-06, + "loss": 2934.1127, + "step": 69780 + }, + { + "epoch": 0.14098021550034948, + "grad_norm": 16683.66796875, + "learning_rate": 9.95235521429974e-06, + "loss": 3115.1182, + "step": 69790 + }, + { + "epoch": 0.1410004161330333, + "grad_norm": 17452.693359375, + "learning_rate": 9.952307128483257e-06, + "loss": 1009.1519, + "step": 69800 + }, + { + "epoch": 0.14102061676571712, + "grad_norm": 23612.2578125, + "learning_rate": 9.952259018529829e-06, + "loss": 3789.9594, + "step": 69810 + }, + { + "epoch": 0.1410408173984009, + "grad_norm": 65915.28125, + "learning_rate": 9.952210884439693e-06, + "loss": 1668.909, + "step": 69820 + }, + { + "epoch": 0.14106101803108473, + "grad_norm": 23596.521484375, + "learning_rate": 9.95216272621308e-06, + "loss": 885.9751, + "step": 69830 + }, + { + "epoch": 0.14108121866376855, + "grad_norm": 35682.0703125, + "learning_rate": 9.952114543850227e-06, + "loss": 2820.6088, + "step": 69840 + }, + { + "epoch": 0.14110141929645237, + "grad_norm": 27730.841796875, + "learning_rate": 9.952066337351367e-06, + "loss": 717.3248, + "step": 69850 + }, + { + "epoch": 0.1411216199291362, + "grad_norm": 1169.9169921875, + "learning_rate": 9.952018106716737e-06, + "loss": 2073.3352, + "step": 69860 + }, + { + "epoch": 0.14114182056182, + "grad_norm": 45497.25390625, + "learning_rate": 9.951969851946573e-06, + "loss": 2829.8475, + "step": 69870 + }, + { + "epoch": 0.1411620211945038, + "grad_norm": 4098.5390625, + "learning_rate": 9.951921573041107e-06, + "loss": 2508.9303, + "step": 69880 + }, + { + "epoch": 0.14118222182718762, + "grad_norm": 19964.638671875, + "learning_rate": 9.951873270000576e-06, + "loss": 2965.6021, + "step": 69890 + }, + { + "epoch": 0.14120242245987144, + "grad_norm": 58893.13671875, + "learning_rate": 9.951824942825215e-06, + "loss": 4045.007, + "step": 69900 + }, + { + "epoch": 0.14122262309255526, + "grad_norm": 14543.2099609375, + "learning_rate": 9.951776591515262e-06, + "loss": 4049.7973, + "step": 69910 + }, + { + "epoch": 0.14124282372523908, + "grad_norm": 41182.00390625, + "learning_rate": 9.951728216070949e-06, + "loss": 1604.3729, + "step": 69920 + }, + { + "epoch": 0.1412630243579229, + "grad_norm": 5411.2119140625, + "learning_rate": 9.951679816492513e-06, + "loss": 1606.9449, + "step": 69930 + }, + { + "epoch": 0.1412832249906067, + "grad_norm": 23112.3359375, + "learning_rate": 9.951631392780189e-06, + "loss": 2435.9873, + "step": 69940 + }, + { + "epoch": 0.1413034256232905, + "grad_norm": 127835.5390625, + "learning_rate": 9.951582944934215e-06, + "loss": 3548.6723, + "step": 69950 + }, + { + "epoch": 0.14132362625597433, + "grad_norm": 348306.0625, + "learning_rate": 9.951534472954826e-06, + "loss": 1787.01, + "step": 69960 + }, + { + "epoch": 0.14134382688865815, + "grad_norm": 3748.863525390625, + "learning_rate": 9.95148597684226e-06, + "loss": 2114.1697, + "step": 69970 + }, + { + "epoch": 0.14136402752134197, + "grad_norm": 15473.8837890625, + "learning_rate": 9.951437456596751e-06, + "loss": 794.9573, + "step": 69980 + }, + { + "epoch": 0.1413842281540258, + "grad_norm": 23893.927734375, + "learning_rate": 9.951388912218536e-06, + "loss": 3316.4313, + "step": 69990 + }, + { + "epoch": 0.1414044287867096, + "grad_norm": 29176.1796875, + "learning_rate": 9.951340343707852e-06, + "loss": 997.8446, + "step": 70000 + }, + { + "epoch": 0.1414246294193934, + "grad_norm": 18842.640625, + "learning_rate": 9.951291751064937e-06, + "loss": 1748.1045, + "step": 70010 + }, + { + "epoch": 0.14144483005207723, + "grad_norm": 22229.6953125, + "learning_rate": 9.951243134290025e-06, + "loss": 2918.4643, + "step": 70020 + }, + { + "epoch": 0.14146503068476105, + "grad_norm": 61569.02734375, + "learning_rate": 9.951194493383355e-06, + "loss": 2714.5574, + "step": 70030 + }, + { + "epoch": 0.14148523131744487, + "grad_norm": 107245.5078125, + "learning_rate": 9.951145828345163e-06, + "loss": 2573.9791, + "step": 70040 + }, + { + "epoch": 0.14150543195012869, + "grad_norm": 128326.9453125, + "learning_rate": 9.951097139175688e-06, + "loss": 1974.0447, + "step": 70050 + }, + { + "epoch": 0.1415256325828125, + "grad_norm": 62327.421875, + "learning_rate": 9.951048425875165e-06, + "loss": 1649.5533, + "step": 70060 + }, + { + "epoch": 0.1415458332154963, + "grad_norm": 41377.53125, + "learning_rate": 9.950999688443833e-06, + "loss": 1432.5377, + "step": 70070 + }, + { + "epoch": 0.14156603384818012, + "grad_norm": 77944.546875, + "learning_rate": 9.950950926881928e-06, + "loss": 3530.9141, + "step": 70080 + }, + { + "epoch": 0.14158623448086394, + "grad_norm": 112966.1640625, + "learning_rate": 9.950902141189691e-06, + "loss": 3068.6885, + "step": 70090 + }, + { + "epoch": 0.14160643511354776, + "grad_norm": 18854.900390625, + "learning_rate": 9.950853331367356e-06, + "loss": 3903.198, + "step": 70100 + }, + { + "epoch": 0.14162663574623158, + "grad_norm": 329.3848876953125, + "learning_rate": 9.95080449741516e-06, + "loss": 2626.1254, + "step": 70110 + }, + { + "epoch": 0.1416468363789154, + "grad_norm": 19517.201171875, + "learning_rate": 9.950755639333347e-06, + "loss": 3346.0512, + "step": 70120 + }, + { + "epoch": 0.1416670370115992, + "grad_norm": 49590.3125, + "learning_rate": 9.95070675712215e-06, + "loss": 3246.118, + "step": 70130 + }, + { + "epoch": 0.141687237644283, + "grad_norm": 51585.18359375, + "learning_rate": 9.950657850781809e-06, + "loss": 1109.0449, + "step": 70140 + }, + { + "epoch": 0.14170743827696683, + "grad_norm": 3176.756591796875, + "learning_rate": 9.95060892031256e-06, + "loss": 2461.8414, + "step": 70150 + }, + { + "epoch": 0.14172763890965065, + "grad_norm": 24159.21875, + "learning_rate": 9.950559965714647e-06, + "loss": 4028.1543, + "step": 70160 + }, + { + "epoch": 0.14174783954233447, + "grad_norm": 943.9586791992188, + "learning_rate": 9.950510986988304e-06, + "loss": 1322.5328, + "step": 70170 + }, + { + "epoch": 0.1417680401750183, + "grad_norm": 20308.30859375, + "learning_rate": 9.95046198413377e-06, + "loss": 1602.1047, + "step": 70180 + }, + { + "epoch": 0.1417882408077021, + "grad_norm": 14541.3388671875, + "learning_rate": 9.950412957151286e-06, + "loss": 2178.9328, + "step": 70190 + }, + { + "epoch": 0.1418084414403859, + "grad_norm": 0.0, + "learning_rate": 9.950363906041089e-06, + "loss": 2673.1822, + "step": 70200 + }, + { + "epoch": 0.14182864207306972, + "grad_norm": 37085.7421875, + "learning_rate": 9.950314830803418e-06, + "loss": 1217.3025, + "step": 70210 + }, + { + "epoch": 0.14184884270575354, + "grad_norm": 142326.40625, + "learning_rate": 9.950265731438513e-06, + "loss": 3758.8254, + "step": 70220 + }, + { + "epoch": 0.14186904333843736, + "grad_norm": 31224.32421875, + "learning_rate": 9.950216607946614e-06, + "loss": 3626.223, + "step": 70230 + }, + { + "epoch": 0.14188924397112118, + "grad_norm": 176543.828125, + "learning_rate": 9.95016746032796e-06, + "loss": 2474.1895, + "step": 70240 + }, + { + "epoch": 0.141909444603805, + "grad_norm": 668.6470947265625, + "learning_rate": 9.95011828858279e-06, + "loss": 3429.5559, + "step": 70250 + }, + { + "epoch": 0.1419296452364888, + "grad_norm": 10654.2939453125, + "learning_rate": 9.950069092711342e-06, + "loss": 1847.4799, + "step": 70260 + }, + { + "epoch": 0.14194984586917261, + "grad_norm": 133.56060791015625, + "learning_rate": 9.950019872713858e-06, + "loss": 3573.3012, + "step": 70270 + }, + { + "epoch": 0.14197004650185643, + "grad_norm": 3750.90625, + "learning_rate": 9.94997062859058e-06, + "loss": 605.085, + "step": 70280 + }, + { + "epoch": 0.14199024713454025, + "grad_norm": 36342.578125, + "learning_rate": 9.949921360341743e-06, + "loss": 1551.4841, + "step": 70290 + }, + { + "epoch": 0.14201044776722407, + "grad_norm": 413.94976806640625, + "learning_rate": 9.94987206796759e-06, + "loss": 946.3315, + "step": 70300 + }, + { + "epoch": 0.1420306483999079, + "grad_norm": 10104.3037109375, + "learning_rate": 9.949822751468364e-06, + "loss": 2252.9695, + "step": 70310 + }, + { + "epoch": 0.14205084903259171, + "grad_norm": 6788.37548828125, + "learning_rate": 9.949773410844299e-06, + "loss": 3278.9441, + "step": 70320 + }, + { + "epoch": 0.1420710496652755, + "grad_norm": 6184.2509765625, + "learning_rate": 9.94972404609564e-06, + "loss": 2051.2168, + "step": 70330 + }, + { + "epoch": 0.14209125029795933, + "grad_norm": 14527.431640625, + "learning_rate": 9.949674657222624e-06, + "loss": 2482.0473, + "step": 70340 + }, + { + "epoch": 0.14211145093064315, + "grad_norm": 107488.625, + "learning_rate": 9.949625244225496e-06, + "loss": 2648.902, + "step": 70350 + }, + { + "epoch": 0.14213165156332697, + "grad_norm": 34974.8359375, + "learning_rate": 9.949575807104494e-06, + "loss": 888.4271, + "step": 70360 + }, + { + "epoch": 0.1421518521960108, + "grad_norm": 32829.8671875, + "learning_rate": 9.94952634585986e-06, + "loss": 3806.0027, + "step": 70370 + }, + { + "epoch": 0.1421720528286946, + "grad_norm": 1092.3905029296875, + "learning_rate": 9.949476860491836e-06, + "loss": 2863.7355, + "step": 70380 + }, + { + "epoch": 0.1421922534613784, + "grad_norm": 40367.0546875, + "learning_rate": 9.949427351000662e-06, + "loss": 1699.8635, + "step": 70390 + }, + { + "epoch": 0.14221245409406222, + "grad_norm": 13123.0, + "learning_rate": 9.94937781738658e-06, + "loss": 1616.926, + "step": 70400 + }, + { + "epoch": 0.14223265472674604, + "grad_norm": 2140.8837890625, + "learning_rate": 9.949328259649828e-06, + "loss": 594.0263, + "step": 70410 + }, + { + "epoch": 0.14225285535942986, + "grad_norm": 2130.658203125, + "learning_rate": 9.949278677790653e-06, + "loss": 1987.2092, + "step": 70420 + }, + { + "epoch": 0.14227305599211368, + "grad_norm": 15846.0068359375, + "learning_rate": 9.949229071809294e-06, + "loss": 1287.1876, + "step": 70430 + }, + { + "epoch": 0.1422932566247975, + "grad_norm": 7311.67431640625, + "learning_rate": 9.949179441705992e-06, + "loss": 2866.0637, + "step": 70440 + }, + { + "epoch": 0.1423134572574813, + "grad_norm": 238.09071350097656, + "learning_rate": 9.949129787480988e-06, + "loss": 1124.6331, + "step": 70450 + }, + { + "epoch": 0.1423336578901651, + "grad_norm": 13192.3623046875, + "learning_rate": 9.949080109134528e-06, + "loss": 2191.9957, + "step": 70460 + }, + { + "epoch": 0.14235385852284893, + "grad_norm": 20609.9375, + "learning_rate": 9.949030406666852e-06, + "loss": 5172.4227, + "step": 70470 + }, + { + "epoch": 0.14237405915553275, + "grad_norm": 4164.42529296875, + "learning_rate": 9.948980680078199e-06, + "loss": 2409.4969, + "step": 70480 + }, + { + "epoch": 0.14239425978821657, + "grad_norm": 11089.1884765625, + "learning_rate": 9.948930929368818e-06, + "loss": 2108.292, + "step": 70490 + }, + { + "epoch": 0.1424144604209004, + "grad_norm": 99487.25, + "learning_rate": 9.948881154538946e-06, + "loss": 2357.6979, + "step": 70500 + }, + { + "epoch": 0.1424346610535842, + "grad_norm": 36591.67578125, + "learning_rate": 9.948831355588828e-06, + "loss": 3986.3051, + "step": 70510 + }, + { + "epoch": 0.142454861686268, + "grad_norm": 20209.37890625, + "learning_rate": 9.948781532518706e-06, + "loss": 2531.7477, + "step": 70520 + }, + { + "epoch": 0.14247506231895182, + "grad_norm": 5274.80615234375, + "learning_rate": 9.948731685328823e-06, + "loss": 2232.9205, + "step": 70530 + }, + { + "epoch": 0.14249526295163564, + "grad_norm": 2595.41943359375, + "learning_rate": 9.948681814019421e-06, + "loss": 3157.8998, + "step": 70540 + }, + { + "epoch": 0.14251546358431946, + "grad_norm": 13629.1259765625, + "learning_rate": 9.948631918590746e-06, + "loss": 2137.2205, + "step": 70550 + }, + { + "epoch": 0.14253566421700328, + "grad_norm": 107723.703125, + "learning_rate": 9.948581999043038e-06, + "loss": 3864.575, + "step": 70560 + }, + { + "epoch": 0.1425558648496871, + "grad_norm": 237280.421875, + "learning_rate": 9.948532055376541e-06, + "loss": 4075.1406, + "step": 70570 + }, + { + "epoch": 0.1425760654823709, + "grad_norm": 58787.703125, + "learning_rate": 9.9484820875915e-06, + "loss": 1781.3449, + "step": 70580 + }, + { + "epoch": 0.14259626611505472, + "grad_norm": 1140.3209228515625, + "learning_rate": 9.948432095688157e-06, + "loss": 1246.0867, + "step": 70590 + }, + { + "epoch": 0.14261646674773854, + "grad_norm": 10705.2431640625, + "learning_rate": 9.948382079666756e-06, + "loss": 1496.4299, + "step": 70600 + }, + { + "epoch": 0.14263666738042236, + "grad_norm": 1913.0086669921875, + "learning_rate": 9.948332039527541e-06, + "loss": 4496.9148, + "step": 70610 + }, + { + "epoch": 0.14265686801310618, + "grad_norm": 9299.4443359375, + "learning_rate": 9.948281975270758e-06, + "loss": 1283.3947, + "step": 70620 + }, + { + "epoch": 0.14267706864579, + "grad_norm": 1440.154296875, + "learning_rate": 9.948231886896646e-06, + "loss": 683.2662, + "step": 70630 + }, + { + "epoch": 0.14269726927847382, + "grad_norm": 71298.875, + "learning_rate": 9.948181774405453e-06, + "loss": 2307.5389, + "step": 70640 + }, + { + "epoch": 0.1427174699111576, + "grad_norm": 1312.6038818359375, + "learning_rate": 9.94813163779742e-06, + "loss": 2270.3197, + "step": 70650 + }, + { + "epoch": 0.14273767054384143, + "grad_norm": 15307.3173828125, + "learning_rate": 9.948081477072797e-06, + "loss": 1757.2209, + "step": 70660 + }, + { + "epoch": 0.14275787117652525, + "grad_norm": 6428.525390625, + "learning_rate": 9.948031292231823e-06, + "loss": 3608.7453, + "step": 70670 + }, + { + "epoch": 0.14277807180920907, + "grad_norm": 8457.337890625, + "learning_rate": 9.947981083274747e-06, + "loss": 958.2712, + "step": 70680 + }, + { + "epoch": 0.1427982724418929, + "grad_norm": 3107.658935546875, + "learning_rate": 9.947930850201808e-06, + "loss": 3044.6859, + "step": 70690 + }, + { + "epoch": 0.1428184730745767, + "grad_norm": 28073.46484375, + "learning_rate": 9.947880593013256e-06, + "loss": 1483.8324, + "step": 70700 + }, + { + "epoch": 0.1428386737072605, + "grad_norm": 8594.4873046875, + "learning_rate": 9.947830311709333e-06, + "loss": 1399.3272, + "step": 70710 + }, + { + "epoch": 0.14285887433994432, + "grad_norm": 5207.89306640625, + "learning_rate": 9.947780006290287e-06, + "loss": 3236.8635, + "step": 70720 + }, + { + "epoch": 0.14287907497262814, + "grad_norm": 2599.56982421875, + "learning_rate": 9.947729676756359e-06, + "loss": 1105.3124, + "step": 70730 + }, + { + "epoch": 0.14289927560531196, + "grad_norm": 8764.7119140625, + "learning_rate": 9.947679323107798e-06, + "loss": 2737.7584, + "step": 70740 + }, + { + "epoch": 0.14291947623799578, + "grad_norm": 2091.064697265625, + "learning_rate": 9.947628945344849e-06, + "loss": 3235.6479, + "step": 70750 + }, + { + "epoch": 0.1429396768706796, + "grad_norm": 4667.0859375, + "learning_rate": 9.947578543467755e-06, + "loss": 1771.31, + "step": 70760 + }, + { + "epoch": 0.1429598775033634, + "grad_norm": 21074.63671875, + "learning_rate": 9.947528117476764e-06, + "loss": 2844.524, + "step": 70770 + }, + { + "epoch": 0.1429800781360472, + "grad_norm": 161759.5, + "learning_rate": 9.94747766737212e-06, + "loss": 3875.1711, + "step": 70780 + }, + { + "epoch": 0.14300027876873103, + "grad_norm": 0.0, + "learning_rate": 9.94742719315407e-06, + "loss": 2663.9809, + "step": 70790 + }, + { + "epoch": 0.14302047940141485, + "grad_norm": 685.3599243164062, + "learning_rate": 9.947376694822861e-06, + "loss": 1703.5607, + "step": 70800 + }, + { + "epoch": 0.14304068003409867, + "grad_norm": 65602.625, + "learning_rate": 9.947326172378736e-06, + "loss": 1740.2992, + "step": 70810 + }, + { + "epoch": 0.1430608806667825, + "grad_norm": 105725.9921875, + "learning_rate": 9.947275625821947e-06, + "loss": 2770.0293, + "step": 70820 + }, + { + "epoch": 0.1430810812994663, + "grad_norm": 5654.9541015625, + "learning_rate": 9.947225055152735e-06, + "loss": 1838.4582, + "step": 70830 + }, + { + "epoch": 0.1431012819321501, + "grad_norm": 9195.333984375, + "learning_rate": 9.947174460371347e-06, + "loss": 2894.8965, + "step": 70840 + }, + { + "epoch": 0.14312148256483392, + "grad_norm": 5550.04345703125, + "learning_rate": 9.947123841478032e-06, + "loss": 2509.8037, + "step": 70850 + }, + { + "epoch": 0.14314168319751774, + "grad_norm": 28083.31640625, + "learning_rate": 9.947073198473034e-06, + "loss": 1752.0051, + "step": 70860 + }, + { + "epoch": 0.14316188383020156, + "grad_norm": 24017.564453125, + "learning_rate": 9.947022531356602e-06, + "loss": 2042.108, + "step": 70870 + }, + { + "epoch": 0.14318208446288538, + "grad_norm": 1291.4371337890625, + "learning_rate": 9.946971840128982e-06, + "loss": 1742.5703, + "step": 70880 + }, + { + "epoch": 0.1432022850955692, + "grad_norm": 128647.453125, + "learning_rate": 9.94692112479042e-06, + "loss": 2074.9805, + "step": 70890 + }, + { + "epoch": 0.143222485728253, + "grad_norm": 61472.6640625, + "learning_rate": 9.946870385341167e-06, + "loss": 1309.8016, + "step": 70900 + }, + { + "epoch": 0.14324268636093682, + "grad_norm": 11376.2998046875, + "learning_rate": 9.946819621781467e-06, + "loss": 1271.4098, + "step": 70910 + }, + { + "epoch": 0.14326288699362064, + "grad_norm": 104124.5234375, + "learning_rate": 9.946768834111568e-06, + "loss": 3060.6461, + "step": 70920 + }, + { + "epoch": 0.14328308762630446, + "grad_norm": 7943.78759765625, + "learning_rate": 9.946718022331715e-06, + "loss": 2844.4695, + "step": 70930 + }, + { + "epoch": 0.14330328825898828, + "grad_norm": 11620.7265625, + "learning_rate": 9.946667186442162e-06, + "loss": 1630.5733, + "step": 70940 + }, + { + "epoch": 0.1433234888916721, + "grad_norm": 165602.265625, + "learning_rate": 9.946616326443153e-06, + "loss": 3779.5344, + "step": 70950 + }, + { + "epoch": 0.14334368952435592, + "grad_norm": 29047.18359375, + "learning_rate": 9.946565442334935e-06, + "loss": 1979.1092, + "step": 70960 + }, + { + "epoch": 0.1433638901570397, + "grad_norm": 3758.068603515625, + "learning_rate": 9.946514534117755e-06, + "loss": 1327.8086, + "step": 70970 + }, + { + "epoch": 0.14338409078972353, + "grad_norm": 38944.58984375, + "learning_rate": 9.946463601791865e-06, + "loss": 3299.3828, + "step": 70980 + }, + { + "epoch": 0.14340429142240735, + "grad_norm": 6988.1142578125, + "learning_rate": 9.94641264535751e-06, + "loss": 2189.976, + "step": 70990 + }, + { + "epoch": 0.14342449205509117, + "grad_norm": 4823.9716796875, + "learning_rate": 9.946361664814942e-06, + "loss": 2780.5352, + "step": 71000 + }, + { + "epoch": 0.143444692687775, + "grad_norm": 703.5529174804688, + "learning_rate": 9.946310660164407e-06, + "loss": 755.0899, + "step": 71010 + }, + { + "epoch": 0.1434648933204588, + "grad_norm": 22866.060546875, + "learning_rate": 9.946259631406153e-06, + "loss": 990.7439, + "step": 71020 + }, + { + "epoch": 0.1434850939531426, + "grad_norm": 44013.125, + "learning_rate": 9.946208578540428e-06, + "loss": 2413.6816, + "step": 71030 + }, + { + "epoch": 0.14350529458582642, + "grad_norm": 25966.66015625, + "learning_rate": 9.946157501567484e-06, + "loss": 1877.3307, + "step": 71040 + }, + { + "epoch": 0.14352549521851024, + "grad_norm": 147983.71875, + "learning_rate": 9.946106400487568e-06, + "loss": 1309.4104, + "step": 71050 + }, + { + "epoch": 0.14354569585119406, + "grad_norm": 25832.146484375, + "learning_rate": 9.946055275300929e-06, + "loss": 1459.8026, + "step": 71060 + }, + { + "epoch": 0.14356589648387788, + "grad_norm": 69665.078125, + "learning_rate": 9.946004126007817e-06, + "loss": 1907.6937, + "step": 71070 + }, + { + "epoch": 0.1435860971165617, + "grad_norm": 51430.84375, + "learning_rate": 9.94595295260848e-06, + "loss": 2358.5395, + "step": 71080 + }, + { + "epoch": 0.1436062977492455, + "grad_norm": 0.0, + "learning_rate": 9.945901755103169e-06, + "loss": 2404.0645, + "step": 71090 + }, + { + "epoch": 0.1436264983819293, + "grad_norm": 19309.990234375, + "learning_rate": 9.945850533492132e-06, + "loss": 1901.0504, + "step": 71100 + }, + { + "epoch": 0.14364669901461313, + "grad_norm": 18452.4453125, + "learning_rate": 9.94579928777562e-06, + "loss": 2209.5756, + "step": 71110 + }, + { + "epoch": 0.14366689964729695, + "grad_norm": 15105.986328125, + "learning_rate": 9.94574801795388e-06, + "loss": 2409.518, + "step": 71120 + }, + { + "epoch": 0.14368710027998077, + "grad_norm": 2876.24658203125, + "learning_rate": 9.945696724027166e-06, + "loss": 3158.9941, + "step": 71130 + }, + { + "epoch": 0.1437073009126646, + "grad_norm": 12994.6015625, + "learning_rate": 9.945645405995726e-06, + "loss": 1469.3365, + "step": 71140 + }, + { + "epoch": 0.1437275015453484, + "grad_norm": 6139.6435546875, + "learning_rate": 9.94559406385981e-06, + "loss": 777.052, + "step": 71150 + }, + { + "epoch": 0.1437477021780322, + "grad_norm": 6118.8349609375, + "learning_rate": 9.945542697619667e-06, + "loss": 1697.067, + "step": 71160 + }, + { + "epoch": 0.14376790281071603, + "grad_norm": 32092.87109375, + "learning_rate": 9.94549130727555e-06, + "loss": 2330.1189, + "step": 71170 + }, + { + "epoch": 0.14378810344339985, + "grad_norm": 4367.5146484375, + "learning_rate": 9.945439892827709e-06, + "loss": 1175.5039, + "step": 71180 + }, + { + "epoch": 0.14380830407608367, + "grad_norm": 16455.52734375, + "learning_rate": 9.945388454276392e-06, + "loss": 1418.1895, + "step": 71190 + }, + { + "epoch": 0.14382850470876749, + "grad_norm": 2308.704833984375, + "learning_rate": 9.945336991621854e-06, + "loss": 5106.3512, + "step": 71200 + }, + { + "epoch": 0.1438487053414513, + "grad_norm": 21415.552734375, + "learning_rate": 9.945285504864342e-06, + "loss": 1873.4188, + "step": 71210 + }, + { + "epoch": 0.1438689059741351, + "grad_norm": 5071.76611328125, + "learning_rate": 9.945233994004107e-06, + "loss": 1491.304, + "step": 71220 + }, + { + "epoch": 0.14388910660681892, + "grad_norm": 3639.6572265625, + "learning_rate": 9.945182459041403e-06, + "loss": 1903.2023, + "step": 71230 + }, + { + "epoch": 0.14390930723950274, + "grad_norm": 18446.302734375, + "learning_rate": 9.945130899976477e-06, + "loss": 2504.5805, + "step": 71240 + }, + { + "epoch": 0.14392950787218656, + "grad_norm": 4328.939453125, + "learning_rate": 9.945079316809585e-06, + "loss": 626.2024, + "step": 71250 + }, + { + "epoch": 0.14394970850487038, + "grad_norm": 45829.73046875, + "learning_rate": 9.945027709540975e-06, + "loss": 2564.0104, + "step": 71260 + }, + { + "epoch": 0.1439699091375542, + "grad_norm": 19733.3671875, + "learning_rate": 9.9449760781709e-06, + "loss": 2389.1379, + "step": 71270 + }, + { + "epoch": 0.14399010977023802, + "grad_norm": 490.1267395019531, + "learning_rate": 9.944924422699613e-06, + "loss": 1302.5286, + "step": 71280 + }, + { + "epoch": 0.1440103104029218, + "grad_norm": 13456.939453125, + "learning_rate": 9.944872743127363e-06, + "loss": 3370.1648, + "step": 71290 + }, + { + "epoch": 0.14403051103560563, + "grad_norm": 2391.69775390625, + "learning_rate": 9.944821039454403e-06, + "loss": 2411.6443, + "step": 71300 + }, + { + "epoch": 0.14405071166828945, + "grad_norm": 2017.6767578125, + "learning_rate": 9.944769311680984e-06, + "loss": 3619.4289, + "step": 71310 + }, + { + "epoch": 0.14407091230097327, + "grad_norm": 5247.912109375, + "learning_rate": 9.94471755980736e-06, + "loss": 1396.6487, + "step": 71320 + }, + { + "epoch": 0.1440911129336571, + "grad_norm": 4631.0791015625, + "learning_rate": 9.944665783833782e-06, + "loss": 1338.7418, + "step": 71330 + }, + { + "epoch": 0.1441113135663409, + "grad_norm": 42699.453125, + "learning_rate": 9.944613983760503e-06, + "loss": 2056.8004, + "step": 71340 + }, + { + "epoch": 0.1441315141990247, + "grad_norm": 28023.44921875, + "learning_rate": 9.944562159587774e-06, + "loss": 2418.6799, + "step": 71350 + }, + { + "epoch": 0.14415171483170852, + "grad_norm": 722.2332763671875, + "learning_rate": 9.94451031131585e-06, + "loss": 1342.2833, + "step": 71360 + }, + { + "epoch": 0.14417191546439234, + "grad_norm": 8070.8720703125, + "learning_rate": 9.944458438944983e-06, + "loss": 1568.9616, + "step": 71370 + }, + { + "epoch": 0.14419211609707616, + "grad_norm": 1923.3660888671875, + "learning_rate": 9.944406542475425e-06, + "loss": 2104.8611, + "step": 71380 + }, + { + "epoch": 0.14421231672975998, + "grad_norm": 70021.3359375, + "learning_rate": 9.944354621907428e-06, + "loss": 2468.6246, + "step": 71390 + }, + { + "epoch": 0.1442325173624438, + "grad_norm": 1275.0313720703125, + "learning_rate": 9.944302677241247e-06, + "loss": 2099.4441, + "step": 71400 + }, + { + "epoch": 0.1442527179951276, + "grad_norm": 45313.15625, + "learning_rate": 9.944250708477135e-06, + "loss": 2470.3105, + "step": 71410 + }, + { + "epoch": 0.14427291862781141, + "grad_norm": 153466.875, + "learning_rate": 9.944198715615343e-06, + "loss": 3996.1316, + "step": 71420 + }, + { + "epoch": 0.14429311926049523, + "grad_norm": 15374.04296875, + "learning_rate": 9.944146698656127e-06, + "loss": 2181.5307, + "step": 71430 + }, + { + "epoch": 0.14431331989317905, + "grad_norm": 13930.677734375, + "learning_rate": 9.94409465759974e-06, + "loss": 1125.7877, + "step": 71440 + }, + { + "epoch": 0.14433352052586287, + "grad_norm": 7024.3974609375, + "learning_rate": 9.944042592446434e-06, + "loss": 752.1198, + "step": 71450 + }, + { + "epoch": 0.1443537211585467, + "grad_norm": 8060.58154296875, + "learning_rate": 9.943990503196466e-06, + "loss": 2595.7051, + "step": 71460 + }, + { + "epoch": 0.14437392179123051, + "grad_norm": 11681.3125, + "learning_rate": 9.943938389850087e-06, + "loss": 1742.2031, + "step": 71470 + }, + { + "epoch": 0.1443941224239143, + "grad_norm": 826.763427734375, + "learning_rate": 9.943886252407551e-06, + "loss": 2832.1357, + "step": 71480 + }, + { + "epoch": 0.14441432305659813, + "grad_norm": 2207.547607421875, + "learning_rate": 9.943834090869116e-06, + "loss": 1487.6988, + "step": 71490 + }, + { + "epoch": 0.14443452368928195, + "grad_norm": 1348.5889892578125, + "learning_rate": 9.94378190523503e-06, + "loss": 1757.1072, + "step": 71500 + }, + { + "epoch": 0.14445472432196577, + "grad_norm": 8694.7724609375, + "learning_rate": 9.943729695505552e-06, + "loss": 1442.4648, + "step": 71510 + }, + { + "epoch": 0.1444749249546496, + "grad_norm": 12214.1572265625, + "learning_rate": 9.943677461680935e-06, + "loss": 3836.693, + "step": 71520 + }, + { + "epoch": 0.1444951255873334, + "grad_norm": 15355.767578125, + "learning_rate": 9.943625203761434e-06, + "loss": 1138.0264, + "step": 71530 + }, + { + "epoch": 0.1445153262200172, + "grad_norm": 1260.4820556640625, + "learning_rate": 9.943572921747302e-06, + "loss": 2204.3229, + "step": 71540 + }, + { + "epoch": 0.14453552685270102, + "grad_norm": 4220.21630859375, + "learning_rate": 9.943520615638796e-06, + "loss": 3756.9215, + "step": 71550 + }, + { + "epoch": 0.14455572748538484, + "grad_norm": 2274.89453125, + "learning_rate": 9.943468285436171e-06, + "loss": 2258.4727, + "step": 71560 + }, + { + "epoch": 0.14457592811806866, + "grad_norm": 92567.6796875, + "learning_rate": 9.94341593113968e-06, + "loss": 2328.2064, + "step": 71570 + }, + { + "epoch": 0.14459612875075248, + "grad_norm": 249.28741455078125, + "learning_rate": 9.943363552749579e-06, + "loss": 3919.9328, + "step": 71580 + }, + { + "epoch": 0.1446163293834363, + "grad_norm": 70716.109375, + "learning_rate": 9.943311150266124e-06, + "loss": 1243.3412, + "step": 71590 + }, + { + "epoch": 0.14463653001612012, + "grad_norm": 36003.84375, + "learning_rate": 9.94325872368957e-06, + "loss": 3095.5766, + "step": 71600 + }, + { + "epoch": 0.1446567306488039, + "grad_norm": 25888.607421875, + "learning_rate": 9.943206273020174e-06, + "loss": 4588.0504, + "step": 71610 + }, + { + "epoch": 0.14467693128148773, + "grad_norm": 8530.3056640625, + "learning_rate": 9.943153798258188e-06, + "loss": 1883.3637, + "step": 71620 + }, + { + "epoch": 0.14469713191417155, + "grad_norm": 58914.92578125, + "learning_rate": 9.94310129940387e-06, + "loss": 1530.5476, + "step": 71630 + }, + { + "epoch": 0.14471733254685537, + "grad_norm": 569.20068359375, + "learning_rate": 9.943048776457479e-06, + "loss": 1224.7913, + "step": 71640 + }, + { + "epoch": 0.1447375331795392, + "grad_norm": 4803.83349609375, + "learning_rate": 9.942996229419264e-06, + "loss": 1386.6733, + "step": 71650 + }, + { + "epoch": 0.144757733812223, + "grad_norm": 19711.578125, + "learning_rate": 9.942943658289487e-06, + "loss": 974.0537, + "step": 71660 + }, + { + "epoch": 0.1447779344449068, + "grad_norm": 12831.6220703125, + "learning_rate": 9.942891063068401e-06, + "loss": 1120.6386, + "step": 71670 + }, + { + "epoch": 0.14479813507759062, + "grad_norm": 139462.890625, + "learning_rate": 9.942838443756265e-06, + "loss": 3278.9695, + "step": 71680 + }, + { + "epoch": 0.14481833571027444, + "grad_norm": 10254.9375, + "learning_rate": 9.942785800353332e-06, + "loss": 1861.9377, + "step": 71690 + }, + { + "epoch": 0.14483853634295826, + "grad_norm": 136935.421875, + "learning_rate": 9.942733132859861e-06, + "loss": 3441.0852, + "step": 71700 + }, + { + "epoch": 0.14485873697564208, + "grad_norm": 29056.236328125, + "learning_rate": 9.94268044127611e-06, + "loss": 1279.8332, + "step": 71710 + }, + { + "epoch": 0.1448789376083259, + "grad_norm": 2691.614501953125, + "learning_rate": 9.942627725602332e-06, + "loss": 1691.6566, + "step": 71720 + }, + { + "epoch": 0.1448991382410097, + "grad_norm": 18156.279296875, + "learning_rate": 9.942574985838785e-06, + "loss": 1589.3619, + "step": 71730 + }, + { + "epoch": 0.14491933887369352, + "grad_norm": 4978.80810546875, + "learning_rate": 9.942522221985728e-06, + "loss": 1475.3737, + "step": 71740 + }, + { + "epoch": 0.14493953950637734, + "grad_norm": 142598.421875, + "learning_rate": 9.942469434043418e-06, + "loss": 3158.6676, + "step": 71750 + }, + { + "epoch": 0.14495974013906116, + "grad_norm": 14376.3173828125, + "learning_rate": 9.942416622012113e-06, + "loss": 4048.6305, + "step": 71760 + }, + { + "epoch": 0.14497994077174498, + "grad_norm": 4832.42529296875, + "learning_rate": 9.942363785892065e-06, + "loss": 1408.8143, + "step": 71770 + }, + { + "epoch": 0.1450001414044288, + "grad_norm": 51923.42578125, + "learning_rate": 9.942310925683538e-06, + "loss": 2921.2855, + "step": 71780 + }, + { + "epoch": 0.14502034203711262, + "grad_norm": 67623.2421875, + "learning_rate": 9.942258041386785e-06, + "loss": 1594.638, + "step": 71790 + }, + { + "epoch": 0.1450405426697964, + "grad_norm": 24564.880859375, + "learning_rate": 9.942205133002067e-06, + "loss": 1144.7191, + "step": 71800 + }, + { + "epoch": 0.14506074330248023, + "grad_norm": 552.9115600585938, + "learning_rate": 9.94215220052964e-06, + "loss": 974.8521, + "step": 71810 + }, + { + "epoch": 0.14508094393516405, + "grad_norm": 802.7922973632812, + "learning_rate": 9.942099243969765e-06, + "loss": 1601.731, + "step": 71820 + }, + { + "epoch": 0.14510114456784787, + "grad_norm": 10066.44140625, + "learning_rate": 9.942046263322694e-06, + "loss": 1687.6631, + "step": 71830 + }, + { + "epoch": 0.1451213452005317, + "grad_norm": 22240.453125, + "learning_rate": 9.941993258588691e-06, + "loss": 1155.1028, + "step": 71840 + }, + { + "epoch": 0.1451415458332155, + "grad_norm": 27310.763671875, + "learning_rate": 9.941940229768012e-06, + "loss": 1543.0646, + "step": 71850 + }, + { + "epoch": 0.1451617464658993, + "grad_norm": 141468.578125, + "learning_rate": 9.941887176860916e-06, + "loss": 3171.4945, + "step": 71860 + }, + { + "epoch": 0.14518194709858312, + "grad_norm": 10168.6953125, + "learning_rate": 9.94183409986766e-06, + "loss": 1089.2806, + "step": 71870 + }, + { + "epoch": 0.14520214773126694, + "grad_norm": 9205.7119140625, + "learning_rate": 9.941780998788506e-06, + "loss": 2401.9938, + "step": 71880 + }, + { + "epoch": 0.14522234836395076, + "grad_norm": 37304.94140625, + "learning_rate": 9.941727873623709e-06, + "loss": 2344.2438, + "step": 71890 + }, + { + "epoch": 0.14524254899663458, + "grad_norm": 9531.8095703125, + "learning_rate": 9.94167472437353e-06, + "loss": 1087.3215, + "step": 71900 + }, + { + "epoch": 0.1452627496293184, + "grad_norm": 21526.609375, + "learning_rate": 9.941621551038228e-06, + "loss": 1986.3684, + "step": 71910 + }, + { + "epoch": 0.14528295026200222, + "grad_norm": 1863.451416015625, + "learning_rate": 9.941568353618064e-06, + "loss": 1160.8761, + "step": 71920 + }, + { + "epoch": 0.145303150894686, + "grad_norm": 115204.015625, + "learning_rate": 9.941515132113291e-06, + "loss": 2412.1592, + "step": 71930 + }, + { + "epoch": 0.14532335152736983, + "grad_norm": 12389.3837890625, + "learning_rate": 9.941461886524176e-06, + "loss": 1986.2615, + "step": 71940 + }, + { + "epoch": 0.14534355216005365, + "grad_norm": 1638.5684814453125, + "learning_rate": 9.941408616850974e-06, + "loss": 1418.6298, + "step": 71950 + }, + { + "epoch": 0.14536375279273747, + "grad_norm": 23541.306640625, + "learning_rate": 9.941355323093944e-06, + "loss": 1082.6744, + "step": 71960 + }, + { + "epoch": 0.1453839534254213, + "grad_norm": 2174.2900390625, + "learning_rate": 9.94130200525335e-06, + "loss": 1217.4611, + "step": 71970 + }, + { + "epoch": 0.1454041540581051, + "grad_norm": 15832.6044921875, + "learning_rate": 9.941248663329448e-06, + "loss": 1252.8494, + "step": 71980 + }, + { + "epoch": 0.1454243546907889, + "grad_norm": 8903.8740234375, + "learning_rate": 9.941195297322498e-06, + "loss": 4772.9422, + "step": 71990 + }, + { + "epoch": 0.14544455532347272, + "grad_norm": 29647.3515625, + "learning_rate": 9.941141907232766e-06, + "loss": 4124.3656, + "step": 72000 + } + ], + "logging_steps": 10, + "max_steps": 500000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 4000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}