diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,28012 +1,64412 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.08080253073526263, + "epoch": 0.18584582069110406, "eval_steps": 500, - "global_step": 40000, + "global_step": 92000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0200632683815657e-05, - "grad_norm": 65096.4921875, + "grad_norm": 83736.3046875, "learning_rate": 2e-09, - "loss": 11233.4719, + "loss": 373408.825, "step": 10 }, { "epoch": 4.040126536763131e-05, - "grad_norm": 39778.34765625, + "grad_norm": 53146.40625, "learning_rate": 4e-09, - "loss": 6753.3531, + "loss": 247317.85, "step": 20 }, { "epoch": 6.060189805144697e-05, - "grad_norm": 122502.625, + "grad_norm": 136772.140625, "learning_rate": 6e-09, - "loss": 11836.7258, + "loss": 424051.85, "step": 30 }, { "epoch": 8.080253073526263e-05, - "grad_norm": 3041.6865234375, + "grad_norm": 5514.5703125, "learning_rate": 8e-09, - "loss": 10987.382, + "loss": 409105.575, "step": 40 }, { "epoch": 0.00010100316341907829, - "grad_norm": 102942.4453125, + "grad_norm": 147286.5625, "learning_rate": 1e-08, - "loss": 14632.0453, + "loss": 223427.325, "step": 50 }, { "epoch": 0.00012120379610289395, - "grad_norm": 27414.546875, + "grad_norm": 38365.84375, "learning_rate": 1.2e-08, - "loss": 4729.0188, + "loss": 174143.0625, "step": 60 }, { "epoch": 0.0001414044287867096, - "grad_norm": 73012.1484375, + "grad_norm": 86495.0078125, "learning_rate": 1.4000000000000001e-08, - "loss": 10290.0078, + "loss": 114562.0, "step": 70 }, { "epoch": 0.00016160506147052525, - "grad_norm": 122050.140625, + "grad_norm": 153524.765625, "learning_rate": 1.6e-08, - "loss": 11848.857, + "loss": 433216.4, "step": 80 }, { "epoch": 0.0001818056941543409, - "grad_norm": 490384.40625, + "grad_norm": 630919.625, "learning_rate": 1.8000000000000002e-08, - "loss": 7937.6734, + "loss": 305444.575, "step": 90 }, { "epoch": 0.00020200632683815657, - "grad_norm": 194970.03125, + "grad_norm": 247445.953125, "learning_rate": 2e-08, - "loss": 11736.6211, + "loss": 427279.55, "step": 100 }, { "epoch": 0.00022220695952197223, - "grad_norm": 25075.3828125, + "grad_norm": 31963.97265625, "learning_rate": 2.2000000000000002e-08, - "loss": 6181.4105, + "loss": 235723.375, "step": 110 }, { "epoch": 0.0002424075922057879, - "grad_norm": 248390.96875, + "grad_norm": 310993.3125, "learning_rate": 2.4e-08, - "loss": 11065.5094, + "loss": 408180.1, "step": 120 }, { "epoch": 0.0002626082248896035, - "grad_norm": 305972.375, + "grad_norm": 387362.84375, "learning_rate": 2.6e-08, - "loss": 12863.4969, + "loss": 480477.35, "step": 130 }, { "epoch": 0.0002828088575734192, - "grad_norm": 11724.2216796875, + "grad_norm": 20231.345703125, "learning_rate": 2.8000000000000003e-08, - "loss": 13912.8828, + "loss": 519868.7, "step": 140 }, { "epoch": 0.00030300949025723485, - "grad_norm": 172294.40625, + "grad_norm": 204711.265625, "learning_rate": 3.0000000000000004e-08, - "loss": 12760.107, + "loss": 523599.1, "step": 150 }, { "epoch": 0.0003232101229410505, - "grad_norm": 97164.8671875, + "grad_norm": 127249.4609375, "learning_rate": 3.2e-08, - "loss": 16991.5703, + "loss": 603631.95, "step": 160 }, { "epoch": 0.00034341075562486617, - "grad_norm": 7352.380859375, + "grad_norm": 9513.45703125, "learning_rate": 3.4e-08, - "loss": 13090.6688, + "loss": 488906.0, "step": 170 }, { "epoch": 0.0003636113883086818, - "grad_norm": 128528.03125, + "grad_norm": 167473.578125, "learning_rate": 3.6000000000000005e-08, - "loss": 8226.9242, + "loss": 306468.175, "step": 180 }, { "epoch": 0.0003838120209924975, - "grad_norm": 944609.3125, + "grad_norm": 1192213.875, "learning_rate": 3.8e-08, - "loss": 9925.725, + "loss": 382436.475, "step": 190 }, { "epoch": 0.00040401265367631315, - "grad_norm": 196744.65625, + "grad_norm": 273623.03125, "learning_rate": 4e-08, - "loss": 10066.875, + "loss": 411896.15, "step": 200 }, { "epoch": 0.0004242132863601288, - "grad_norm": 30733.384765625, + "grad_norm": 44820.0, "learning_rate": 4.2e-08, - "loss": 13516.4375, + "loss": 496755.6, "step": 210 }, { "epoch": 0.00044441391904394446, - "grad_norm": 4609.60107421875, + "grad_norm": 8523.7080078125, "learning_rate": 4.4000000000000004e-08, - "loss": 6884.6539, + "loss": 288092.0, "step": 220 }, { "epoch": 0.0004646145517277601, - "grad_norm": 192982.515625, + "grad_norm": 267589.9375, "learning_rate": 4.6e-08, - "loss": 4757.4563, + "loss": 194557.7, "step": 230 }, { "epoch": 0.0004848151844115758, - "grad_norm": 78006.6953125, + "grad_norm": 98972.8125, "learning_rate": 4.8e-08, - "loss": 16362.8391, + "loss": 639265.05, "step": 240 }, { "epoch": 0.0005050158170953914, - "grad_norm": 234356.640625, + "grad_norm": 340557.09375, "learning_rate": 5.0000000000000004e-08, - "loss": 15078.7016, + "loss": 591631.25, "step": 250 }, { "epoch": 0.000525216449779207, - "grad_norm": 469881.40625, + "grad_norm": 625899.5625, "learning_rate": 5.2e-08, - "loss": 10714.3516, + "loss": 447399.4, "step": 260 }, { "epoch": 0.0005454170824630227, - "grad_norm": 2937.453369140625, + "grad_norm": 5286.2802734375, "learning_rate": 5.400000000000001e-08, - "loss": 2641.0516, + "loss": 123876.025, "step": 270 }, { "epoch": 0.0005656177151468384, - "grad_norm": 70269.5, + "grad_norm": 107371.9140625, "learning_rate": 5.6000000000000005e-08, - "loss": 19962.9969, + "loss": 774217.3, "step": 280 }, { "epoch": 0.000585818347830654, - "grad_norm": 336712.125, + "grad_norm": 475586.125, "learning_rate": 5.8e-08, - "loss": 15199.8156, + "loss": 587557.1, "step": 290 }, { "epoch": 0.0006060189805144697, - "grad_norm": 525204.5, + "grad_norm": 592803.375, "learning_rate": 6.000000000000001e-08, - "loss": 9005.2641, + "loss": 354666.95, "step": 300 }, { "epoch": 0.0006262196131982854, - "grad_norm": 190851.921875, + "grad_norm": 418616.53125, "learning_rate": 6.2e-08, - "loss": 14303.7391, + "loss": 614736.45, "step": 310 }, { "epoch": 0.000646420245882101, - "grad_norm": 5955.3447265625, + "grad_norm": 16942.396484375, "learning_rate": 6.4e-08, - "loss": 7467.5555, + "loss": 313741.15, "step": 320 }, { "epoch": 0.0006666208785659167, - "grad_norm": 49157.48046875, + "grad_norm": 98732.484375, "learning_rate": 6.600000000000001e-08, - "loss": 8577.0812, + "loss": 348942.975, "step": 330 }, { "epoch": 0.0006868215112497323, - "grad_norm": 117187.0859375, + "grad_norm": 326644.21875, "learning_rate": 6.8e-08, - "loss": 14602.1969, + "loss": 616877.6, "step": 340 }, { "epoch": 0.000707022143933548, - "grad_norm": 87424.6796875, + "grad_norm": 142671.28125, "learning_rate": 7e-08, - "loss": 7267.4609, + "loss": 317281.45, "step": 350 }, { "epoch": 0.0007272227766173637, - "grad_norm": 148016.0625, + "grad_norm": 169390.75, "learning_rate": 7.200000000000001e-08, - "loss": 6086.9867, + "loss": 262640.4, "step": 360 }, { "epoch": 0.0007474234093011793, - "grad_norm": 35290.921875, + "grad_norm": 55167.09375, "learning_rate": 7.400000000000001e-08, - "loss": 4718.1293, + "loss": 186088.9375, "step": 370 }, { "epoch": 0.000767624041984995, - "grad_norm": 59703.11328125, + "grad_norm": 83827.2265625, "learning_rate": 7.6e-08, - "loss": 7168.1328, + "loss": 310705.3, "step": 380 }, { "epoch": 0.0007878246746688106, - "grad_norm": 115406.109375, + "grad_norm": 181304.71875, "learning_rate": 7.8e-08, - "loss": 5522.825, + "loss": 234021.4, "step": 390 }, { "epoch": 0.0008080253073526263, - "grad_norm": 1033790.0, + "grad_norm": 1480947.75, "learning_rate": 8e-08, - "loss": 14909.4578, + "loss": 668650.8, "step": 400 }, { "epoch": 0.000828225940036442, - "grad_norm": 1555.5286865234375, + "grad_norm": 1858.93994140625, "learning_rate": 8.200000000000002e-08, - "loss": 3192.377, + "loss": 134692.125, "step": 410 }, { "epoch": 0.0008484265727202576, - "grad_norm": 5990.689453125, + "grad_norm": 11091.0439453125, "learning_rate": 8.4e-08, - "loss": 9098.8461, + "loss": 412967.975, "step": 420 }, { "epoch": 0.0008686272054040733, - "grad_norm": 18429.44140625, + "grad_norm": 32737.765625, "learning_rate": 8.6e-08, - "loss": 6171.102, + "loss": 275469.825, "step": 430 }, { "epoch": 0.0008888278380878889, - "grad_norm": 132924.671875, + "grad_norm": 202891.53125, "learning_rate": 8.800000000000001e-08, - "loss": 5040.1375, + "loss": 226698.3, "step": 440 }, { "epoch": 0.0009090284707717046, - "grad_norm": 173899.5, + "grad_norm": 225592.859375, "learning_rate": 9e-08, - "loss": 6546.6094, + "loss": 306378.45, "step": 450 }, { "epoch": 0.0009292291034555202, - "grad_norm": 123756.265625, + "grad_norm": 213380.8125, "learning_rate": 9.2e-08, - "loss": 6561.5109, + "loss": 328450.35, "step": 460 }, { "epoch": 0.0009494297361393359, - "grad_norm": 11079.6220703125, + "grad_norm": 20587.87890625, "learning_rate": 9.400000000000001e-08, - "loss": 4482.1957, + "loss": 208999.825, "step": 470 }, { "epoch": 0.0009696303688231516, - "grad_norm": 5980.13623046875, + "grad_norm": 11341.0341796875, "learning_rate": 9.6e-08, - "loss": 10391.9445, + "loss": 489597.4, "step": 480 }, { "epoch": 0.0009898310015069671, - "grad_norm": 376240.28125, + "grad_norm": 567761.5625, "learning_rate": 9.8e-08, - "loss": 16186.6797, + "loss": 771154.7, "step": 490 }, { "epoch": 0.0010100316341907828, - "grad_norm": 58464.67578125, + "grad_norm": 95081.78125, "learning_rate": 1.0000000000000001e-07, - "loss": 6353.3027, + "loss": 321518.75, "step": 500 }, { "epoch": 0.0010302322668745984, - "grad_norm": 39892.734375, + "grad_norm": 68337.609375, "learning_rate": 1.0200000000000001e-07, - "loss": 7480.5086, + "loss": 364351.225, "step": 510 }, { "epoch": 0.001050432899558414, - "grad_norm": 45868.78125, + "grad_norm": 87485.234375, "learning_rate": 1.04e-07, - "loss": 5171.841, + "loss": 685626.45, "step": 520 }, { "epoch": 0.0010706335322422298, - "grad_norm": 99062.4765625, + "grad_norm": 160185.390625, "learning_rate": 1.0600000000000001e-07, - "loss": 4597.2766, + "loss": 238281.725, "step": 530 }, { "epoch": 0.0010908341649260454, - "grad_norm": 15070.6015625, + "grad_norm": 23498.45703125, "learning_rate": 1.0800000000000001e-07, - "loss": 4236.3781, + "loss": 236872.85, "step": 540 }, { "epoch": 0.001111034797609861, - "grad_norm": 52908.14453125, + "grad_norm": 107803.9140625, "learning_rate": 1.1e-07, - "loss": 12600.6578, + "loss": 650193.0, "step": 550 }, { "epoch": 0.0011312354302936767, - "grad_norm": 56906.28125, + "grad_norm": 104909.609375, "learning_rate": 1.1200000000000001e-07, - "loss": 13785.9359, + "loss": 750099.8, "step": 560 }, { "epoch": 0.0011514360629774924, - "grad_norm": 42789.75, + "grad_norm": 97141.703125, "learning_rate": 1.1400000000000001e-07, - "loss": 5349.8664, + "loss": 299977.2, "step": 570 }, { "epoch": 0.001171636695661308, - "grad_norm": 263825.09375, + "grad_norm": 487710.375, "learning_rate": 1.16e-07, - "loss": 4150.466, + "loss": 252171.4, "step": 580 }, { "epoch": 0.0011918373283451237, - "grad_norm": 32624.45703125, + "grad_norm": 66863.9453125, "learning_rate": 1.1800000000000001e-07, - "loss": 4708.4141, + "loss": 274458.875, "step": 590 }, { "epoch": 0.0012120379610289394, - "grad_norm": 46189.94921875, + "grad_norm": 95021.75, "learning_rate": 1.2000000000000002e-07, - "loss": 7048.482, + "loss": 394326.575, "step": 600 }, { "epoch": 0.001232238593712755, - "grad_norm": 149440.578125, + "grad_norm": 673771.9375, "learning_rate": 1.22e-07, - "loss": 6607.6875, + "loss": 461093.95, "step": 610 }, { "epoch": 0.0012524392263965707, - "grad_norm": 255069.703125, + "grad_norm": 757915.125, "learning_rate": 1.24e-07, - "loss": 9900.175, + "loss": 561302.3, "step": 620 }, { "epoch": 0.0012726398590803864, - "grad_norm": 159184.65625, + "grad_norm": 451339.59375, "learning_rate": 1.2600000000000002e-07, - "loss": 6831.8297, + "loss": 461238.0, "step": 630 }, { "epoch": 0.001292840491764202, - "grad_norm": 22822.376953125, + "grad_norm": 59287.33203125, "learning_rate": 1.28e-07, - "loss": 11728.625, + "loss": 755194.35, "step": 640 }, { "epoch": 0.0013130411244480177, - "grad_norm": 41516.66015625, + "grad_norm": 107376.796875, "learning_rate": 1.3e-07, - "loss": 6552.25, + "loss": 418832.325, "step": 650 }, { "epoch": 0.0013332417571318333, - "grad_norm": 43093.328125, + "grad_norm": 136292.765625, "learning_rate": 1.3200000000000002e-07, - "loss": 7379.882, + "loss": 524561.2, "step": 660 }, { "epoch": 0.001353442389815649, - "grad_norm": 107386.859375, + "grad_norm": 775048.625, "learning_rate": 1.34e-07, - "loss": 6401.5484, + "loss": 486069.25, "step": 670 }, { "epoch": 0.0013736430224994647, - "grad_norm": 213066.9375, + "grad_norm": 611663.875, "learning_rate": 1.36e-07, - "loss": 4699.1313, + "loss": 308842.375, "step": 680 }, { "epoch": 0.0013938436551832803, - "grad_norm": 10320.375, + "grad_norm": 43598.10546875, "learning_rate": 1.3800000000000002e-07, - "loss": 7790.7414, + "loss": 590683.6, "step": 690 }, { "epoch": 0.001414044287867096, - "grad_norm": 23302.9609375, + "grad_norm": 68910.765625, "learning_rate": 1.4e-07, - "loss": 3460.5645, + "loss": 269986.0, "step": 700 }, { "epoch": 0.0014342449205509116, - "grad_norm": 1215.584716796875, + "grad_norm": 4407.7451171875, "learning_rate": 1.4200000000000003e-07, - "loss": 1822.1643, + "loss": 143949.725, "step": 710 }, { "epoch": 0.0014544455532347273, - "grad_norm": 12606.0166015625, + "grad_norm": 39544.77734375, "learning_rate": 1.4400000000000002e-07, - "loss": 3574.1598, + "loss": 250966.35, "step": 720 }, { "epoch": 0.001474646185918543, - "grad_norm": 117029.765625, + "grad_norm": 519757.90625, "learning_rate": 1.46e-07, - "loss": 9211.0422, + "loss": 765333.35, "step": 730 }, { "epoch": 0.0014948468186023586, - "grad_norm": 340.4008483886719, + "grad_norm": 1440.6585693359375, "learning_rate": 1.4800000000000003e-07, - "loss": 3960.3461, + "loss": 292025.475, "step": 740 }, { "epoch": 0.0015150474512861743, - "grad_norm": 35960.99609375, + "grad_norm": 127945.2265625, "learning_rate": 1.5000000000000002e-07, - "loss": 2743.5658, + "loss": 248232.75, "step": 750 }, { "epoch": 0.00153524808396999, - "grad_norm": 145897.859375, + "grad_norm": 793228.4375, "learning_rate": 1.52e-07, - "loss": 3500.0004, + "loss": 322774.9, "step": 760 }, { "epoch": 0.0015554487166538056, - "grad_norm": 45004.9765625, + "grad_norm": 254828.3125, "learning_rate": 1.5400000000000003e-07, - "loss": 1732.0832, + "loss": 208892.525, "step": 770 }, { "epoch": 0.0015756493493376213, - "grad_norm": 179198.921875, + "grad_norm": 1198211.625, "learning_rate": 1.56e-07, - "loss": 3390.8328, + "loss": 351610.025, "step": 780 }, { "epoch": 0.001595849982021437, - "grad_norm": 7204.7783203125, + "grad_norm": 21228.140625, "learning_rate": 1.5800000000000004e-07, - "loss": 1180.8115, + "loss": 137029.2375, "step": 790 }, { "epoch": 0.0016160506147052526, "grad_norm": 0.0, "learning_rate": 1.6e-07, - "loss": 2439.4896, + "loss": 275660.95, "step": 800 }, { "epoch": 0.0016362512473890682, - "grad_norm": 181739.609375, + "grad_norm": 965494.75, "learning_rate": 1.62e-07, - "loss": 2977.4486, + "loss": 372736.1, "step": 810 }, { "epoch": 0.001656451880072884, - "grad_norm": 222.23785400390625, + "grad_norm": 707.3334350585938, "learning_rate": 1.6400000000000004e-07, - "loss": 5945.4168, + "loss": 712232.1, "step": 820 }, { "epoch": 0.0016766525127566996, - "grad_norm": 21338.203125, + "grad_norm": 149820.03125, "learning_rate": 1.66e-07, - "loss": 2105.0984, + "loss": 374511.625, "step": 830 }, { "epoch": 0.0016968531454405152, - "grad_norm": 13159.9228515625, + "grad_norm": 103196.6640625, "learning_rate": 1.68e-07, - "loss": 1220.1609, + "loss": 143199.4375, "step": 840 }, { "epoch": 0.0017170537781243309, - "grad_norm": 61013.140625, + "grad_norm": 451886.0, "learning_rate": 1.7000000000000001e-07, - "loss": 5229.9059, + "loss": 796682.95, "step": 850 }, { "epoch": 0.0017372544108081465, - "grad_norm": 35526.16796875, + "grad_norm": 266100.28125, "learning_rate": 1.72e-07, - "loss": 1076.1927, + "loss": 131878.5625, "step": 860 }, { "epoch": 0.0017574550434919622, - "grad_norm": 333795.25, + "grad_norm": 2557776.5, "learning_rate": 1.74e-07, - "loss": 3029.3473, + "loss": 455015.25, "step": 870 }, { "epoch": 0.0017776556761757779, - "grad_norm": 48221.35546875, + "grad_norm": 501454.3125, "learning_rate": 1.7600000000000001e-07, - "loss": 1157.1312, + "loss": 184736.45, "step": 880 }, { "epoch": 0.0017978563088595935, - "grad_norm": 46140.2734375, + "grad_norm": 384709.03125, "learning_rate": 1.78e-07, - "loss": 1161.5459, + "loss": 227766.225, "step": 890 }, { "epoch": 0.0018180569415434092, - "grad_norm": 282336.625, + "grad_norm": 3030316.25, "learning_rate": 1.8e-07, - "loss": 2661.925, + "loss": 519283.2, "step": 900 }, { "epoch": 0.0018382575742272248, - "grad_norm": 33197.43359375, + "grad_norm": 486770.6875, "learning_rate": 1.8200000000000002e-07, - "loss": 1830.7346, + "loss": 409996.8, "step": 910 }, { "epoch": 0.0018584582069110405, - "grad_norm": 38643.58984375, + "grad_norm": 455300.625, "learning_rate": 1.84e-07, - "loss": 977.2544, + "loss": 172638.675, "step": 920 }, { "epoch": 0.0018786588395948562, - "grad_norm": 14847.4560546875, + "grad_norm": 181121.8125, "learning_rate": 1.86e-07, - "loss": 976.7939, + "loss": 257575.15, "step": 930 }, { "epoch": 0.0018988594722786718, - "grad_norm": 77729.234375, + "grad_norm": 1036799.0625, "learning_rate": 1.8800000000000002e-07, - "loss": 2855.9787, + "loss": 688808.7, "step": 940 }, { "epoch": 0.0019190601049624875, - "grad_norm": 5603.44189453125, + "grad_norm": 44095.8828125, "learning_rate": 1.9e-07, - "loss": 828.1684, + "loss": 238714.175, "step": 950 }, { "epoch": 0.0019392607376463031, - "grad_norm": 30702.611328125, + "grad_norm": 354469.5, "learning_rate": 1.92e-07, - "loss": 1953.3998, + "loss": 542354.65, "step": 960 }, { "epoch": 0.0019594613703301186, - "grad_norm": 4158.3828125, + "grad_norm": 80858.5, "learning_rate": 1.9400000000000002e-07, - "loss": 1464.1069, + "loss": 490965.0, "step": 970 }, { "epoch": 0.0019796620030139342, - "grad_norm": 20014.083984375, + "grad_norm": 276063.65625, "learning_rate": 1.96e-07, - "loss": 1218.4455, + "loss": 364453.875, "step": 980 }, { "epoch": 0.00199986263569775, - "grad_norm": 103666.546875, + "grad_norm": 1596976.5, "learning_rate": 1.9800000000000003e-07, - "loss": 1240.7101, + "loss": 429143.9, "step": 990 }, { "epoch": 0.0020200632683815656, - "grad_norm": 938.9301147460938, + "grad_norm": 6207.4130859375, "learning_rate": 2.0000000000000002e-07, - "loss": 538.2274, + "loss": 200281.5, "step": 1000 }, { "epoch": 0.0020402639010653812, - "grad_norm": 13128.259765625, + "grad_norm": 283366.34375, "learning_rate": 2.02e-07, - "loss": 754.7326, + "loss": 328666.625, "step": 1010 }, { "epoch": 0.002060464533749197, - "grad_norm": 767.2325439453125, + "grad_norm": 11474.4365234375, "learning_rate": 2.0400000000000003e-07, - "loss": 644.2353, + "loss": 355200.5, "step": 1020 }, { "epoch": 0.0020806651664330125, - "grad_norm": 1375.877685546875, + "grad_norm": 34710.90625, "learning_rate": 2.0600000000000002e-07, - "loss": 655.4981, + "loss": 343393.125, "step": 1030 }, { "epoch": 0.002100865799116828, - "grad_norm": 8146.990234375, + "grad_norm": 219975.5625, "learning_rate": 2.08e-07, - "loss": 673.8707, + "loss": 322336.5, "step": 1040 }, { "epoch": 0.002121066431800644, - "grad_norm": 527.5423583984375, + "grad_norm": 16997.318359375, "learning_rate": 2.1000000000000003e-07, - "loss": 850.3592, + "loss": 505389.05, "step": 1050 }, { "epoch": 0.0021412670644844595, - "grad_norm": 873.3736572265625, + "grad_norm": 9891.099609375, "learning_rate": 2.1200000000000002e-07, - "loss": 363.9269, + "loss": 221517.625, "step": 1060 }, { "epoch": 0.002161467697168275, - "grad_norm": 1566.6383056640625, + "grad_norm": 60380.53125, "learning_rate": 2.14e-07, - "loss": 256.3116, + "loss": 158835.975, "step": 1070 }, { "epoch": 0.002181668329852091, - "grad_norm": 1443.2564697265625, + "grad_norm": 19396.9609375, "learning_rate": 2.1600000000000003e-07, - "loss": 143.4964, + "loss": 127154.2125, "step": 1080 }, { "epoch": 0.0022018689625359065, - "grad_norm": 1113.212158203125, + "grad_norm": 24141.693359375, "learning_rate": 2.1800000000000002e-07, - "loss": 432.3124, + "loss": 264109.525, "step": 1090 }, { "epoch": 0.002222069595219722, - "grad_norm": 70644.9609375, + "grad_norm": 2142753.5, "learning_rate": 2.2e-07, - "loss": 856.8565, + "loss": 721491.5, "step": 1100 }, { "epoch": 0.002242270227903538, - "grad_norm": 1631.7987060546875, + "grad_norm": 46031.328125, "learning_rate": 2.2200000000000003e-07, - "loss": 273.9229, + "loss": 148969.425, "step": 1110 }, { "epoch": 0.0022624708605873535, - "grad_norm": 13581.16015625, + "grad_norm": 663202.125, "learning_rate": 2.2400000000000002e-07, - "loss": 387.1696, + "loss": 394971.2, "step": 1120 }, { "epoch": 0.002282671493271169, - "grad_norm": 4706.87841796875, + "grad_norm": 221180.015625, "learning_rate": 2.26e-07, - "loss": 300.5173, + "loss": 288363.525, "step": 1130 }, { "epoch": 0.002302872125954985, - "grad_norm": 655.0797729492188, + "grad_norm": 22898.529296875, "learning_rate": 2.2800000000000003e-07, - "loss": 141.2374, + "loss": 178754.2, "step": 1140 }, { "epoch": 0.0023230727586388005, - "grad_norm": 5878.240234375, + "grad_norm": 360452.59375, "learning_rate": 2.3000000000000002e-07, - "loss": 253.9427, + "loss": 307751.05, "step": 1150 }, { "epoch": 0.002343273391322616, - "grad_norm": 2291.21142578125, + "grad_norm": 267245.375, "learning_rate": 2.32e-07, - "loss": 285.336, + "loss": 328270.225, "step": 1160 }, { "epoch": 0.0023634740240064318, - "grad_norm": 5558.34814453125, + "grad_norm": 385932.28125, "learning_rate": 2.3400000000000003e-07, - "loss": 223.9155, + "loss": 219737.425, "step": 1170 }, { "epoch": 0.0023836746566902474, - "grad_norm": 14650.7294921875, + "grad_norm": 1135067.625, "learning_rate": 2.3600000000000002e-07, - "loss": 355.8709, + "loss": 571758.0, "step": 1180 }, { "epoch": 0.002403875289374063, - "grad_norm": 6757.7099609375, + "grad_norm": 446322.15625, "learning_rate": 2.3800000000000004e-07, - "loss": 209.0635, + "loss": 223603.1, "step": 1190 }, { "epoch": 0.0024240759220578788, - "grad_norm": 1495.2066650390625, + "grad_norm": 167217.28125, "learning_rate": 2.4000000000000003e-07, - "loss": 112.6854, + "loss": 146446.8, "step": 1200 }, { "epoch": 0.0024442765547416944, - "grad_norm": 1605.6187744140625, + "grad_norm": 161870.109375, "learning_rate": 2.42e-07, - "loss": 227.6503, + "loss": 433750.85, "step": 1210 }, { "epoch": 0.00246447718742551, - "grad_norm": 1119.8724365234375, + "grad_norm": 131540.484375, "learning_rate": 2.44e-07, - "loss": 239.0082, + "loss": 431477.3, "step": 1220 }, { "epoch": 0.0024846778201093257, - "grad_norm": 3259.475341796875, + "grad_norm": 558771.4375, "learning_rate": 2.46e-07, - "loss": 250.875, + "loss": 511033.85, "step": 1230 }, { "epoch": 0.0025048784527931414, - "grad_norm": 410.2232360839844, + "grad_norm": 9898.779296875, "learning_rate": 2.48e-07, - "loss": 163.1823, + "loss": 389582.05, "step": 1240 }, { "epoch": 0.002525079085476957, - "grad_norm": 1629.2432861328125, + "grad_norm": 199460.890625, "learning_rate": 2.5000000000000004e-07, - "loss": 201.8566, + "loss": 320150.05, "step": 1250 }, { "epoch": 0.0025452797181607727, - "grad_norm": 974.5859985351562, + "grad_norm": 36157.79296875, "learning_rate": 2.5200000000000003e-07, - "loss": 261.7459, + "loss": 473816.15, "step": 1260 }, { "epoch": 0.0025654803508445884, - "grad_norm": 587.459228515625, + "grad_norm": 85122.5078125, "learning_rate": 2.54e-07, - "loss": 120.2635, + "loss": 185743.475, "step": 1270 }, { "epoch": 0.002585680983528404, - "grad_norm": 13336.3154296875, + "grad_norm": 1062025.25, "learning_rate": 2.56e-07, - "loss": 172.1209, + "loss": 230305.8, "step": 1280 }, { "epoch": 0.0026058816162122197, - "grad_norm": 270.600830078125, + "grad_norm": 3644.444580078125, "learning_rate": 2.58e-07, - "loss": 134.1804, + "loss": 289938.375, "step": 1290 }, { "epoch": 0.0026260822488960354, - "grad_norm": 1434.50830078125, + "grad_norm": 154559.46875, "learning_rate": 2.6e-07, - "loss": 225.1987, + "loss": 402676.9, "step": 1300 }, { "epoch": 0.002646282881579851, - "grad_norm": 993.0425415039062, + "grad_norm": 98007.5546875, "learning_rate": 2.6200000000000004e-07, - "loss": 174.5251, + "loss": 312613.825, "step": 1310 }, { "epoch": 0.0026664835142636667, - "grad_norm": 438.0826416015625, + "grad_norm": 29007.98828125, "learning_rate": 2.6400000000000003e-07, - "loss": 70.0126, + "loss": 77100.4812, "step": 1320 }, { "epoch": 0.0026866841469474823, - "grad_norm": 124.42053985595703, + "grad_norm": 2148.982666015625, "learning_rate": 2.66e-07, - "loss": 144.7481, + "loss": 303046.325, "step": 1330 }, { "epoch": 0.002706884779631298, - "grad_norm": 2154.919677734375, + "grad_norm": 338692.0, "learning_rate": 2.68e-07, - "loss": 105.6473, + "loss": 203764.7, "step": 1340 }, { "epoch": 0.0027270854123151137, - "grad_norm": 587.4594116210938, + "grad_norm": 48039.57421875, "learning_rate": 2.7e-07, - "loss": 196.7108, + "loss": 443663.05, "step": 1350 }, { "epoch": 0.0027472860449989293, - "grad_norm": 883.718994140625, + "grad_norm": 108720.796875, "learning_rate": 2.72e-07, - "loss": 165.5248, + "loss": 408752.375, "step": 1360 }, { "epoch": 0.002767486677682745, - "grad_norm": 1839.2825927734375, + "grad_norm": 291329.71875, "learning_rate": 2.7400000000000004e-07, - "loss": 135.5073, + "loss": 261791.95, "step": 1370 }, { "epoch": 0.0027876873103665606, - "grad_norm": 11787.134765625, + "grad_norm": 1740633.25, "learning_rate": 2.7600000000000004e-07, - "loss": 161.5529, + "loss": 413837.6, "step": 1380 }, { "epoch": 0.0028078879430503763, - "grad_norm": 1019.1098022460938, + "grad_norm": 197437.46875, "learning_rate": 2.7800000000000003e-07, - "loss": 186.0807, + "loss": 445843.1, "step": 1390 }, { "epoch": 0.002828088575734192, - "grad_norm": 478.6576843261719, + "grad_norm": 11544.482421875, "learning_rate": 2.8e-07, - "loss": 80.4145, + "loss": 82496.9187, "step": 1400 }, { "epoch": 0.0028482892084180076, - "grad_norm": 676.1159057617188, + "grad_norm": 108452.1953125, "learning_rate": 2.82e-07, - "loss": 102.2378, + "loss": 174064.3875, "step": 1410 }, { "epoch": 0.0028684898411018233, - "grad_norm": 1881.320068359375, + "grad_norm": 420826.8125, "learning_rate": 2.8400000000000005e-07, - "loss": 132.2781, + "loss": 261850.825, "step": 1420 }, { "epoch": 0.002888690473785639, - "grad_norm": 2000.854736328125, + "grad_norm": 237736.171875, "learning_rate": 2.8600000000000005e-07, - "loss": 171.1422, + "loss": 328263.55, "step": 1430 }, { "epoch": 0.0029088911064694546, - "grad_norm": 3567.638916015625, + "grad_norm": 680389.5625, "learning_rate": 2.8800000000000004e-07, - "loss": 177.4434, + "loss": 432290.2, "step": 1440 }, { "epoch": 0.0029290917391532703, - "grad_norm": 948.8512573242188, + "grad_norm": 50789.38671875, "learning_rate": 2.9000000000000003e-07, - "loss": 120.84, + "loss": 194833.1, "step": 1450 }, { "epoch": 0.002949292371837086, - "grad_norm": 438.62078857421875, + "grad_norm": 42879.52734375, "learning_rate": 2.92e-07, - "loss": 120.0028, + "loss": 197492.5375, "step": 1460 }, { "epoch": 0.0029694930045209016, - "grad_norm": 929.599365234375, + "grad_norm": 99213.8125, "learning_rate": 2.94e-07, - "loss": 90.2759, + "loss": 189496.525, "step": 1470 }, { "epoch": 0.0029896936372047172, - "grad_norm": 14297.3984375, + "grad_norm": 2998926.5, "learning_rate": 2.9600000000000006e-07, - "loss": 178.2558, + "loss": 486678.0, "step": 1480 }, { "epoch": 0.003009894269888533, - "grad_norm": 475.08367919921875, + "grad_norm": 21957.8984375, "learning_rate": 2.9800000000000005e-07, - "loss": 97.6335, + "loss": 142727.075, "step": 1490 }, { "epoch": 0.0030300949025723486, - "grad_norm": 892.8331909179688, + "grad_norm": 228963.984375, "learning_rate": 3.0000000000000004e-07, - "loss": 67.4573, + "loss": 96635.2312, "step": 1500 }, { "epoch": 0.0030502955352561642, - "grad_norm": 775.4041137695312, + "grad_norm": 145673.921875, "learning_rate": 3.0200000000000003e-07, - "loss": 62.6433, + "loss": 86943.8125, "step": 1510 }, { "epoch": 0.00307049616793998, - "grad_norm": 438.9576721191406, + "grad_norm": 18428.58984375, "learning_rate": 3.04e-07, - "loss": 100.9333, + "loss": 292205.5, "step": 1520 }, { "epoch": 0.0030906968006237955, - "grad_norm": 204.52166748046875, + "grad_norm": 14291.998046875, "learning_rate": 3.06e-07, - "loss": 113.6906, + "loss": 199039.0625, "step": 1530 }, { "epoch": 0.003110897433307611, - "grad_norm": 1490.9913330078125, + "grad_norm": 306500.0, "learning_rate": 3.0800000000000006e-07, - "loss": 269.1036, + "loss": 826510.0, "step": 1540 }, { "epoch": 0.003131098065991427, - "grad_norm": 428.6115417480469, + "grad_norm": 33081.5078125, "learning_rate": 3.1000000000000005e-07, - "loss": 83.4018, + "loss": 137364.925, "step": 1550 }, { "epoch": 0.0031512986986752425, - "grad_norm": 809.4813842773438, + "grad_norm": 205276.09375, "learning_rate": 3.12e-07, - "loss": 102.0261, + "loss": 129617.8875, "step": 1560 }, { "epoch": 0.003171499331359058, - "grad_norm": 975.3469848632812, + "grad_norm": 344623.96875, "learning_rate": 3.14e-07, - "loss": 143.8353, + "loss": 288935.3, "step": 1570 }, { "epoch": 0.003191699964042874, "grad_norm": 0.0, "learning_rate": 3.160000000000001e-07, - "loss": 104.2053, + "loss": 168020.7375, "step": 1580 }, { "epoch": 0.0032119005967266895, - "grad_norm": 618.6730346679688, + "grad_norm": 125314.5390625, "learning_rate": 3.1800000000000007e-07, - "loss": 76.7223, + "loss": 160295.5875, "step": 1590 }, { "epoch": 0.003232101229410505, - "grad_norm": 272.2789306640625, + "grad_norm": 31785.748046875, "learning_rate": 3.2e-07, - "loss": 93.3761, + "loss": 186077.625, "step": 1600 }, { "epoch": 0.003252301862094321, - "grad_norm": 1097.93505859375, + "grad_norm": 125310.3359375, "learning_rate": 3.22e-07, - "loss": 110.7409, + "loss": 226773.725, "step": 1610 }, { "epoch": 0.0032725024947781365, - "grad_norm": 2503.691650390625, + "grad_norm": 612683.6875, "learning_rate": 3.24e-07, - "loss": 117.8427, + "loss": 204765.775, "step": 1620 }, { "epoch": 0.003292703127461952, - "grad_norm": 310.89483642578125, + "grad_norm": 63337.984375, "learning_rate": 3.26e-07, - "loss": 98.9289, + "loss": 194911.925, "step": 1630 }, { "epoch": 0.003312903760145768, - "grad_norm": 973.0148315429688, + "grad_norm": 294899.625, "learning_rate": 3.280000000000001e-07, - "loss": 110.4893, + "loss": 248289.475, "step": 1640 }, { "epoch": 0.0033331043928295835, - "grad_norm": 270.54412841796875, + "grad_norm": 18086.005859375, "learning_rate": 3.3e-07, - "loss": 77.6948, + "loss": 164243.2375, "step": 1650 }, { "epoch": 0.003353305025513399, - "grad_norm": 172.4710235595703, + "grad_norm": 6473.33154296875, "learning_rate": 3.32e-07, - "loss": 64.3907, + "loss": 349541.65, "step": 1660 }, { "epoch": 0.0033735056581972148, - "grad_norm": 491.76654052734375, + "grad_norm": 53033.82421875, "learning_rate": 3.34e-07, - "loss": 90.2913, + "loss": 165609.9625, "step": 1670 }, { "epoch": 0.0033937062908810304, - "grad_norm": 2328.1591796875, + "grad_norm": 875034.3125, "learning_rate": 3.36e-07, - "loss": 79.351, + "loss": 208381.1375, "step": 1680 }, { "epoch": 0.003413906923564846, - "grad_norm": 1274.43017578125, + "grad_norm": 288532.5625, "learning_rate": 3.38e-07, - "loss": 140.8928, + "loss": 364191.8, "step": 1690 }, { "epoch": 0.0034341075562486618, - "grad_norm": 190.69508361816406, + "grad_norm": 6139.25927734375, "learning_rate": 3.4000000000000003e-07, - "loss": 78.0292, + "loss": 162761.1875, "step": 1700 }, { "epoch": 0.0034543081889324774, - "grad_norm": 295.1920166015625, + "grad_norm": 47536.19140625, "learning_rate": 3.42e-07, - "loss": 94.6306, + "loss": 137680.1, "step": 1710 }, { "epoch": 0.003474508821616293, - "grad_norm": 609.4105834960938, + "grad_norm": 92837.8203125, "learning_rate": 3.44e-07, - "loss": 108.0684, + "loss": 217534.45, "step": 1720 }, { "epoch": 0.0034947094543001087, - "grad_norm": 1126.4515380859375, + "grad_norm": 314604.71875, "learning_rate": 3.46e-07, - "loss": 86.9196, + "loss": 131719.4125, "step": 1730 }, { "epoch": 0.0035149100869839244, - "grad_norm": 398.4494323730469, + "grad_norm": 109403.1953125, "learning_rate": 3.48e-07, - "loss": 113.1218, + "loss": 283084.1, "step": 1740 }, { "epoch": 0.00353511071966774, - "grad_norm": 319.8553771972656, + "grad_norm": 34055.1796875, "learning_rate": 3.5000000000000004e-07, - "loss": 62.7632, + "loss": 111559.275, "step": 1750 }, { "epoch": 0.0035553113523515557, - "grad_norm": 254.40948486328125, + "grad_norm": 38673.7890625, "learning_rate": 3.5200000000000003e-07, - "loss": 134.6582, + "loss": 283747.6, "step": 1760 }, { "epoch": 0.0035755119850353714, - "grad_norm": 1010.0476684570312, + "grad_norm": 365158.875, "learning_rate": 3.54e-07, - "loss": 148.9172, + "loss": 427924.4, "step": 1770 }, { "epoch": 0.003595712617719187, - "grad_norm": 467.79229736328125, + "grad_norm": 148021.84375, "learning_rate": 3.56e-07, - "loss": 82.6746, + "loss": 145041.5, "step": 1780 }, { "epoch": 0.0036159132504030027, - "grad_norm": 334.4750061035156, + "grad_norm": 60648.984375, "learning_rate": 3.58e-07, - "loss": 59.3184, + "loss": 113247.6875, "step": 1790 }, { "epoch": 0.0036361138830868184, - "grad_norm": 930.7593994140625, + "grad_norm": 400989.375, "learning_rate": 3.6e-07, - "loss": 119.8977, + "loss": 317134.225, "step": 1800 }, { "epoch": 0.003656314515770634, - "grad_norm": 469.00738525390625, + "grad_norm": 62391.984375, "learning_rate": 3.6200000000000004e-07, - "loss": 77.2972, + "loss": 117676.6625, "step": 1810 }, { "epoch": 0.0036765151484544497, - "grad_norm": 227.85411071777344, + "grad_norm": 39016.015625, "learning_rate": 3.6400000000000003e-07, - "loss": 100.8369, + "loss": 239388.375, "step": 1820 }, { "epoch": 0.0036967157811382653, - "grad_norm": 288.67144775390625, + "grad_norm": 19838.15625, "learning_rate": 3.66e-07, - "loss": 178.2947, + "loss": 603931.4, "step": 1830 }, { "epoch": 0.003716916413822081, - "grad_norm": 254.9197998046875, + "grad_norm": 27033.650390625, "learning_rate": 3.68e-07, - "loss": 122.8252, + "loss": 459653.3, "step": 1840 }, { "epoch": 0.0037371170465058967, - "grad_norm": 175.5465850830078, + "grad_norm": 9690.869140625, "learning_rate": 3.7e-07, - "loss": 55.4609, + "loss": 155030.4, "step": 1850 }, { "epoch": 0.0037573176791897123, - "grad_norm": 876.6700439453125, + "grad_norm": 197071.5625, "learning_rate": 3.72e-07, - "loss": 95.3277, + "loss": 189442.8875, "step": 1860 }, { "epoch": 0.003777518311873528, - "grad_norm": 437.26739501953125, + "grad_norm": 75994.765625, "learning_rate": 3.7400000000000004e-07, - "loss": 126.6525, + "loss": 282707.875, "step": 1870 }, { "epoch": 0.0037977189445573436, - "grad_norm": 870.8125, + "grad_norm": 211027.21875, "learning_rate": 3.7600000000000003e-07, - "loss": 89.4691, + "loss": 263612.375, "step": 1880 }, { "epoch": 0.0038179195772411593, - "grad_norm": 272.8868713378906, + "grad_norm": 31661.8203125, "learning_rate": 3.78e-07, - "loss": 122.4074, + "loss": 321488.225, "step": 1890 }, { "epoch": 0.003838120209924975, - "grad_norm": 183.42120361328125, + "grad_norm": 3400.19873046875, "learning_rate": 3.8e-07, - "loss": 61.9867, + "loss": 108897.3, "step": 1900 }, { "epoch": 0.0038583208426087906, - "grad_norm": 194.27969360351562, + "grad_norm": 16761.791015625, "learning_rate": 3.82e-07, - "loss": 50.8383, + "loss": 132380.775, "step": 1910 }, { "epoch": 0.0038785214752926063, - "grad_norm": 474.03411865234375, + "grad_norm": 80789.4921875, "learning_rate": 3.84e-07, - "loss": 105.5464, + "loss": 255233.55, "step": 1920 }, { "epoch": 0.003898722107976422, - "grad_norm": 140.2830810546875, + "grad_norm": 28090.84375, "learning_rate": 3.8600000000000004e-07, - "loss": 106.7615, + "loss": 263732.3, "step": 1930 }, { "epoch": 0.003918922740660237, - "grad_norm": 1215.9754638671875, + "grad_norm": 688702.1875, "learning_rate": 3.8800000000000003e-07, - "loss": 91.5135, + "loss": 245063.8, "step": 1940 }, { "epoch": 0.003939123373344053, - "grad_norm": 528.3040771484375, + "grad_norm": 141316.46875, "learning_rate": 3.9e-07, - "loss": 53.1781, + "loss": 97047.1313, "step": 1950 }, { "epoch": 0.0039593240060278685, - "grad_norm": 1267.73779296875, + "grad_norm": 431741.15625, "learning_rate": 3.92e-07, - "loss": 104.2797, + "loss": 369879.025, "step": 1960 }, { "epoch": 0.003979524638711684, - "grad_norm": 415.97540283203125, + "grad_norm": 108297.15625, "learning_rate": 3.94e-07, - "loss": 86.2812, + "loss": 142291.625, "step": 1970 }, { "epoch": 0.0039997252713955, - "grad_norm": 479.4092712402344, + "grad_norm": 53267.33203125, "learning_rate": 3.9600000000000005e-07, - "loss": 82.7487, + "loss": 169082.3, "step": 1980 }, { "epoch": 0.0040199259040793155, - "grad_norm": 2517.703857421875, + "grad_norm": 265487.6875, "learning_rate": 3.9800000000000004e-07, - "loss": 109.8114, + "loss": 168423.3, "step": 1990 }, { "epoch": 0.004040126536763131, - "grad_norm": 1479.90234375, + "grad_norm": 381406.46875, "learning_rate": 4.0000000000000003e-07, - "loss": 65.3891, + "loss": 124377.525, "step": 2000 }, { "epoch": 0.004060327169446947, - "grad_norm": 1292.1494140625, + "grad_norm": 285048.375, "learning_rate": 4.02e-07, - "loss": 74.328, + "loss": 194957.225, "step": 2010 }, { "epoch": 0.0040805278021307624, - "grad_norm": 340.7449645996094, + "grad_norm": 50589.5, "learning_rate": 4.04e-07, - "loss": 82.6, + "loss": 225172.375, "step": 2020 }, { "epoch": 0.004100728434814578, - "grad_norm": 3876.7412109375, + "grad_norm": 771637.875, "learning_rate": 4.06e-07, - "loss": 121.7228, + "loss": 308721.15, "step": 2030 }, { "epoch": 0.004120929067498394, - "grad_norm": 931.7589111328125, + "grad_norm": 82126.8046875, "learning_rate": 4.0800000000000005e-07, - "loss": 82.096, + "loss": 252142.025, "step": 2040 }, { "epoch": 0.004141129700182209, - "grad_norm": 834.064208984375, + "grad_norm": 311785.15625, "learning_rate": 4.1000000000000004e-07, - "loss": 134.9444, + "loss": 427140.2, "step": 2050 }, { "epoch": 0.004161330332866025, - "grad_norm": 480.33331298828125, + "grad_norm": 71258.2734375, "learning_rate": 4.1200000000000004e-07, - "loss": 67.3013, + "loss": 114754.05, "step": 2060 }, { "epoch": 0.004181530965549841, - "grad_norm": 2522.739501953125, + "grad_norm": 580333.6875, "learning_rate": 4.1400000000000003e-07, - "loss": 125.4579, + "loss": 232836.95, "step": 2070 }, { "epoch": 0.004201731598233656, - "grad_norm": 582.5256958007812, + "grad_norm": 65090.8515625, "learning_rate": 4.16e-07, - "loss": 142.2961, + "loss": 137883.1375, "step": 2080 }, { "epoch": 0.004221932230917472, - "grad_norm": 699.96923828125, + "grad_norm": 68443.625, "learning_rate": 4.18e-07, - "loss": 149.4893, + "loss": 364943.225, "step": 2090 }, { "epoch": 0.004242132863601288, - "grad_norm": 158.1244659423828, + "grad_norm": 12664.0009765625, "learning_rate": 4.2000000000000006e-07, - "loss": 49.9103, + "loss": 96624.3938, "step": 2100 }, { "epoch": 0.004262333496285103, - "grad_norm": 810.2927856445312, + "grad_norm": 277409.65625, "learning_rate": 4.2200000000000005e-07, - "loss": 99.5848, + "loss": 191873.3125, "step": 2110 }, { "epoch": 0.004282534128968919, "grad_norm": 0.0, "learning_rate": 4.2400000000000004e-07, - "loss": 76.9101, + "loss": 191697.575, "step": 2120 }, { "epoch": 0.004302734761652735, - "grad_norm": 3999.092041015625, + "grad_norm": 898991.8125, "learning_rate": 4.2600000000000003e-07, - "loss": 127.9206, + "loss": 305840.35, "step": 2130 }, { "epoch": 0.00432293539433655, - "grad_norm": 1941.1787109375, + "grad_norm": 667169.125, "learning_rate": 4.28e-07, - "loss": 116.6594, + "loss": 399287.4, "step": 2140 }, { "epoch": 0.004343136027020366, - "grad_norm": 263.46563720703125, + "grad_norm": 9812.39453125, "learning_rate": 4.3e-07, - "loss": 53.0862, + "loss": 106857.6625, "step": 2150 }, { "epoch": 0.004363336659704182, - "grad_norm": 517.8768920898438, + "grad_norm": 64556.76953125, "learning_rate": 4.3200000000000006e-07, - "loss": 94.6016, + "loss": 251993.05, "step": 2160 }, { "epoch": 0.004383537292387997, - "grad_norm": 298.11346435546875, + "grad_norm": 26484.95703125, "learning_rate": 4.3400000000000005e-07, - "loss": 73.2881, + "loss": 146298.05, "step": 2170 }, { "epoch": 0.004403737925071813, - "grad_norm": 3516.132568359375, + "grad_norm": 1562062.0, "learning_rate": 4.3600000000000004e-07, - "loss": 133.0595, + "loss": 420058.7, "step": 2180 }, { "epoch": 0.004423938557755629, - "grad_norm": 214.3043670654297, + "grad_norm": 10332.0205078125, "learning_rate": 4.3800000000000003e-07, - "loss": 54.9281, + "loss": 96537.2063, "step": 2190 }, { "epoch": 0.004444139190439444, - "grad_norm": 336.51812744140625, + "grad_norm": 87179.921875, "learning_rate": 4.4e-07, - "loss": 56.0404, + "loss": 74717.625, "step": 2200 }, { "epoch": 0.00446433982312326, - "grad_norm": 645.703857421875, + "grad_norm": 315035.25, "learning_rate": 4.4200000000000007e-07, - "loss": 117.952, + "loss": 295844.45, "step": 2210 }, { "epoch": 0.004484540455807076, - "grad_norm": 754.068603515625, + "grad_norm": 160577.8125, "learning_rate": 4.4400000000000006e-07, - "loss": 89.8843, + "loss": 182387.775, "step": 2220 }, { "epoch": 0.004504741088490891, - "grad_norm": 461.39532470703125, + "grad_norm": 49482.359375, "learning_rate": 4.4600000000000005e-07, - "loss": 99.9239, + "loss": 182299.525, "step": 2230 }, { "epoch": 0.004524941721174707, - "grad_norm": 564.421630859375, + "grad_norm": 74800.6796875, "learning_rate": 4.4800000000000004e-07, - "loss": 70.6959, + "loss": 170700.25, "step": 2240 }, { "epoch": 0.004545142353858523, - "grad_norm": 456.60406494140625, + "grad_norm": 65843.171875, "learning_rate": 4.5000000000000003e-07, - "loss": 95.7541, + "loss": 160756.8, "step": 2250 }, { "epoch": 0.004565342986542338, - "grad_norm": 75.55956268310547, + "grad_norm": 842.5634765625, "learning_rate": 4.52e-07, - "loss": 86.0029, + "loss": 194797.4625, "step": 2260 }, { "epoch": 0.004585543619226154, - "grad_norm": 650.9395751953125, + "grad_norm": 172040.0, "learning_rate": 4.5400000000000007e-07, - "loss": 97.5285, + "loss": 255901.9, "step": 2270 }, { "epoch": 0.00460574425190997, - "grad_norm": 2492.443115234375, + "grad_norm": 602447.4375, "learning_rate": 4.5600000000000006e-07, - "loss": 89.6205, + "loss": 170729.95, "step": 2280 }, { "epoch": 0.004625944884593785, - "grad_norm": 2232.35009765625, + "grad_norm": 536963.0625, "learning_rate": 4.5800000000000005e-07, - "loss": 88.7037, + "loss": 166643.425, "step": 2290 }, { "epoch": 0.004646145517277601, - "grad_norm": 608.87841796875, + "grad_norm": 143424.640625, "learning_rate": 4.6000000000000004e-07, - "loss": 100.9818, + "loss": 218563.2, "step": 2300 }, { "epoch": 0.004666346149961417, - "grad_norm": 1630.1737060546875, + "grad_norm": 740198.9375, "learning_rate": 4.6200000000000003e-07, - "loss": 146.6656, + "loss": 400643.0, "step": 2310 }, { "epoch": 0.004686546782645232, - "grad_norm": 359.7907409667969, + "grad_norm": 35267.39453125, "learning_rate": 4.64e-07, - "loss": 65.0291, + "loss": 92618.4062, "step": 2320 }, { "epoch": 0.004706747415329048, "grad_norm": 0.0, "learning_rate": 4.6600000000000007e-07, - "loss": 43.4686, + "loss": 67718.8562, "step": 2330 }, { "epoch": 0.0047269480480128636, - "grad_norm": 1424.6302490234375, + "grad_norm": 814188.8125, "learning_rate": 4.6800000000000006e-07, - "loss": 89.2737, + "loss": 207296.7, "step": 2340 }, { "epoch": 0.004747148680696679, - "grad_norm": 2092.49609375, + "grad_norm": 825947.5, "learning_rate": 4.7000000000000005e-07, - "loss": 96.8369, + "loss": 243623.6, "step": 2350 }, { "epoch": 0.004767349313380495, - "grad_norm": 406.96240234375, + "grad_norm": 116385.953125, "learning_rate": 4.7200000000000004e-07, - "loss": 49.3349, + "loss": 60777.1438, "step": 2360 }, { "epoch": 0.0047875499460643105, - "grad_norm": 658.2279052734375, + "grad_norm": 428868.125, "learning_rate": 4.7400000000000004e-07, - "loss": 96.8905, + "loss": 308163.775, "step": 2370 }, { "epoch": 0.004807750578748126, - "grad_norm": 519.50341796875, + "grad_norm": 256608.0625, "learning_rate": 4.760000000000001e-07, - "loss": 74.312, + "loss": 148660.975, "step": 2380 }, { "epoch": 0.004827951211431942, - "grad_norm": 908.0526123046875, + "grad_norm": 128981.8359375, "learning_rate": 4.78e-07, - "loss": 90.0288, + "loss": 204653.6625, "step": 2390 }, { "epoch": 0.0048481518441157575, - "grad_norm": 146.76319885253906, + "grad_norm": 7248.716796875, "learning_rate": 4.800000000000001e-07, - "loss": 73.4254, + "loss": 154707.1625, "step": 2400 }, { "epoch": 0.004868352476799573, - "grad_norm": 1243.083740234375, + "grad_norm": 473580.09375, "learning_rate": 4.82e-07, - "loss": 50.6427, + "loss": 149534.8125, "step": 2410 }, { "epoch": 0.004888553109483389, - "grad_norm": 657.0536499023438, + "grad_norm": 18061.369140625, "learning_rate": 4.84e-07, - "loss": 69.0492, + "loss": 132146.3625, "step": 2420 }, { "epoch": 0.0049087537421672045, - "grad_norm": 342.3917236328125, + "grad_norm": 42779.6640625, "learning_rate": 4.86e-07, - "loss": 49.1884, + "loss": 68987.4, "step": 2430 }, { "epoch": 0.00492895437485102, - "grad_norm": 502.2886657714844, + "grad_norm": 93670.3203125, "learning_rate": 4.88e-07, - "loss": 80.7097, + "loss": 223834.25, "step": 2440 }, { "epoch": 0.004949155007534836, - "grad_norm": 351.9439392089844, + "grad_norm": 48592.94921875, "learning_rate": 4.900000000000001e-07, - "loss": 88.9331, + "loss": 178603.5625, "step": 2450 }, { "epoch": 0.0049693556402186515, - "grad_norm": 849.8200073242188, + "grad_norm": 154781.4375, "learning_rate": 4.92e-07, - "loss": 73.8392, + "loss": 139291.4375, "step": 2460 }, { "epoch": 0.004989556272902467, - "grad_norm": 229.1455078125, + "grad_norm": 34159.4296875, "learning_rate": 4.940000000000001e-07, - "loss": 93.6035, + "loss": 221261.875, "step": 2470 }, { "epoch": 0.005009756905586283, - "grad_norm": 575.8076171875, + "grad_norm": 92654.875, "learning_rate": 4.96e-07, - "loss": 77.5868, + "loss": 152205.075, "step": 2480 }, { "epoch": 0.0050299575382700985, - "grad_norm": 374.3426208496094, + "grad_norm": 70986.1484375, "learning_rate": 4.98e-07, - "loss": 88.166, + "loss": 310923.75, "step": 2490 }, { "epoch": 0.005050158170953914, - "grad_norm": 574.907958984375, + "grad_norm": 222055.171875, "learning_rate": 5.000000000000001e-07, - "loss": 110.9288, + "loss": 245125.8, "step": 2500 }, { "epoch": 0.00507035880363773, - "grad_norm": 721.3595581054688, + "grad_norm": 274362.78125, "learning_rate": 5.02e-07, - "loss": 82.0438, + "loss": 146873.6, "step": 2510 }, { "epoch": 0.0050905594363215454, - "grad_norm": 407.7942199707031, + "grad_norm": 136488.765625, "learning_rate": 5.040000000000001e-07, - "loss": 92.8602, + "loss": 171489.625, "step": 2520 }, { "epoch": 0.005110760069005361, - "grad_norm": 355.7406311035156, + "grad_norm": 125160.5234375, "learning_rate": 5.06e-07, - "loss": 123.6961, + "loss": 239504.05, "step": 2530 }, { "epoch": 0.005130960701689177, - "grad_norm": 722.0, + "grad_norm": 179528.5625, "learning_rate": 5.08e-07, - "loss": 61.9486, + "loss": 380109.675, "step": 2540 }, { "epoch": 0.005151161334372992, - "grad_norm": 347.21484375, + "grad_norm": 41804.015625, "learning_rate": 5.1e-07, - "loss": 56.5078, + "loss": 104765.2563, "step": 2550 }, { "epoch": 0.005171361967056808, - "grad_norm": 394.5853271484375, + "grad_norm": 214090.328125, "learning_rate": 5.12e-07, - "loss": 59.994, + "loss": 192548.2125, "step": 2560 }, { "epoch": 0.005191562599740624, - "grad_norm": 252.5443115234375, + "grad_norm": 20515.794921875, "learning_rate": 5.140000000000001e-07, - "loss": 82.5115, + "loss": 153032.2625, "step": 2570 }, { "epoch": 0.005211763232424439, - "grad_norm": 192.8706817626953, + "grad_norm": 26728.29296875, "learning_rate": 5.16e-07, - "loss": 129.1886, + "loss": 358690.375, "step": 2580 }, { "epoch": 0.005231963865108255, - "grad_norm": 656.0848999023438, + "grad_norm": 146361.640625, "learning_rate": 5.180000000000001e-07, - "loss": 89.5882, + "loss": 197724.8875, "step": 2590 }, { "epoch": 0.005252164497792071, - "grad_norm": 862.1351928710938, + "grad_norm": 310606.78125, "learning_rate": 5.2e-07, - "loss": 62.8564, + "loss": 115457.4625, "step": 2600 }, { "epoch": 0.005272365130475886, - "grad_norm": 6233.95849609375, + "grad_norm": 1293998.125, "learning_rate": 5.22e-07, - "loss": 113.1677, + "loss": 207157.8625, "step": 2610 }, { "epoch": 0.005292565763159702, - "grad_norm": 170.0315399169922, + "grad_norm": 801.4590454101562, "learning_rate": 5.240000000000001e-07, - "loss": 64.3998, + "loss": 193913.65, "step": 2620 }, { "epoch": 0.005312766395843518, - "grad_norm": 906.1055297851562, + "grad_norm": 321245.28125, "learning_rate": 5.26e-07, - "loss": 156.6982, + "loss": 586454.95, "step": 2630 }, { "epoch": 0.005332967028527333, - "grad_norm": 1416.2374267578125, + "grad_norm": 355190.0, "learning_rate": 5.280000000000001e-07, - "loss": 99.133, + "loss": 225604.575, "step": 2640 }, { "epoch": 0.005353167661211149, - "grad_norm": 1397.4161376953125, + "grad_norm": 589980.5625, "learning_rate": 5.3e-07, - "loss": 98.2664, + "loss": 323603.075, "step": 2650 }, { "epoch": 0.005373368293894965, - "grad_norm": 305.9736633300781, + "grad_norm": 75395.4453125, "learning_rate": 5.32e-07, - "loss": 72.9986, + "loss": 257123.725, "step": 2660 }, { "epoch": 0.00539356892657878, - "grad_norm": 801.323974609375, + "grad_norm": 248380.703125, "learning_rate": 5.340000000000001e-07, - "loss": 82.8052, + "loss": 250131.425, "step": 2670 }, { "epoch": 0.005413769559262596, - "grad_norm": 809.2282104492188, + "grad_norm": 346828.125, "learning_rate": 5.36e-07, - "loss": 95.8148, + "loss": 184158.2, "step": 2680 }, { "epoch": 0.005433970191946412, - "grad_norm": 558.6268310546875, + "grad_norm": 186409.421875, "learning_rate": 5.380000000000001e-07, - "loss": 43.4266, + "loss": 64563.6125, "step": 2690 }, { "epoch": 0.005454170824630227, - "grad_norm": 359.20068359375, + "grad_norm": 44146.40234375, "learning_rate": 5.4e-07, - "loss": 96.1491, + "loss": 262912.125, "step": 2700 }, { "epoch": 0.005474371457314043, - "grad_norm": 363.21295166015625, + "grad_norm": 117163.234375, "learning_rate": 5.420000000000001e-07, - "loss": 85.8485, + "loss": 216751.875, "step": 2710 }, { "epoch": 0.005494572089997859, - "grad_norm": 287.8823547363281, + "grad_norm": 10891.802734375, "learning_rate": 5.44e-07, - "loss": 39.5392, + "loss": 118410.625, "step": 2720 }, { "epoch": 0.005514772722681674, - "grad_norm": 1454.7030029296875, + "grad_norm": 710504.5625, "learning_rate": 5.46e-07, - "loss": 93.551, + "loss": 295034.425, "step": 2730 }, { "epoch": 0.00553497335536549, - "grad_norm": 769.43798828125, + "grad_norm": 169628.203125, "learning_rate": 5.480000000000001e-07, - "loss": 105.6374, + "loss": 299680.725, "step": 2740 }, { "epoch": 0.005555173988049306, - "grad_norm": 444.657958984375, + "grad_norm": 98351.1640625, "learning_rate": 5.5e-07, - "loss": 68.2009, + "loss": 177948.525, "step": 2750 }, { "epoch": 0.005575374620733121, - "grad_norm": 1487.874267578125, + "grad_norm": 273923.125, "learning_rate": 5.520000000000001e-07, - "loss": 123.8217, + "loss": 358400.425, "step": 2760 }, { "epoch": 0.005595575253416937, - "grad_norm": 524.3209838867188, + "grad_norm": 195207.640625, "learning_rate": 5.54e-07, - "loss": 63.6022, + "loss": 166270.5125, "step": 2770 }, { "epoch": 0.005615775886100753, - "grad_norm": 633.0186767578125, + "grad_norm": 233813.65625, "learning_rate": 5.560000000000001e-07, - "loss": 141.7133, + "loss": 409021.875, "step": 2780 }, { "epoch": 0.005635976518784568, - "grad_norm": 145.8428955078125, + "grad_norm": 15438.251953125, "learning_rate": 5.580000000000001e-07, - "loss": 116.3414, + "loss": 315298.45, "step": 2790 }, { "epoch": 0.005656177151468384, - "grad_norm": 161.0638885498047, + "grad_norm": 26042.484375, "learning_rate": 5.6e-07, - "loss": 60.23, + "loss": 173566.275, "step": 2800 }, { "epoch": 0.0056763777841522, - "grad_norm": 481.66070556640625, + "grad_norm": 84348.9140625, "learning_rate": 5.620000000000001e-07, - "loss": 52.4764, + "loss": 124864.6875, "step": 2810 }, { "epoch": 0.005696578416836015, - "grad_norm": 309.8464050292969, + "grad_norm": 7167.1142578125, "learning_rate": 5.64e-07, - "loss": 82.8577, + "loss": 193868.0875, "step": 2820 }, { "epoch": 0.005716779049519831, - "grad_norm": 224.34535217285156, + "grad_norm": 40510.1015625, "learning_rate": 5.660000000000001e-07, - "loss": 77.6688, + "loss": 234499.55, "step": 2830 }, { "epoch": 0.0057369796822036466, - "grad_norm": 662.8355102539062, + "grad_norm": 156678.671875, "learning_rate": 5.680000000000001e-07, - "loss": 90.837, + "loss": 356035.8, "step": 2840 }, { "epoch": 0.005757180314887462, - "grad_norm": 734.1444702148438, + "grad_norm": 310792.34375, "learning_rate": 5.7e-07, - "loss": 107.0871, + "loss": 279862.175, "step": 2850 }, { "epoch": 0.005777380947571278, - "grad_norm": 483.3670349121094, + "grad_norm": 232700.046875, "learning_rate": 5.720000000000001e-07, - "loss": 62.0021, + "loss": 181587.15, "step": 2860 }, { "epoch": 0.0057975815802550935, - "grad_norm": 256.9395751953125, + "grad_norm": 20130.74609375, "learning_rate": 5.74e-07, - "loss": 69.7057, + "loss": 347430.2, "step": 2870 }, { "epoch": 0.005817782212938909, - "grad_norm": 959.993896484375, + "grad_norm": 496685.15625, "learning_rate": 5.760000000000001e-07, - "loss": 88.4341, + "loss": 288673.55, "step": 2880 }, { "epoch": 0.005837982845622725, - "grad_norm": 981.29345703125, + "grad_norm": 332696.78125, "learning_rate": 5.78e-07, - "loss": 67.0605, + "loss": 183658.625, "step": 2890 }, { "epoch": 0.0058581834783065405, - "grad_norm": 360.46368408203125, + "grad_norm": 13086.6083984375, "learning_rate": 5.800000000000001e-07, - "loss": 70.2873, + "loss": 185702.3125, "step": 2900 }, { "epoch": 0.005878384110990356, - "grad_norm": 712.8489990234375, + "grad_norm": 202711.265625, "learning_rate": 5.820000000000001e-07, - "loss": 83.0383, + "loss": 211015.025, "step": 2910 }, { "epoch": 0.005898584743674172, - "grad_norm": 1037.8680419921875, + "grad_norm": 170362.8125, "learning_rate": 5.84e-07, - "loss": 133.6744, + "loss": 334237.75, "step": 2920 }, { "epoch": 0.0059187853763579875, - "grad_norm": 410.0044250488281, + "grad_norm": 103737.3984375, "learning_rate": 5.860000000000001e-07, - "loss": 60.8479, + "loss": 93036.1812, "step": 2930 }, { "epoch": 0.005938986009041803, - "grad_norm": 104.19155883789062, + "grad_norm": 2042.2506103515625, "learning_rate": 5.88e-07, - "loss": 44.2561, + "loss": 96135.7625, "step": 2940 }, { "epoch": 0.005959186641725619, - "grad_norm": 966.65966796875, + "grad_norm": 376746.78125, "learning_rate": 5.900000000000001e-07, - "loss": 51.1191, + "loss": 146448.2625, "step": 2950 }, { "epoch": 0.0059793872744094345, - "grad_norm": 142.35443115234375, + "grad_norm": 88757.125, "learning_rate": 5.920000000000001e-07, - "loss": 63.691, + "loss": 253396.9, "step": 2960 }, { "epoch": 0.00599958790709325, - "grad_norm": 192.0156707763672, + "grad_norm": 26718.490234375, "learning_rate": 5.94e-07, - "loss": 100.6594, + "loss": 250171.475, "step": 2970 }, { "epoch": 0.006019788539777066, - "grad_norm": 435.83447265625, + "grad_norm": 114003.96875, "learning_rate": 5.960000000000001e-07, - "loss": 85.0416, + "loss": 158155.5375, "step": 2980 }, { "epoch": 0.0060399891724608815, - "grad_norm": 464.9222106933594, + "grad_norm": 173111.984375, "learning_rate": 5.98e-07, - "loss": 60.7656, + "loss": 111310.425, "step": 2990 }, { "epoch": 0.006060189805144697, - "grad_norm": 265.5617980957031, + "grad_norm": 34841.87890625, "learning_rate": 6.000000000000001e-07, - "loss": 38.6057, + "loss": 79433.6, "step": 3000 }, { "epoch": 0.006080390437828513, - "grad_norm": 81.73272705078125, + "grad_norm": 3505.210693359375, "learning_rate": 6.02e-07, - "loss": 57.3032, + "loss": 134622.825, "step": 3010 }, { "epoch": 0.0061005910705123284, - "grad_norm": 6022.1904296875, + "grad_norm": 1039582.1875, "learning_rate": 6.040000000000001e-07, - "loss": 92.7834, + "loss": 172318.975, "step": 3020 }, { "epoch": 0.006120791703196144, - "grad_norm": 737.8707275390625, + "grad_norm": 405198.125, "learning_rate": 6.060000000000001e-07, - "loss": 69.2623, + "loss": 246828.025, "step": 3030 }, { "epoch": 0.00614099233587996, - "grad_norm": 398.296875, + "grad_norm": 148695.546875, "learning_rate": 6.08e-07, - "loss": 66.5551, + "loss": 117633.2625, "step": 3040 }, { "epoch": 0.006161192968563775, - "grad_norm": 902.4992065429688, + "grad_norm": 552680.0625, "learning_rate": 6.100000000000001e-07, - "loss": 66.4267, + "loss": 168710.9875, "step": 3050 }, { "epoch": 0.006181393601247591, - "grad_norm": 454.4711608886719, + "grad_norm": 259876.796875, "learning_rate": 6.12e-07, - "loss": 74.1148, + "loss": 291619.975, "step": 3060 }, { "epoch": 0.006201594233931407, - "grad_norm": 532.4801025390625, + "grad_norm": 94179.421875, "learning_rate": 6.140000000000001e-07, - "loss": 63.2332, + "loss": 209473.125, "step": 3070 }, { "epoch": 0.006221794866615222, "grad_norm": 0.0, "learning_rate": 6.160000000000001e-07, - "loss": 64.6687, + "loss": 90016.8625, "step": 3080 }, { "epoch": 0.006241995499299038, - "grad_norm": 502.45843505859375, + "grad_norm": 129408.25, "learning_rate": 6.180000000000001e-07, - "loss": 66.1464, + "loss": 199061.9125, "step": 3090 }, { "epoch": 0.006262196131982854, - "grad_norm": 278.95416259765625, + "grad_norm": 106802.2109375, "learning_rate": 6.200000000000001e-07, - "loss": 55.2986, + "loss": 137387.6, "step": 3100 }, { "epoch": 0.006282396764666669, - "grad_norm": 485.7933349609375, + "grad_norm": 66804.9296875, "learning_rate": 6.22e-07, - "loss": 84.7006, + "loss": 221652.9, "step": 3110 }, { "epoch": 0.006302597397350485, - "grad_norm": 84.02503967285156, + "grad_norm": 2376.517333984375, "learning_rate": 6.24e-07, - "loss": 43.8232, + "loss": 106453.4125, "step": 3120 }, { "epoch": 0.006322798030034301, - "grad_norm": 294.9523010253906, + "grad_norm": 22295.984375, "learning_rate": 6.260000000000001e-07, - "loss": 47.3571, + "loss": 117849.025, "step": 3130 }, { "epoch": 0.006342998662718116, - "grad_norm": 690.8302001953125, + "grad_norm": 81029.7421875, "learning_rate": 6.28e-07, - "loss": 106.0559, + "loss": 348793.875, "step": 3140 }, { "epoch": 0.006363199295401932, - "grad_norm": 136.47386169433594, + "grad_norm": 8544.5625, "learning_rate": 6.3e-07, - "loss": 64.1328, + "loss": 132255.25, "step": 3150 }, { "epoch": 0.006383399928085748, - "grad_norm": 1511.87939453125, + "grad_norm": 792589.4375, "learning_rate": 6.320000000000002e-07, - "loss": 126.6674, + "loss": 481396.45, "step": 3160 }, { "epoch": 0.006403600560769563, - "grad_norm": 739.3742065429688, + "grad_norm": 205887.375, "learning_rate": 6.34e-07, - "loss": 75.301, + "loss": 223400.775, "step": 3170 }, { "epoch": 0.006423801193453379, - "grad_norm": 418.8565979003906, + "grad_norm": 59440.578125, "learning_rate": 6.360000000000001e-07, - "loss": 67.626, + "loss": 184737.3125, "step": 3180 }, { "epoch": 0.006444001826137195, - "grad_norm": 281.0145568847656, + "grad_norm": 80907.9375, "learning_rate": 6.38e-07, - "loss": 68.8929, + "loss": 173582.95, "step": 3190 }, { "epoch": 0.00646420245882101, - "grad_norm": 245.50973510742188, + "grad_norm": 9571.0927734375, "learning_rate": 6.4e-07, - "loss": 88.0388, + "loss": 153110.2875, "step": 3200 }, { "epoch": 0.006484403091504826, - "grad_norm": 336.1756591796875, + "grad_norm": 20507.84375, "learning_rate": 6.42e-07, - "loss": 100.5232, + "loss": 206409.8125, "step": 3210 }, { "epoch": 0.006504603724188642, - "grad_norm": 292.33343505859375, + "grad_norm": 36965.8671875, "learning_rate": 6.44e-07, - "loss": 65.68, + "loss": 223412.675, "step": 3220 }, { "epoch": 0.006524804356872457, - "grad_norm": 211.74859619140625, + "grad_norm": 39904.30078125, "learning_rate": 6.460000000000001e-07, - "loss": 69.6377, + "loss": 269824.3, "step": 3230 }, { "epoch": 0.006545004989556273, - "grad_norm": 1244.14990234375, + "grad_norm": 299366.65625, "learning_rate": 6.48e-07, - "loss": 114.7192, + "loss": 292864.875, "step": 3240 }, { "epoch": 0.006565205622240089, - "grad_norm": 1398.028564453125, + "grad_norm": 796878.0625, "learning_rate": 6.5e-07, - "loss": 101.866, + "loss": 306985.7, "step": 3250 }, { "epoch": 0.006585406254923904, - "grad_norm": 172.55064392089844, + "grad_norm": 6159.1923828125, "learning_rate": 6.52e-07, - "loss": 67.5807, + "loss": 286756.625, "step": 3260 }, { "epoch": 0.00660560688760772, - "grad_norm": 229.16807556152344, + "grad_norm": 5711.42529296875, "learning_rate": 6.54e-07, - "loss": 79.2535, + "loss": 153446.7375, "step": 3270 }, { "epoch": 0.006625807520291536, - "grad_norm": 773.2127075195312, + "grad_norm": 314807.5625, "learning_rate": 6.560000000000002e-07, - "loss": 67.9804, + "loss": 166506.45, "step": 3280 }, { "epoch": 0.006646008152975351, - "grad_norm": 659.2110595703125, + "grad_norm": 99453.203125, "learning_rate": 6.58e-07, - "loss": 104.5057, + "loss": 249214.45, "step": 3290 }, { "epoch": 0.006666208785659167, - "grad_norm": 598.7490844726562, + "grad_norm": 464471.875, "learning_rate": 6.6e-07, - "loss": 96.1712, + "loss": 301056.525, "step": 3300 }, { "epoch": 0.006686409418342983, - "grad_norm": 178.95274353027344, + "grad_norm": 23528.71875, "learning_rate": 6.62e-07, - "loss": 78.2361, + "loss": 357644.05, "step": 3310 }, { "epoch": 0.006706610051026798, - "grad_norm": 411.71893310546875, + "grad_norm": 130403.3125, "learning_rate": 6.64e-07, - "loss": 37.8195, + "loss": 76758.7125, "step": 3320 }, { "epoch": 0.006726810683710614, - "grad_norm": 1021.5560913085938, + "grad_norm": 572227.375, "learning_rate": 6.660000000000002e-07, - "loss": 90.4262, + "loss": 253747.825, "step": 3330 }, { "epoch": 0.0067470113163944296, - "grad_norm": 222.90164184570312, + "grad_norm": 22290.359375, "learning_rate": 6.68e-07, - "loss": 53.6363, + "loss": 175177.1125, "step": 3340 }, { "epoch": 0.006767211949078245, - "grad_norm": 735.28369140625, + "grad_norm": 37422.6953125, "learning_rate": 6.7e-07, - "loss": 84.8791, + "loss": 245530.0, "step": 3350 }, { "epoch": 0.006787412581762061, - "grad_norm": 1695.0865478515625, + "grad_norm": 1126871.0, "learning_rate": 6.72e-07, - "loss": 58.5201, + "loss": 204630.95, "step": 3360 }, { "epoch": 0.0068076132144458765, - "grad_norm": 465.19525146484375, + "grad_norm": 7533.625, "learning_rate": 6.74e-07, - "loss": 54.8187, + "loss": 114413.9375, "step": 3370 }, { "epoch": 0.006827813847129692, - "grad_norm": 406.1634826660156, + "grad_norm": 86930.5234375, "learning_rate": 6.76e-07, - "loss": 66.5806, + "loss": 121766.05, "step": 3380 }, { "epoch": 0.006848014479813508, - "grad_norm": 197.90008544921875, + "grad_norm": 21667.3046875, "learning_rate": 6.78e-07, - "loss": 88.9329, + "loss": 340758.775, "step": 3390 }, { "epoch": 0.0068682151124973235, - "grad_norm": 318.5137634277344, + "grad_norm": 46494.484375, "learning_rate": 6.800000000000001e-07, - "loss": 53.4784, + "loss": 171434.75, "step": 3400 }, { "epoch": 0.006888415745181139, - "grad_norm": 1154.5323486328125, + "grad_norm": 589762.625, "learning_rate": 6.82e-07, - "loss": 67.6873, + "loss": 150590.475, "step": 3410 }, { "epoch": 0.006908616377864955, - "grad_norm": 419.08807373046875, + "grad_norm": 156261.703125, "learning_rate": 6.84e-07, - "loss": 60.5299, + "loss": 170107.05, "step": 3420 }, { "epoch": 0.0069288170105487705, - "grad_norm": 1420.910400390625, + "grad_norm": 1065158.125, "learning_rate": 6.86e-07, - "loss": 51.5206, + "loss": 221675.05, "step": 3430 }, { "epoch": 0.006949017643232586, - "grad_norm": 3216.753173828125, + "grad_norm": 1241244.375, "learning_rate": 6.88e-07, - "loss": 142.1161, + "loss": 546164.15, "step": 3440 }, { "epoch": 0.006969218275916402, - "grad_norm": 196.4447784423828, + "grad_norm": 6295.31689453125, "learning_rate": 6.900000000000001e-07, - "loss": 63.2946, + "loss": 233491.95, "step": 3450 }, { "epoch": 0.0069894189086002175, - "grad_norm": 211.54823303222656, + "grad_norm": 21336.802734375, "learning_rate": 6.92e-07, - "loss": 72.3728, + "loss": 191947.6, "step": 3460 }, { "epoch": 0.007009619541284033, - "grad_norm": 839.4917602539062, + "grad_norm": 30956.9765625, "learning_rate": 6.94e-07, - "loss": 94.8516, + "loss": 280018.65, "step": 3470 }, { "epoch": 0.007029820173967849, - "grad_norm": 221.24810791015625, + "grad_norm": 14798.587890625, "learning_rate": 6.96e-07, - "loss": 85.2103, + "loss": 252106.975, "step": 3480 }, { "epoch": 0.0070500208066516645, - "grad_norm": 802.2166748046875, + "grad_norm": 93001.796875, "learning_rate": 6.98e-07, - "loss": 62.4996, + "loss": 113922.2625, "step": 3490 }, { "epoch": 0.00707022143933548, - "grad_norm": 851.0772094726562, + "grad_norm": 580998.4375, "learning_rate": 7.000000000000001e-07, - "loss": 94.2625, + "loss": 290999.275, "step": 3500 }, { "epoch": 0.007090422072019296, - "grad_norm": 100.0244369506836, + "grad_norm": 1307.6683349609375, "learning_rate": 7.02e-07, - "loss": 134.9697, + "loss": 338875.05, "step": 3510 }, { "epoch": 0.0071106227047031114, - "grad_norm": 357.1327819824219, + "grad_norm": 59976.5390625, "learning_rate": 7.040000000000001e-07, - "loss": 50.204, + "loss": 104920.9375, "step": 3520 }, { "epoch": 0.007130823337386927, - "grad_norm": 162.4925537109375, + "grad_norm": 4955.1494140625, "learning_rate": 7.06e-07, - "loss": 39.0503, + "loss": 78835.3687, "step": 3530 }, { "epoch": 0.007151023970070743, - "grad_norm": 338.42791748046875, + "grad_norm": 195896.71875, "learning_rate": 7.08e-07, - "loss": 50.3432, + "loss": 112083.775, "step": 3540 }, { "epoch": 0.007171224602754558, - "grad_norm": 409.02313232421875, + "grad_norm": 77589.6875, "learning_rate": 7.1e-07, - "loss": 75.9351, + "loss": 208640.2, "step": 3550 }, { "epoch": 0.007191425235438374, - "grad_norm": 937.6009521484375, + "grad_norm": 655835.0, "learning_rate": 7.12e-07, - "loss": 120.148, + "loss": 555943.3, "step": 3560 }, { "epoch": 0.00721162586812219, - "grad_norm": 175.7372283935547, + "grad_norm": 56403.203125, "learning_rate": 7.140000000000001e-07, - "loss": 53.868, + "loss": 144957.9375, "step": 3570 }, { "epoch": 0.007231826500806005, - "grad_norm": 374.8381652832031, + "grad_norm": 237701.203125, "learning_rate": 7.16e-07, - "loss": 39.1397, + "loss": 92795.8, "step": 3580 }, { "epoch": 0.007252027133489821, - "grad_norm": 111.05989837646484, + "grad_norm": 2599.7763671875, "learning_rate": 7.18e-07, - "loss": 77.018, + "loss": 250898.625, "step": 3590 }, { "epoch": 0.007272227766173637, - "grad_norm": 215.99217224121094, + "grad_norm": 68233.8359375, "learning_rate": 7.2e-07, - "loss": 58.0823, + "loss": 157000.1125, "step": 3600 }, { "epoch": 0.007292428398857452, - "grad_norm": 392.3381042480469, + "grad_norm": 65210.8125, "learning_rate": 7.22e-07, - "loss": 101.4519, + "loss": 249905.45, "step": 3610 }, { "epoch": 0.007312629031541268, - "grad_norm": 237.5904998779297, + "grad_norm": 29991.6015625, "learning_rate": 7.240000000000001e-07, - "loss": 47.6513, + "loss": 120913.925, "step": 3620 }, { "epoch": 0.007332829664225084, - "grad_norm": 880.9609985351562, + "grad_norm": 695456.625, "learning_rate": 7.26e-07, - "loss": 80.3464, + "loss": 284240.25, "step": 3630 }, { "epoch": 0.007353030296908899, - "grad_norm": 376.3143005371094, + "grad_norm": 33151.59375, "learning_rate": 7.280000000000001e-07, - "loss": 68.8664, + "loss": 135477.6, "step": 3640 }, { "epoch": 0.007373230929592715, - "grad_norm": 273.7664794921875, + "grad_norm": 25313.83984375, "learning_rate": 7.3e-07, - "loss": 112.0288, + "loss": 328860.4, "step": 3650 }, { "epoch": 0.007393431562276531, - "grad_norm": 624.0692749023438, + "grad_norm": 149670.640625, "learning_rate": 7.32e-07, - "loss": 80.3962, + "loss": 362645.475, "step": 3660 }, { "epoch": 0.007413632194960346, - "grad_norm": 463.6368103027344, + "grad_norm": 205773.453125, "learning_rate": 7.340000000000001e-07, - "loss": 51.8117, + "loss": 176650.525, "step": 3670 }, { "epoch": 0.007433832827644162, - "grad_norm": 515.5740966796875, + "grad_norm": 31363.662109375, "learning_rate": 7.36e-07, - "loss": 52.8487, + "loss": 123590.8375, "step": 3680 }, { "epoch": 0.007454033460327978, - "grad_norm": 243.49615478515625, + "grad_norm": 16304.078125, "learning_rate": 7.380000000000001e-07, - "loss": 72.603, + "loss": 173383.3, "step": 3690 }, { "epoch": 0.007474234093011793, - "grad_norm": 257.2506408691406, + "grad_norm": 33977.3203125, "learning_rate": 7.4e-07, - "loss": 96.6127, + "loss": 260979.65, "step": 3700 }, { "epoch": 0.007494434725695609, - "grad_norm": 460.7102966308594, + "grad_norm": 347730.84375, "learning_rate": 7.420000000000001e-07, - "loss": 107.1785, + "loss": 450849.15, "step": 3710 }, { "epoch": 0.007514635358379425, - "grad_norm": 264.08837890625, + "grad_norm": 64101.04296875, "learning_rate": 7.44e-07, - "loss": 89.0843, + "loss": 257313.825, "step": 3720 }, { "epoch": 0.00753483599106324, - "grad_norm": 173.69886779785156, + "grad_norm": 35078.265625, "learning_rate": 7.46e-07, - "loss": 61.4319, + "loss": 215947.5, "step": 3730 }, { "epoch": 0.007555036623747056, "grad_norm": 0.0, "learning_rate": 7.480000000000001e-07, - "loss": 69.4595, + "loss": 202492.2, "step": 3740 }, { "epoch": 0.007575237256430872, - "grad_norm": 545.4425659179688, + "grad_norm": 52274.12890625, "learning_rate": 7.5e-07, - "loss": 43.0321, + "loss": 81991.5, "step": 3750 }, { "epoch": 0.007595437889114687, "grad_norm": 0.0, "learning_rate": 7.520000000000001e-07, - "loss": 61.6203, + "loss": 155823.6125, "step": 3760 }, { "epoch": 0.007615638521798503, - "grad_norm": 250.635986328125, + "grad_norm": 12321.1943359375, "learning_rate": 7.54e-07, - "loss": 54.8057, + "loss": 119610.9, "step": 3770 }, { "epoch": 0.007635839154482319, - "grad_norm": 2738.195068359375, + "grad_norm": 1443713.875, "learning_rate": 7.56e-07, - "loss": 79.2305, + "loss": 260106.325, "step": 3780 }, { "epoch": 0.007656039787166134, - "grad_norm": 264.37335205078125, + "grad_norm": 34254.37890625, "learning_rate": 7.580000000000001e-07, - "loss": 84.1048, + "loss": 315069.2, "step": 3790 }, { "epoch": 0.00767624041984995, - "grad_norm": 543.71337890625, + "grad_norm": 46495.70703125, "learning_rate": 7.6e-07, - "loss": 89.0422, + "loss": 341471.725, "step": 3800 }, { "epoch": 0.007696441052533766, - "grad_norm": 170.02987670898438, + "grad_norm": 15482.25390625, "learning_rate": 7.620000000000001e-07, - "loss": 92.9234, + "loss": 236118.65, "step": 3810 }, { "epoch": 0.007716641685217581, - "grad_norm": 1459.7193603515625, + "grad_norm": 902341.875, "learning_rate": 7.64e-07, - "loss": 69.3069, + "loss": 233140.55, "step": 3820 }, { "epoch": 0.007736842317901397, - "grad_norm": 491.8957824707031, + "grad_norm": 235505.0625, "learning_rate": 7.660000000000001e-07, - "loss": 50.4874, + "loss": 151871.95, "step": 3830 }, { "epoch": 0.0077570429505852126, - "grad_norm": 191.47091674804688, + "grad_norm": 16168.0537109375, "learning_rate": 7.68e-07, - "loss": 58.4721, + "loss": 238502.975, "step": 3840 }, { "epoch": 0.007777243583269028, - "grad_norm": 154.1424560546875, + "grad_norm": 10799.955078125, "learning_rate": 7.7e-07, - "loss": 68.2273, + "loss": 203708.9, "step": 3850 }, { "epoch": 0.007797444215952844, - "grad_norm": 672.3381958007812, + "grad_norm": 433589.21875, "learning_rate": 7.720000000000001e-07, - "loss": 58.8063, + "loss": 161422.125, "step": 3860 }, { "epoch": 0.00781764484863666, - "grad_norm": 660.721435546875, + "grad_norm": 155173.515625, "learning_rate": 7.74e-07, - "loss": 85.1556, + "loss": 319709.35, "step": 3870 }, { "epoch": 0.007837845481320474, - "grad_norm": 411.94158935546875, + "grad_norm": 45670.97265625, "learning_rate": 7.760000000000001e-07, - "loss": 100.4106, + "loss": 250784.45, "step": 3880 }, { "epoch": 0.00785804611400429, - "grad_norm": 524.49462890625, + "grad_norm": 267430.46875, "learning_rate": 7.78e-07, - "loss": 61.9688, + "loss": 259770.85, "step": 3890 }, { "epoch": 0.007878246746688106, - "grad_norm": 258.5390625, + "grad_norm": 21466.544921875, "learning_rate": 7.8e-07, - "loss": 56.4161, + "loss": 187302.125, "step": 3900 }, { "epoch": 0.007898447379371922, - "grad_norm": 745.609130859375, + "grad_norm": 407195.40625, "learning_rate": 7.820000000000001e-07, - "loss": 104.5297, + "loss": 372071.35, "step": 3910 }, { "epoch": 0.007918648012055737, - "grad_norm": 161.8105926513672, + "grad_norm": 98506.0078125, "learning_rate": 7.84e-07, - "loss": 39.02, + "loss": 119741.0, "step": 3920 }, { "epoch": 0.007938848644739553, - "grad_norm": 671.252685546875, + "grad_norm": 158478.984375, "learning_rate": 7.860000000000001e-07, - "loss": 70.6206, + "loss": 217404.45, "step": 3930 }, { "epoch": 0.007959049277423368, - "grad_norm": 2161.61767578125, + "grad_norm": 1725536.125, "learning_rate": 7.88e-07, - "loss": 116.4441, + "loss": 349011.85, "step": 3940 }, { "epoch": 0.007979249910107185, - "grad_norm": 528.2971801757812, + "grad_norm": 122157.7578125, "learning_rate": 7.900000000000001e-07, - "loss": 100.3893, + "loss": 218812.55, "step": 3950 }, { "epoch": 0.007999450542791, - "grad_norm": 190.61056518554688, + "grad_norm": 4094.054443359375, "learning_rate": 7.920000000000001e-07, - "loss": 36.9781, + "loss": 52430.6937, "step": 3960 }, { "epoch": 0.008019651175474816, - "grad_norm": 482.51385498046875, + "grad_norm": 40998.43359375, "learning_rate": 7.94e-07, - "loss": 59.9889, + "loss": 169286.8375, "step": 3970 }, { "epoch": 0.008039851808158631, - "grad_norm": 798.2640380859375, + "grad_norm": 233307.296875, "learning_rate": 7.960000000000001e-07, - "loss": 65.9875, + "loss": 132898.225, "step": 3980 }, { "epoch": 0.008060052440842447, - "grad_norm": 320.660888671875, + "grad_norm": 4506.3046875, "learning_rate": 7.98e-07, - "loss": 107.9519, + "loss": 359931.35, "step": 3990 }, { "epoch": 0.008080253073526262, - "grad_norm": 450.87030029296875, + "grad_norm": 336763.5625, "learning_rate": 8.000000000000001e-07, - "loss": 68.9743, + "loss": 289954.7, "step": 4000 }, { "epoch": 0.008100453706210079, - "grad_norm": 254.083251953125, + "grad_norm": 45416.59765625, "learning_rate": 8.02e-07, - "loss": 54.0094, + "loss": 142493.7875, "step": 4010 }, { "epoch": 0.008120654338893894, - "grad_norm": 1128.0335693359375, + "grad_norm": 713163.1875, "learning_rate": 8.04e-07, - "loss": 93.9685, + "loss": 276303.825, "step": 4020 }, { "epoch": 0.00814085497157771, - "grad_norm": 263.24163818359375, + "grad_norm": 32743.466796875, "learning_rate": 8.060000000000001e-07, - "loss": 50.0381, + "loss": 127712.2625, "step": 4030 }, { "epoch": 0.008161055604261525, - "grad_norm": 3170.089111328125, + "grad_norm": 1487927.625, "learning_rate": 8.08e-07, - "loss": 85.2729, + "loss": 262294.6, "step": 4040 }, { "epoch": 0.008181256236945341, - "grad_norm": 367.68878173828125, + "grad_norm": 75393.9296875, "learning_rate": 8.100000000000001e-07, - "loss": 93.919, + "loss": 259503.6, "step": 4050 }, { "epoch": 0.008201456869629156, - "grad_norm": 239.5875701904297, + "grad_norm": 1169.8466796875, "learning_rate": 8.12e-07, - "loss": 81.0192, + "loss": 211517.05, "step": 4060 }, { "epoch": 0.008221657502312973, - "grad_norm": 699.9768676757812, + "grad_norm": 124916.625, "learning_rate": 8.140000000000001e-07, - "loss": 79.5798, + "loss": 160175.825, "step": 4070 }, { "epoch": 0.008241858134996788, - "grad_norm": 780.355712890625, + "grad_norm": 8532.4296875, "learning_rate": 8.160000000000001e-07, - "loss": 73.2325, + "loss": 171688.1625, "step": 4080 }, { "epoch": 0.008262058767680604, - "grad_norm": 272.5352783203125, + "grad_norm": 87506.265625, "learning_rate": 8.18e-07, - "loss": 33.2559, + "loss": 53155.3313, "step": 4090 }, { "epoch": 0.008282259400364419, - "grad_norm": 970.9287719726562, + "grad_norm": 576724.0625, "learning_rate": 8.200000000000001e-07, - "loss": 48.2855, + "loss": 163651.95, "step": 4100 }, { "epoch": 0.008302460033048235, "grad_norm": 0.0, "learning_rate": 8.22e-07, - "loss": 79.2521, + "loss": 171538.025, "step": 4110 }, { "epoch": 0.00832266066573205, - "grad_norm": 1466.3936767578125, + "grad_norm": 521696.84375, "learning_rate": 8.240000000000001e-07, - "loss": 99.5521, + "loss": 300504.25, "step": 4120 }, { "epoch": 0.008342861298415867, - "grad_norm": 250.12246704101562, + "grad_norm": 22977.390625, "learning_rate": 8.260000000000001e-07, - "loss": 48.5331, + "loss": 88622.5938, "step": 4130 }, { "epoch": 0.008363061931099681, - "grad_norm": 590.21533203125, + "grad_norm": 299371.375, "learning_rate": 8.280000000000001e-07, - "loss": 62.4788, + "loss": 164125.525, "step": 4140 }, { "epoch": 0.008383262563783498, - "grad_norm": 250.15396118164062, + "grad_norm": 51182.8671875, "learning_rate": 8.300000000000001e-07, - "loss": 95.9057, + "loss": 401216.3, "step": 4150 }, { "epoch": 0.008403463196467313, - "grad_norm": 335.9629211425781, + "grad_norm": 86956.96875, "learning_rate": 8.32e-07, - "loss": 48.6034, + "loss": 110363.8875, "step": 4160 }, { "epoch": 0.00842366382915113, - "grad_norm": 299.31121826171875, + "grad_norm": 245466.15625, "learning_rate": 8.340000000000001e-07, - "loss": 69.3774, + "loss": 249426.925, "step": 4170 }, { "epoch": 0.008443864461834944, - "grad_norm": 600.8267822265625, + "grad_norm": 164002.0625, "learning_rate": 8.36e-07, - "loss": 83.5373, + "loss": 451131.8, "step": 4180 }, { "epoch": 0.00846406509451876, - "grad_norm": 189.73745727539062, + "grad_norm": 13425.4287109375, "learning_rate": 8.380000000000001e-07, - "loss": 54.2606, + "loss": 168266.7, "step": 4190 }, { "epoch": 0.008484265727202575, - "grad_norm": 110.473388671875, + "grad_norm": 5091.4853515625, "learning_rate": 8.400000000000001e-07, - "loss": 65.1877, + "loss": 236135.95, "step": 4200 }, { "epoch": 0.008504466359886392, - "grad_norm": 640.5608520507812, + "grad_norm": 179410.8125, "learning_rate": 8.42e-07, - "loss": 64.1539, + "loss": 380396.75, "step": 4210 }, { "epoch": 0.008524666992570207, - "grad_norm": 485.4070739746094, + "grad_norm": 34261.60546875, "learning_rate": 8.440000000000001e-07, - "loss": 46.0433, + "loss": 158122.6, "step": 4220 }, { "epoch": 0.008544867625254023, - "grad_norm": 1038.2320556640625, + "grad_norm": 466699.15625, "learning_rate": 8.46e-07, - "loss": 99.0718, + "loss": 276887.025, "step": 4230 }, { "epoch": 0.008565068257937838, - "grad_norm": 302.6388244628906, + "grad_norm": 7158.931640625, "learning_rate": 8.480000000000001e-07, - "loss": 43.5486, + "loss": 104478.5875, "step": 4240 }, { "epoch": 0.008585268890621655, - "grad_norm": 471.0609130859375, + "grad_norm": 138142.71875, "learning_rate": 8.500000000000001e-07, - "loss": 73.2996, + "loss": 162651.75, "step": 4250 }, { "epoch": 0.00860546952330547, - "grad_norm": 342.42022705078125, + "grad_norm": 101274.078125, "learning_rate": 8.520000000000001e-07, - "loss": 46.3335, + "loss": 66241.8125, "step": 4260 }, { "epoch": 0.008625670155989286, - "grad_norm": 590.296142578125, + "grad_norm": 141996.59375, "learning_rate": 8.540000000000001e-07, - "loss": 81.2999, + "loss": 284136.55, "step": 4270 }, { "epoch": 0.0086458707886731, - "grad_norm": 427.3771057128906, + "grad_norm": 159615.796875, "learning_rate": 8.56e-07, - "loss": 77.2387, + "loss": 230484.475, "step": 4280 }, { "epoch": 0.008666071421356917, - "grad_norm": 353.09783935546875, + "grad_norm": 224560.921875, "learning_rate": 8.580000000000001e-07, - "loss": 61.0241, + "loss": 271579.525, "step": 4290 }, { "epoch": 0.008686272054040732, - "grad_norm": 216.05088806152344, + "grad_norm": 31924.70703125, "learning_rate": 8.6e-07, - "loss": 46.5208, + "loss": 103245.3375, "step": 4300 }, { "epoch": 0.008706472686724549, - "grad_norm": 264.6614074707031, + "grad_norm": 51188.63671875, "learning_rate": 8.620000000000001e-07, - "loss": 74.8502, + "loss": 305923.325, "step": 4310 }, { "epoch": 0.008726673319408363, - "grad_norm": 424.48736572265625, + "grad_norm": 358859.9375, "learning_rate": 8.640000000000001e-07, - "loss": 102.8178, + "loss": 386818.7, "step": 4320 }, { "epoch": 0.00874687395209218, - "grad_norm": 572.438720703125, + "grad_norm": 170631.21875, "learning_rate": 8.66e-07, - "loss": 71.089, + "loss": 246345.1, "step": 4330 }, { "epoch": 0.008767074584775995, - "grad_norm": 328.88494873046875, + "grad_norm": 79810.28125, "learning_rate": 8.680000000000001e-07, - "loss": 71.5397, + "loss": 270794.175, "step": 4340 }, { "epoch": 0.008787275217459811, - "grad_norm": 1221.77294921875, + "grad_norm": 845396.6875, "learning_rate": 8.7e-07, - "loss": 85.2543, + "loss": 221309.75, "step": 4350 }, { "epoch": 0.008807475850143626, - "grad_norm": 183.07659912109375, + "grad_norm": 22155.251953125, "learning_rate": 8.720000000000001e-07, - "loss": 46.6462, + "loss": 147956.725, "step": 4360 }, { "epoch": 0.008827676482827443, - "grad_norm": 205.29086303710938, + "grad_norm": 37154.09375, "learning_rate": 8.740000000000001e-07, - "loss": 38.5955, + "loss": 79357.2063, "step": 4370 }, { "epoch": 0.008847877115511257, - "grad_norm": 572.8082275390625, + "grad_norm": 255391.9375, "learning_rate": 8.760000000000001e-07, - "loss": 58.0181, + "loss": 162477.4875, "step": 4380 }, { "epoch": 0.008868077748195074, - "grad_norm": 770.6737060546875, + "grad_norm": 416127.96875, "learning_rate": 8.780000000000001e-07, - "loss": 55.9774, + "loss": 147735.3375, "step": 4390 }, { "epoch": 0.008888278380878889, - "grad_norm": 154.41558837890625, + "grad_norm": 115592.3828125, "learning_rate": 8.8e-07, - "loss": 82.9785, + "loss": 192981.025, "step": 4400 }, { "epoch": 0.008908479013562705, - "grad_norm": 647.172119140625, + "grad_norm": 223477.640625, "learning_rate": 8.820000000000001e-07, - "loss": 48.1095, + "loss": 148639.6, "step": 4410 }, { "epoch": 0.00892867964624652, - "grad_norm": 274.4324645996094, + "grad_norm": 80916.7265625, "learning_rate": 8.840000000000001e-07, - "loss": 58.5929, + "loss": 152252.0125, "step": 4420 }, { "epoch": 0.008948880278930336, - "grad_norm": 298.7028503417969, + "grad_norm": 1884.1187744140625, "learning_rate": 8.860000000000001e-07, - "loss": 54.1359, + "loss": 146819.1, "step": 4430 }, { "epoch": 0.008969080911614151, - "grad_norm": 344.7870178222656, + "grad_norm": 41009.83203125, "learning_rate": 8.880000000000001e-07, - "loss": 67.8226, + "loss": 218498.45, "step": 4440 }, { "epoch": 0.008989281544297968, - "grad_norm": 849.3818969726562, + "grad_norm": 620892.8125, "learning_rate": 8.900000000000001e-07, - "loss": 62.9041, + "loss": 210586.675, "step": 4450 }, { "epoch": 0.009009482176981783, - "grad_norm": 284.92254638671875, + "grad_norm": 132113.0, "learning_rate": 8.920000000000001e-07, - "loss": 106.6945, + "loss": 308075.225, "step": 4460 }, { "epoch": 0.0090296828096656, - "grad_norm": 916.0337524414062, + "grad_norm": 524022.40625, "learning_rate": 8.94e-07, - "loss": 85.026, + "loss": 354483.475, "step": 4470 }, { "epoch": 0.009049883442349414, - "grad_norm": 493.93170166015625, + "grad_norm": 172844.875, "learning_rate": 8.960000000000001e-07, - "loss": 77.4128, + "loss": 251360.775, "step": 4480 }, { "epoch": 0.00907008407503323, - "grad_norm": 813.1965942382812, + "grad_norm": 581277.6875, "learning_rate": 8.980000000000001e-07, - "loss": 60.6983, + "loss": 174650.175, "step": 4490 }, { "epoch": 0.009090284707717045, - "grad_norm": 492.5750427246094, + "grad_norm": 121103.3828125, "learning_rate": 9.000000000000001e-07, - "loss": 71.4951, + "loss": 253192.7, "step": 4500 }, { "epoch": 0.009110485340400862, - "grad_norm": 344.6943664550781, + "grad_norm": 110094.2109375, "learning_rate": 9.020000000000001e-07, - "loss": 47.9769, + "loss": 104635.5938, "step": 4510 }, { "epoch": 0.009130685973084677, - "grad_norm": 1498.4163818359375, + "grad_norm": 1087719.375, "learning_rate": 9.04e-07, - "loss": 74.5518, + "loss": 252878.75, "step": 4520 }, { "epoch": 0.009150886605768493, - "grad_norm": 348.4417419433594, + "grad_norm": 123308.1875, "learning_rate": 9.060000000000001e-07, - "loss": 51.9745, + "loss": 101472.4625, "step": 4530 }, { "epoch": 0.009171087238452308, - "grad_norm": 1819.0377197265625, + "grad_norm": 700213.5, "learning_rate": 9.080000000000001e-07, - "loss": 85.0029, + "loss": 245996.35, "step": 4540 }, { "epoch": 0.009191287871136124, - "grad_norm": 607.7725830078125, + "grad_norm": 209184.828125, "learning_rate": 9.100000000000001e-07, - "loss": 57.5715, + "loss": 115438.85, "step": 4550 }, { "epoch": 0.00921148850381994, - "grad_norm": 243.10305786132812, + "grad_norm": 64134.1953125, "learning_rate": 9.120000000000001e-07, - "loss": 62.3906, + "loss": 151623.8375, "step": 4560 }, { "epoch": 0.009231689136503756, - "grad_norm": 442.12542724609375, + "grad_norm": 111751.125, "learning_rate": 9.140000000000001e-07, - "loss": 38.8184, + "loss": 82097.3875, "step": 4570 }, { "epoch": 0.00925188976918757, - "grad_norm": 288.15234375, + "grad_norm": 58079.44140625, "learning_rate": 9.160000000000001e-07, - "loss": 75.4379, + "loss": 349699.375, "step": 4580 }, { "epoch": 0.009272090401871387, - "grad_norm": 416.0212707519531, + "grad_norm": 116160.8828125, "learning_rate": 9.180000000000001e-07, - "loss": 28.1377, + "loss": 62942.3313, "step": 4590 }, { "epoch": 0.009292291034555202, - "grad_norm": 251.46937561035156, + "grad_norm": 83131.8359375, "learning_rate": 9.200000000000001e-07, - "loss": 59.431, + "loss": 253180.225, "step": 4600 }, { "epoch": 0.009312491667239018, - "grad_norm": 976.6220092773438, + "grad_norm": 669428.5, "learning_rate": 9.220000000000001e-07, - "loss": 55.6885, + "loss": 225616.825, "step": 4610 }, { "epoch": 0.009332692299922833, - "grad_norm": 512.8638305664062, + "grad_norm": 63037.44921875, "learning_rate": 9.240000000000001e-07, - "loss": 89.008, + "loss": 306298.225, "step": 4620 }, { "epoch": 0.00935289293260665, - "grad_norm": 1499.292724609375, + "grad_norm": 1070200.375, "learning_rate": 9.260000000000001e-07, - "loss": 68.1157, + "loss": 205901.875, "step": 4630 }, { "epoch": 0.009373093565290464, - "grad_norm": 207.52223205566406, + "grad_norm": 6573.00048828125, "learning_rate": 9.28e-07, - "loss": 39.4039, + "loss": 91817.1875, "step": 4640 }, { "epoch": 0.009393294197974281, - "grad_norm": 420.28131103515625, + "grad_norm": 34686.44140625, "learning_rate": 9.300000000000001e-07, - "loss": 54.6099, + "loss": 137450.9625, "step": 4650 }, { "epoch": 0.009413494830658096, - "grad_norm": 699.9663696289062, + "grad_norm": 407398.71875, "learning_rate": 9.320000000000001e-07, - "loss": 67.8268, + "loss": 272299.475, "step": 4660 }, { "epoch": 0.009433695463341912, - "grad_norm": 2562.5595703125, + "grad_norm": 1779325.75, "learning_rate": 9.340000000000001e-07, - "loss": 77.5905, + "loss": 332040.1, "step": 4670 }, { "epoch": 0.009453896096025727, - "grad_norm": 696.5670776367188, + "grad_norm": 11288.173828125, "learning_rate": 9.360000000000001e-07, - "loss": 46.34, + "loss": 112056.625, "step": 4680 }, { "epoch": 0.009474096728709544, - "grad_norm": 414.7549743652344, + "grad_norm": 120578.0234375, "learning_rate": 9.380000000000001e-07, - "loss": 63.9172, + "loss": 318715.225, "step": 4690 }, { "epoch": 0.009494297361393358, - "grad_norm": 296.7588806152344, + "grad_norm": 9773.2626953125, "learning_rate": 9.400000000000001e-07, - "loss": 57.9701, + "loss": 137344.5625, "step": 4700 }, { "epoch": 0.009514497994077175, - "grad_norm": 257.6050109863281, + "grad_norm": 73225.59375, "learning_rate": 9.420000000000002e-07, - "loss": 105.1853, + "loss": 390411.375, "step": 4710 }, { "epoch": 0.00953469862676099, - "grad_norm": 291.5637512207031, + "grad_norm": 61664.78515625, "learning_rate": 9.440000000000001e-07, - "loss": 54.7079, + "loss": 105328.525, "step": 4720 }, { "epoch": 0.009554899259444806, - "grad_norm": 2119.67578125, + "grad_norm": 1206836.75, "learning_rate": 9.460000000000001e-07, - "loss": 100.7774, + "loss": 366955.625, "step": 4730 }, { "epoch": 0.009575099892128621, - "grad_norm": 947.7593994140625, + "grad_norm": 726749.125, "learning_rate": 9.480000000000001e-07, - "loss": 44.5658, + "loss": 152434.3, "step": 4740 }, { "epoch": 0.009595300524812438, - "grad_norm": 187.89585876464844, + "grad_norm": 60840.72265625, "learning_rate": 9.500000000000001e-07, - "loss": 45.5988, + "loss": 158363.3125, "step": 4750 }, { "epoch": 0.009615501157496252, - "grad_norm": 708.5851440429688, + "grad_norm": 314934.71875, "learning_rate": 9.520000000000002e-07, - "loss": 59.6354, + "loss": 173043.6375, "step": 4760 }, { "epoch": 0.009635701790180069, - "grad_norm": 297.5168151855469, + "grad_norm": 76533.9453125, "learning_rate": 9.54e-07, - "loss": 63.4563, + "loss": 221859.175, "step": 4770 }, { "epoch": 0.009655902422863884, - "grad_norm": 124.88948822021484, + "grad_norm": 3975.255126953125, "learning_rate": 9.56e-07, - "loss": 40.1416, + "loss": 79251.025, "step": 4780 }, { "epoch": 0.0096761030555477, - "grad_norm": 79.28592681884766, + "grad_norm": 2108.87841796875, "learning_rate": 9.58e-07, - "loss": 89.3487, + "loss": 285312.7, "step": 4790 }, { "epoch": 0.009696303688231515, - "grad_norm": 158.1666259765625, + "grad_norm": 15348.76171875, "learning_rate": 9.600000000000001e-07, - "loss": 67.6345, + "loss": 359363.7, "step": 4800 }, { "epoch": 0.009716504320915332, "grad_norm": 0.0, "learning_rate": 9.62e-07, - "loss": 39.1207, + "loss": 61026.3562, "step": 4810 }, { "epoch": 0.009736704953599146, - "grad_norm": 96.97802734375, + "grad_norm": 9017.28125, "learning_rate": 9.64e-07, - "loss": 34.8307, + "loss": 115173.6875, "step": 4820 }, { "epoch": 0.009756905586282963, - "grad_norm": 171.4454803466797, + "grad_norm": 14248.8017578125, "learning_rate": 9.660000000000002e-07, - "loss": 66.8396, + "loss": 292562.375, "step": 4830 }, { "epoch": 0.009777106218966778, - "grad_norm": 136.7041778564453, + "grad_norm": 15074.720703125, "learning_rate": 9.68e-07, - "loss": 51.1474, + "loss": 157963.575, "step": 4840 }, { "epoch": 0.009797306851650594, - "grad_norm": 184.39718627929688, + "grad_norm": 43500.91796875, "learning_rate": 9.7e-07, - "loss": 66.5278, + "loss": 174559.975, "step": 4850 }, { "epoch": 0.009817507484334409, - "grad_norm": 408.21807861328125, + "grad_norm": 12254.3857421875, "learning_rate": 9.72e-07, - "loss": 92.9937, + "loss": 398012.7, "step": 4860 }, { "epoch": 0.009837708117018226, - "grad_norm": 625.3853759765625, + "grad_norm": 8261.427734375, "learning_rate": 9.740000000000001e-07, - "loss": 56.5103, + "loss": 277774.825, "step": 4870 }, { "epoch": 0.00985790874970204, - "grad_norm": 277.07147216796875, + "grad_norm": 19549.763671875, "learning_rate": 9.76e-07, - "loss": 39.2891, + "loss": 71090.5063, "step": 4880 }, { "epoch": 0.009878109382385857, - "grad_norm": 288.1568603515625, + "grad_norm": 49290.51953125, "learning_rate": 9.78e-07, - "loss": 58.7945, + "loss": 150790.4125, "step": 4890 }, { "epoch": 0.009898310015069672, - "grad_norm": 269.74517822265625, + "grad_norm": 149781.59375, "learning_rate": 9.800000000000001e-07, - "loss": 39.0253, + "loss": 173007.5, "step": 4900 }, { "epoch": 0.009918510647753488, - "grad_norm": 192.9111328125, + "grad_norm": 5852.3330078125, "learning_rate": 9.82e-07, - "loss": 54.869, + "loss": 178302.825, "step": 4910 }, { "epoch": 0.009938711280437303, - "grad_norm": 459.1325988769531, + "grad_norm": 258817.796875, "learning_rate": 9.84e-07, - "loss": 22.2133, + "loss": 57192.775, "step": 4920 }, { "epoch": 0.00995891191312112, - "grad_norm": 161.34878540039062, + "grad_norm": 11703.6767578125, "learning_rate": 9.86e-07, - "loss": 82.1027, + "loss": 241792.1, "step": 4930 }, { "epoch": 0.009979112545804934, - "grad_norm": 229.77442932128906, + "grad_norm": 6024.68359375, "learning_rate": 9.880000000000001e-07, - "loss": 52.1782, + "loss": 177009.175, "step": 4940 }, { "epoch": 0.00999931317848875, - "grad_norm": 181.29869079589844, + "grad_norm": 2109.07958984375, "learning_rate": 9.9e-07, - "loss": 64.2641, + "loss": 161350.425, "step": 4950 }, { "epoch": 0.010019513811172566, - "grad_norm": 546.9896240234375, + "grad_norm": 189437.15625, "learning_rate": 9.92e-07, - "loss": 58.4041, + "loss": 161792.525, "step": 4960 }, { "epoch": 0.010039714443856382, - "grad_norm": 534.610595703125, + "grad_norm": 295174.5625, "learning_rate": 9.940000000000001e-07, - "loss": 78.499, + "loss": 354032.95, "step": 4970 }, { "epoch": 0.010059915076540197, - "grad_norm": 152.09864807128906, + "grad_norm": 14043.8115234375, "learning_rate": 9.96e-07, - "loss": 86.7894, + "loss": 280226.425, "step": 4980 }, { "epoch": 0.010080115709224013, - "grad_norm": 233.00289916992188, + "grad_norm": 19413.458984375, "learning_rate": 9.98e-07, - "loss": 34.7484, + "loss": 304773.45, "step": 4990 }, { "epoch": 0.010100316341907828, - "grad_norm": 172.82601928710938, + "grad_norm": 25748.02734375, "learning_rate": 1.0000000000000002e-06, - "loss": 18.2531, + "loss": 52401.5, "step": 5000 }, { "epoch": 0.010120516974591645, - "grad_norm": 383.88214111328125, + "grad_norm": 94723.5546875, "learning_rate": 1.002e-06, - "loss": 75.1869, + "loss": 356649.975, "step": 5010 }, { "epoch": 0.01014071760727546, - "grad_norm": 375.9042053222656, + "grad_norm": 37175.59765625, "learning_rate": 1.004e-06, - "loss": 40.8033, + "loss": 77473.6687, "step": 5020 }, { "epoch": 0.010160918239959276, - "grad_norm": 265.6379699707031, + "grad_norm": 128323.546875, "learning_rate": 1.006e-06, - "loss": 52.5388, + "loss": 92601.0, "step": 5030 }, { "epoch": 0.010181118872643091, - "grad_norm": 171.20565795898438, + "grad_norm": 84265.9921875, "learning_rate": 1.0080000000000001e-06, - "loss": 50.3155, + "loss": 195397.9875, "step": 5040 }, { "epoch": 0.010201319505326907, - "grad_norm": 1019.8333740234375, + "grad_norm": 616822.125, "learning_rate": 1.01e-06, - "loss": 46.4356, + "loss": 148352.925, "step": 5050 }, { "epoch": 0.010221520138010722, - "grad_norm": 417.64642333984375, + "grad_norm": 106293.84375, "learning_rate": 1.012e-06, - "loss": 39.6472, + "loss": 110045.575, "step": 5060 }, { "epoch": 0.010241720770694539, - "grad_norm": 1038.948974609375, + "grad_norm": 367895.46875, "learning_rate": 1.0140000000000002e-06, - "loss": 71.8275, + "loss": 165925.0875, "step": 5070 }, { "epoch": 0.010261921403378354, - "grad_norm": 382.73333740234375, + "grad_norm": 116208.46875, "learning_rate": 1.016e-06, - "loss": 36.9994, + "loss": 160799.6125, "step": 5080 }, { "epoch": 0.01028212203606217, - "grad_norm": 678.2418212890625, + "grad_norm": 149808.53125, "learning_rate": 1.018e-06, - "loss": 89.8579, + "loss": 256790.75, "step": 5090 }, { "epoch": 0.010302322668745985, - "grad_norm": 744.5878295898438, + "grad_norm": 152010.625, "learning_rate": 1.02e-06, - "loss": 65.8045, + "loss": 202302.05, "step": 5100 }, { "epoch": 0.010322523301429801, - "grad_norm": 0.0, + "grad_norm": 2062042.5, "learning_rate": 1.0220000000000001e-06, - "loss": 36.4327, + "loss": 325965.75, "step": 5110 }, { "epoch": 0.010342723934113616, - "grad_norm": 190.85916137695312, + "grad_norm": 36904.31640625, "learning_rate": 1.024e-06, - "loss": 55.1862, + "loss": 180105.8875, "step": 5120 }, { "epoch": 0.010362924566797433, - "grad_norm": 614.1928100585938, + "grad_norm": 114563.7734375, "learning_rate": 1.026e-06, - "loss": 56.2716, + "loss": 165454.025, "step": 5130 }, { "epoch": 0.010383125199481247, - "grad_norm": 1234.486572265625, + "grad_norm": 779898.0, "learning_rate": 1.0280000000000002e-06, - "loss": 97.5441, + "loss": 232749.8, "step": 5140 }, { "epoch": 0.010403325832165064, - "grad_norm": 2740.594482421875, + "grad_norm": 1815382.5, "learning_rate": 1.03e-06, - "loss": 84.4043, + "loss": 480212.9, "step": 5150 }, { "epoch": 0.010423526464848879, - "grad_norm": 387.3719177246094, + "grad_norm": 133293.4375, "learning_rate": 1.032e-06, - "loss": 49.6628, + "loss": 191595.0, "step": 5160 }, { "epoch": 0.010443727097532695, - "grad_norm": 814.0663452148438, + "grad_norm": 258877.96875, "learning_rate": 1.0340000000000002e-06, - "loss": 55.2796, + "loss": 152638.8625, "step": 5170 }, { "epoch": 0.01046392773021651, - "grad_norm": 265.0376892089844, + "grad_norm": 42413.0234375, "learning_rate": 1.0360000000000001e-06, - "loss": 49.8556, + "loss": 199530.7, "step": 5180 }, { "epoch": 0.010484128362900327, - "grad_norm": 348.89959716796875, + "grad_norm": 121179.359375, "learning_rate": 1.038e-06, - "loss": 57.7772, + "loss": 252217.325, "step": 5190 }, { "epoch": 0.010504328995584141, - "grad_norm": 454.0226135253906, + "grad_norm": 114019.171875, "learning_rate": 1.04e-06, - "loss": 75.268, + "loss": 198993.5625, "step": 5200 }, { "epoch": 0.010524529628267958, - "grad_norm": 377.39471435546875, + "grad_norm": 20795.62890625, "learning_rate": 1.0420000000000001e-06, - "loss": 52.2617, + "loss": 193767.6375, "step": 5210 }, { "epoch": 0.010544730260951773, - "grad_norm": 371.4864501953125, + "grad_norm": 36082.140625, "learning_rate": 1.044e-06, - "loss": 75.8078, + "loss": 301796.375, "step": 5220 }, { "epoch": 0.01056493089363559, - "grad_norm": 314.1023254394531, + "grad_norm": 11361.8935546875, "learning_rate": 1.046e-06, - "loss": 47.2679, + "loss": 271486.95, "step": 5230 }, { "epoch": 0.010585131526319404, - "grad_norm": 359.90826416015625, + "grad_norm": 245677.9375, "learning_rate": 1.0480000000000002e-06, - "loss": 62.8292, + "loss": 276067.1, "step": 5240 }, { "epoch": 0.01060533215900322, - "grad_norm": 662.6807861328125, + "grad_norm": 5956.818359375, "learning_rate": 1.0500000000000001e-06, - "loss": 60.9894, + "loss": 312573.9, "step": 5250 }, { "epoch": 0.010625532791687035, - "grad_norm": 102.56043243408203, + "grad_norm": 11913.482421875, "learning_rate": 1.052e-06, - "loss": 107.8306, + "loss": 335247.925, "step": 5260 }, { "epoch": 0.010645733424370852, - "grad_norm": 347.8341979980469, + "grad_norm": 279839.40625, "learning_rate": 1.054e-06, - "loss": 78.8811, + "loss": 221842.075, "step": 5270 }, { "epoch": 0.010665934057054667, - "grad_norm": 156.92105102539062, + "grad_norm": 26855.37890625, "learning_rate": 1.0560000000000001e-06, - "loss": 43.6627, + "loss": 182584.6, "step": 5280 }, { "epoch": 0.010686134689738483, - "grad_norm": 388.3529052734375, + "grad_norm": 101360.2578125, "learning_rate": 1.058e-06, - "loss": 41.8558, + "loss": 108261.2, "step": 5290 }, { "epoch": 0.010706335322422298, - "grad_norm": 1490.1282958984375, + "grad_norm": 1242751.5, "learning_rate": 1.06e-06, - "loss": 66.6095, + "loss": 375366.3, "step": 5300 }, { "epoch": 0.010726535955106115, - "grad_norm": 26.241750717163086, + "grad_norm": 1730.4876708984375, "learning_rate": 1.0620000000000002e-06, - "loss": 51.6079, + "loss": 308546.575, "step": 5310 }, { "epoch": 0.01074673658778993, - "grad_norm": 86.36492919921875, + "grad_norm": 18044.345703125, "learning_rate": 1.064e-06, - "loss": 45.8673, + "loss": 119568.3125, "step": 5320 }, { "epoch": 0.010766937220473746, - "grad_norm": 529.7972412109375, + "grad_norm": 341549.6875, "learning_rate": 1.066e-06, - "loss": 66.3335, + "loss": 264939.425, "step": 5330 }, { "epoch": 0.01078713785315756, - "grad_norm": 83.22615814208984, + "grad_norm": 2772.193115234375, "learning_rate": 1.0680000000000002e-06, - "loss": 70.2409, + "loss": 191456.1125, "step": 5340 }, { "epoch": 0.010807338485841377, - "grad_norm": 153.9220733642578, + "grad_norm": 18869.033203125, "learning_rate": 1.0700000000000001e-06, - "loss": 27.6604, + "loss": 60877.125, "step": 5350 }, { "epoch": 0.010827539118525192, - "grad_norm": 343.4872741699219, + "grad_norm": 11776.099609375, "learning_rate": 1.072e-06, - "loss": 108.8975, + "loss": 664963.75, "step": 5360 }, { "epoch": 0.010847739751209009, - "grad_norm": 205.2510986328125, + "grad_norm": 6577.0751953125, "learning_rate": 1.074e-06, - "loss": 70.3986, + "loss": 256728.675, "step": 5370 }, { "epoch": 0.010867940383892823, - "grad_norm": 617.7909545898438, + "grad_norm": 489216.125, "learning_rate": 1.0760000000000002e-06, - "loss": 77.1631, + "loss": 271020.175, "step": 5380 }, { "epoch": 0.01088814101657664, - "grad_norm": 328.7904052734375, + "grad_norm": 5096.65869140625, "learning_rate": 1.078e-06, - "loss": 96.2922, + "loss": 248232.9, "step": 5390 }, { "epoch": 0.010908341649260455, - "grad_norm": 583.5615234375, + "grad_norm": 276566.3125, "learning_rate": 1.08e-06, - "loss": 74.316, + "loss": 386013.825, "step": 5400 }, { "epoch": 0.010928542281944271, - "grad_norm": 1134.4434814453125, + "grad_norm": 131675.15625, "learning_rate": 1.0820000000000002e-06, - "loss": 47.4514, + "loss": 105471.15, "step": 5410 }, { "epoch": 0.010948742914628086, - "grad_norm": 229.69723510742188, + "grad_norm": 27548.408203125, "learning_rate": 1.0840000000000001e-06, - "loss": 133.4457, + "loss": 262396.175, "step": 5420 }, { "epoch": 0.010968943547311902, - "grad_norm": 627.3202514648438, + "grad_norm": 235807.171875, "learning_rate": 1.086e-06, - "loss": 111.7841, + "loss": 331523.2, "step": 5430 }, { "epoch": 0.010989144179995717, - "grad_norm": 258.3600158691406, + "grad_norm": 39118.625, "learning_rate": 1.088e-06, - "loss": 43.8069, + "loss": 138700.0375, "step": 5440 }, { "epoch": 0.011009344812679534, - "grad_norm": 486.23779296875, + "grad_norm": 253444.421875, "learning_rate": 1.0900000000000002e-06, - "loss": 34.7471, + "loss": 147846.35, "step": 5450 }, { "epoch": 0.011029545445363349, - "grad_norm": 219.52427673339844, + "grad_norm": 89810.3984375, "learning_rate": 1.092e-06, - "loss": 64.9426, + "loss": 296197.075, "step": 5460 }, { "epoch": 0.011049746078047165, - "grad_norm": 155.5829620361328, + "grad_norm": 4890.4482421875, "learning_rate": 1.094e-06, - "loss": 38.3777, + "loss": 114592.55, "step": 5470 }, { "epoch": 0.01106994671073098, - "grad_norm": 863.4156494140625, + "grad_norm": 544363.625, "learning_rate": 1.0960000000000002e-06, - "loss": 54.5578, + "loss": 211362.775, "step": 5480 }, { "epoch": 0.011090147343414796, - "grad_norm": 198.6283416748047, + "grad_norm": 119014.515625, "learning_rate": 1.0980000000000001e-06, - "loss": 25.8693, + "loss": 47654.6625, "step": 5490 }, { "epoch": 0.011110347976098611, - "grad_norm": 245.0723419189453, + "grad_norm": 52070.54296875, "learning_rate": 1.1e-06, - "loss": 54.0653, + "loss": 174297.6625, "step": 5500 }, { "epoch": 0.011130548608782428, - "grad_norm": 484.22796630859375, + "grad_norm": 319574.8125, "learning_rate": 1.1020000000000002e-06, - "loss": 73.8256, + "loss": 236224.475, "step": 5510 }, { "epoch": 0.011150749241466243, - "grad_norm": 305.1805114746094, + "grad_norm": 33749.49609375, "learning_rate": 1.1040000000000001e-06, - "loss": 64.1971, + "loss": 253679.5, "step": 5520 }, { "epoch": 0.011170949874150059, - "grad_norm": 282.2366638183594, + "grad_norm": 106899.515625, "learning_rate": 1.106e-06, - "loss": 48.6069, + "loss": 167219.575, "step": 5530 }, { "epoch": 0.011191150506833874, - "grad_norm": 440.402099609375, + "grad_norm": 282117.8125, "learning_rate": 1.108e-06, - "loss": 43.1783, + "loss": 140571.125, "step": 5540 }, { "epoch": 0.01121135113951769, - "grad_norm": 370.8833312988281, + "grad_norm": 227021.03125, "learning_rate": 1.1100000000000002e-06, - "loss": 51.1846, + "loss": 224820.175, "step": 5550 }, { "epoch": 0.011231551772201505, - "grad_norm": 189.22903442382812, + "grad_norm": 11503.18359375, "learning_rate": 1.1120000000000001e-06, - "loss": 60.3623, + "loss": 122715.95, "step": 5560 }, { "epoch": 0.011251752404885322, - "grad_norm": 1747.9296875, + "grad_norm": 401147.75, "learning_rate": 1.114e-06, - "loss": 64.3104, + "loss": 174447.825, "step": 5570 }, { "epoch": 0.011271953037569137, - "grad_norm": 132.58010864257812, + "grad_norm": 10263.841796875, "learning_rate": 1.1160000000000002e-06, - "loss": 49.1857, + "loss": 142719.775, "step": 5580 }, { "epoch": 0.011292153670252953, - "grad_norm": 624.1473999023438, + "grad_norm": 487121.03125, "learning_rate": 1.1180000000000001e-06, - "loss": 48.8147, + "loss": 145684.6125, "step": 5590 }, { "epoch": 0.011312354302936768, - "grad_norm": 83.12364959716797, + "grad_norm": 904.2119750976562, "learning_rate": 1.12e-06, - "loss": 50.3172, + "loss": 171381.275, "step": 5600 }, { "epoch": 0.011332554935620584, - "grad_norm": 224.36105346679688, + "grad_norm": 98642.3671875, "learning_rate": 1.122e-06, - "loss": 60.6691, + "loss": 223609.175, "step": 5610 }, { "epoch": 0.0113527555683044, - "grad_norm": 357.14398193359375, + "grad_norm": 126193.9765625, "learning_rate": 1.1240000000000002e-06, - "loss": 62.2691, + "loss": 210400.875, "step": 5620 }, { "epoch": 0.011372956200988216, - "grad_norm": 387.4647521972656, + "grad_norm": 81560.5, "learning_rate": 1.126e-06, - "loss": 33.2794, + "loss": 110323.4, "step": 5630 }, { "epoch": 0.01139315683367203, - "grad_norm": 268.9903869628906, + "grad_norm": 21354.85546875, "learning_rate": 1.128e-06, - "loss": 44.9158, + "loss": 110530.7125, "step": 5640 }, { "epoch": 0.011413357466355847, - "grad_norm": 1204.131591796875, + "grad_norm": 705375.5625, "learning_rate": 1.1300000000000002e-06, - "loss": 72.8314, + "loss": 355214.3, "step": 5650 }, { "epoch": 0.011433558099039662, - "grad_norm": 1143.989990234375, + "grad_norm": 670109.0625, "learning_rate": 1.1320000000000001e-06, - "loss": 62.8215, + "loss": 145305.775, "step": 5660 }, { "epoch": 0.011453758731723478, - "grad_norm": 406.05548095703125, + "grad_norm": 165329.828125, "learning_rate": 1.134e-06, - "loss": 54.4107, + "loss": 292169.0, "step": 5670 }, { "epoch": 0.011473959364407293, - "grad_norm": 121.01177978515625, + "grad_norm": 2824.65185546875, "learning_rate": 1.1360000000000002e-06, - "loss": 51.8799, + "loss": 152458.4, "step": 5680 }, { "epoch": 0.01149415999709111, - "grad_norm": 954.1021728515625, + "grad_norm": 498191.96875, "learning_rate": 1.1380000000000002e-06, - "loss": 85.13, + "loss": 213249.7, "step": 5690 }, { "epoch": 0.011514360629774924, - "grad_norm": 253.6305389404297, + "grad_norm": 39956.41015625, "learning_rate": 1.14e-06, - "loss": 36.4587, + "loss": 74277.725, "step": 5700 }, { "epoch": 0.011534561262458741, - "grad_norm": 132.0428466796875, + "grad_norm": 1266.055419921875, "learning_rate": 1.142e-06, - "loss": 45.7969, + "loss": 103491.325, "step": 5710 }, { "epoch": 0.011554761895142556, - "grad_norm": 151.45111083984375, + "grad_norm": 9990.341796875, "learning_rate": 1.1440000000000002e-06, - "loss": 73.2935, + "loss": 337454.075, "step": 5720 }, { "epoch": 0.011574962527826372, - "grad_norm": 326.3273010253906, + "grad_norm": 91487.5, "learning_rate": 1.1460000000000001e-06, - "loss": 60.3755, + "loss": 189531.4625, "step": 5730 }, { "epoch": 0.011595163160510187, - "grad_norm": 648.4600219726562, + "grad_norm": 131722.109375, "learning_rate": 1.148e-06, - "loss": 56.7199, + "loss": 217692.85, "step": 5740 }, { "epoch": 0.011615363793194004, - "grad_norm": 234.32139587402344, + "grad_norm": 25353.05859375, "learning_rate": 1.1500000000000002e-06, - "loss": 29.5902, + "loss": 65741.0125, "step": 5750 }, { "epoch": 0.011635564425877818, - "grad_norm": 312.2818603515625, + "grad_norm": 62004.95703125, "learning_rate": 1.1520000000000002e-06, - "loss": 44.4278, + "loss": 213234.575, "step": 5760 }, { "epoch": 0.011655765058561635, - "grad_norm": 134.03675842285156, + "grad_norm": 11575.1748046875, "learning_rate": 1.154e-06, - "loss": 46.4341, + "loss": 147733.55, "step": 5770 }, { "epoch": 0.01167596569124545, - "grad_norm": 510.0673522949219, + "grad_norm": 155426.484375, "learning_rate": 1.156e-06, - "loss": 76.13, + "loss": 284443.3, "step": 5780 }, { "epoch": 0.011696166323929266, - "grad_norm": 913.5552978515625, + "grad_norm": 564390.5, "learning_rate": 1.1580000000000002e-06, - "loss": 68.4197, + "loss": 346617.7, "step": 5790 }, { "epoch": 0.011716366956613081, - "grad_norm": 568.3344116210938, + "grad_norm": 6786.76708984375, "learning_rate": 1.1600000000000001e-06, - "loss": 63.3916, + "loss": 222567.575, "step": 5800 }, { "epoch": 0.011736567589296898, - "grad_norm": 257.7299499511719, + "grad_norm": 34725.8828125, "learning_rate": 1.162e-06, - "loss": 84.071, + "loss": 373824.15, "step": 5810 }, { "epoch": 0.011756768221980712, - "grad_norm": 616.9789428710938, + "grad_norm": 48506.70703125, "learning_rate": 1.1640000000000002e-06, - "loss": 48.8078, + "loss": 97256.1187, "step": 5820 }, { "epoch": 0.011776968854664529, - "grad_norm": 126.01705169677734, + "grad_norm": 11506.5869140625, "learning_rate": 1.1660000000000001e-06, - "loss": 40.8571, + "loss": 196560.35, "step": 5830 }, { "epoch": 0.011797169487348344, - "grad_norm": 156.91046142578125, + "grad_norm": 10748.9697265625, "learning_rate": 1.168e-06, - "loss": 44.4424, + "loss": 83322.2437, "step": 5840 }, { "epoch": 0.01181737012003216, - "grad_norm": 486.3993835449219, + "grad_norm": 249144.3125, "learning_rate": 1.1700000000000002e-06, - "loss": 44.0293, + "loss": 162703.175, "step": 5850 }, { "epoch": 0.011837570752715975, - "grad_norm": 230.46969604492188, + "grad_norm": 49058.2734375, "learning_rate": 1.1720000000000002e-06, - "loss": 39.297, + "loss": 99975.8188, "step": 5860 }, { "epoch": 0.011857771385399792, - "grad_norm": 898.0840454101562, + "grad_norm": 568850.375, "learning_rate": 1.1740000000000001e-06, - "loss": 87.5855, + "loss": 300860.3, "step": 5870 }, { "epoch": 0.011877972018083606, - "grad_norm": 212.59927368164062, + "grad_norm": 46855.36328125, "learning_rate": 1.176e-06, - "loss": 39.9678, + "loss": 110791.1375, "step": 5880 }, { "epoch": 0.011898172650767423, - "grad_norm": 189.84169006347656, + "grad_norm": 6408.60400390625, "learning_rate": 1.1780000000000002e-06, - "loss": 45.7435, + "loss": 166430.6625, "step": 5890 }, { "epoch": 0.011918373283451238, - "grad_norm": 690.14013671875, + "grad_norm": 621790.625, "learning_rate": 1.1800000000000001e-06, - "loss": 59.9252, + "loss": 268589.475, "step": 5900 }, { "epoch": 0.011938573916135054, - "grad_norm": 644.1328125, + "grad_norm": 10542.998046875, "learning_rate": 1.182e-06, - "loss": 62.9608, + "loss": 153527.075, "step": 5910 }, { "epoch": 0.011958774548818869, - "grad_norm": 1064.7626953125, + "grad_norm": 732047.0, "learning_rate": 1.1840000000000002e-06, - "loss": 58.9116, + "loss": 218407.625, "step": 5920 }, { "epoch": 0.011978975181502685, - "grad_norm": 278.33001708984375, + "grad_norm": 53747.96484375, "learning_rate": 1.1860000000000002e-06, - "loss": 66.7376, + "loss": 92658.875, "step": 5930 }, { "epoch": 0.0119991758141865, - "grad_norm": 2776.10205078125, + "grad_norm": 2557801.25, "learning_rate": 1.188e-06, - "loss": 73.8909, + "loss": 495478.3, "step": 5940 }, { "epoch": 0.012019376446870317, - "grad_norm": 181.832275390625, + "grad_norm": 35281.5859375, "learning_rate": 1.19e-06, - "loss": 51.1401, + "loss": 294682.375, "step": 5950 }, { "epoch": 0.012039577079554132, - "grad_norm": 173.2306365966797, + "grad_norm": 12886.1884765625, "learning_rate": 1.1920000000000002e-06, - "loss": 68.0926, + "loss": 281136.85, "step": 5960 }, { "epoch": 0.012059777712237948, - "grad_norm": 383.7989807128906, + "grad_norm": 77179.2109375, "learning_rate": 1.1940000000000001e-06, - "loss": 71.6162, + "loss": 191763.45, "step": 5970 }, { "epoch": 0.012079978344921763, - "grad_norm": 338.2807312011719, + "grad_norm": 72219.8984375, "learning_rate": 1.196e-06, - "loss": 83.3396, + "loss": 285171.2, "step": 5980 }, { "epoch": 0.01210017897760558, - "grad_norm": 355.2503662109375, + "grad_norm": 31011.212890625, "learning_rate": 1.1980000000000002e-06, - "loss": 44.8515, + "loss": 203071.25, "step": 5990 }, { "epoch": 0.012120379610289394, - "grad_norm": 461.7050476074219, + "grad_norm": 13782.95703125, "learning_rate": 1.2000000000000002e-06, - "loss": 29.5021, + "loss": 82814.4375, "step": 6000 }, { "epoch": 0.01214058024297321, - "grad_norm": 1292.11865234375, + "grad_norm": 912947.125, "learning_rate": 1.202e-06, - "loss": 65.0173, + "loss": 202832.7125, "step": 6010 }, { "epoch": 0.012160780875657026, - "grad_norm": 723.0204467773438, + "grad_norm": 308730.21875, "learning_rate": 1.204e-06, - "loss": 52.8088, + "loss": 236531.6, "step": 6020 }, { "epoch": 0.012180981508340842, - "grad_norm": 216.30770874023438, + "grad_norm": 50170.78515625, "learning_rate": 1.2060000000000002e-06, - "loss": 41.5408, + "loss": 69371.7, "step": 6030 }, { "epoch": 0.012201182141024657, - "grad_norm": 87.2208023071289, + "grad_norm": 5961.62109375, "learning_rate": 1.2080000000000001e-06, - "loss": 43.1754, + "loss": 104129.8188, "step": 6040 }, { "epoch": 0.012221382773708473, - "grad_norm": 261.27435302734375, + "grad_norm": 45485.00390625, "learning_rate": 1.21e-06, - "loss": 36.5309, + "loss": 138357.9625, "step": 6050 }, { "epoch": 0.012241583406392288, - "grad_norm": 363.8855285644531, + "grad_norm": 221637.03125, "learning_rate": 1.2120000000000002e-06, - "loss": 65.357, + "loss": 243971.4, "step": 6060 }, { "epoch": 0.012261784039076105, - "grad_norm": 279.0544128417969, + "grad_norm": 104204.140625, "learning_rate": 1.214e-06, - "loss": 124.9096, + "loss": 528832.35, "step": 6070 }, { "epoch": 0.01228198467175992, - "grad_norm": 95.21453094482422, + "grad_norm": 41355.61328125, "learning_rate": 1.216e-06, - "loss": 68.8102, + "loss": 285732.2, "step": 6080 }, { "epoch": 0.012302185304443734, - "grad_norm": 255.11932373046875, + "grad_norm": 6375.42822265625, "learning_rate": 1.2180000000000002e-06, - "loss": 58.7622, + "loss": 187749.65, "step": 6090 }, { "epoch": 0.01232238593712755, - "grad_norm": 216.3697052001953, + "grad_norm": 17161.6953125, "learning_rate": 1.2200000000000002e-06, - "loss": 85.2264, + "loss": 325157.25, "step": 6100 }, { "epoch": 0.012342586569811366, - "grad_norm": 464.1301574707031, + "grad_norm": 145562.734375, "learning_rate": 1.2220000000000001e-06, - "loss": 50.6743, + "loss": 263053.35, "step": 6110 }, { "epoch": 0.012362787202495182, - "grad_norm": 267.7419738769531, + "grad_norm": 215036.875, "learning_rate": 1.224e-06, - "loss": 59.9191, + "loss": 267244.15, "step": 6120 }, { "epoch": 0.012382987835178997, - "grad_norm": 1089.6318359375, + "grad_norm": 846912.3125, "learning_rate": 1.2260000000000002e-06, - "loss": 57.112, + "loss": 188783.75, "step": 6130 }, { "epoch": 0.012403188467862813, - "grad_norm": 611.3997802734375, + "grad_norm": 330831.21875, "learning_rate": 1.2280000000000001e-06, - "loss": 50.4293, + "loss": 164783.6625, "step": 6140 }, { "epoch": 0.012423389100546628, "grad_norm": 0.0, "learning_rate": 1.23e-06, - "loss": 28.3768, + "loss": 97637.025, "step": 6150 }, { "epoch": 0.012443589733230445, - "grad_norm": 597.175048828125, + "grad_norm": 250334.203125, "learning_rate": 1.2320000000000002e-06, - "loss": 55.5967, + "loss": 125869.5, "step": 6160 }, { "epoch": 0.01246379036591426, - "grad_norm": 103.65872192382812, + "grad_norm": 16129.9560546875, "learning_rate": 1.234e-06, - "loss": 72.0286, + "loss": 280061.825, "step": 6170 }, { "epoch": 0.012483990998598076, - "grad_norm": 1943.7325439453125, + "grad_norm": 544790.4375, "learning_rate": 1.2360000000000001e-06, - "loss": 64.9412, + "loss": 243071.275, "step": 6180 }, { "epoch": 0.012504191631281891, - "grad_norm": 94.820068359375, + "grad_norm": 6720.89404296875, "learning_rate": 1.238e-06, - "loss": 66.6636, + "loss": 328967.4, "step": 6190 }, { "epoch": 0.012524392263965707, - "grad_norm": 151.39108276367188, + "grad_norm": 8823.3701171875, "learning_rate": 1.2400000000000002e-06, - "loss": 95.0998, + "loss": 249973.6, "step": 6200 }, { "epoch": 0.012544592896649522, - "grad_norm": 591.7532958984375, + "grad_norm": 269097.3125, "learning_rate": 1.2420000000000001e-06, - "loss": 65.9034, + "loss": 214273.75, "step": 6210 }, { "epoch": 0.012564793529333339, - "grad_norm": 327.3201599121094, + "grad_norm": 88231.3828125, "learning_rate": 1.244e-06, - "loss": 80.4414, + "loss": 174008.5375, "step": 6220 }, { "epoch": 0.012584994162017154, - "grad_norm": 1470.0701904296875, + "grad_norm": 1429332.125, "learning_rate": 1.2460000000000002e-06, - "loss": 100.0844, + "loss": 520317.85, "step": 6230 }, { "epoch": 0.01260519479470097, - "grad_norm": 938.1318359375, + "grad_norm": 14200.275390625, "learning_rate": 1.248e-06, - "loss": 52.7637, + "loss": 188254.9125, "step": 6240 }, { "epoch": 0.012625395427384785, - "grad_norm": 291.33367919921875, + "grad_norm": 40809.046875, "learning_rate": 1.25e-06, - "loss": 32.4566, + "loss": 149793.325, "step": 6250 }, { "epoch": 0.012645596060068601, - "grad_norm": 343.1535339355469, + "grad_norm": 75000.9140625, "learning_rate": 1.2520000000000003e-06, - "loss": 60.9567, + "loss": 156596.75, "step": 6260 }, { "epoch": 0.012665796692752416, - "grad_norm": 168.9444580078125, + "grad_norm": 25231.25, "learning_rate": 1.2540000000000002e-06, - "loss": 40.3749, + "loss": 128126.2, "step": 6270 }, { "epoch": 0.012685997325436233, - "grad_norm": 221.40597534179688, + "grad_norm": 42188.95703125, "learning_rate": 1.256e-06, - "loss": 55.3372, + "loss": 230712.775, "step": 6280 }, { "epoch": 0.012706197958120048, - "grad_norm": 56.085731506347656, + "grad_norm": 5412.009765625, "learning_rate": 1.258e-06, - "loss": 71.4992, + "loss": 170857.6125, "step": 6290 }, { "epoch": 0.012726398590803864, - "grad_norm": 2517.319091796875, + "grad_norm": 1305927.25, "learning_rate": 1.26e-06, - "loss": 70.7924, + "loss": 331268.925, "step": 6300 }, { "epoch": 0.012746599223487679, - "grad_norm": 459.1685791015625, + "grad_norm": 170967.03125, "learning_rate": 1.2620000000000002e-06, - "loss": 56.0217, + "loss": 220812.45, "step": 6310 }, { "epoch": 0.012766799856171495, - "grad_norm": 988.790771484375, + "grad_norm": 304431.40625, "learning_rate": 1.2640000000000003e-06, - "loss": 50.932, + "loss": 120354.475, "step": 6320 }, { "epoch": 0.01278700048885531, - "grad_norm": 573.5408935546875, + "grad_norm": 72249.4921875, "learning_rate": 1.266e-06, - "loss": 34.1787, + "loss": 111158.5, "step": 6330 }, { "epoch": 0.012807201121539127, - "grad_norm": 265.6327819824219, + "grad_norm": 83831.0625, "learning_rate": 1.268e-06, - "loss": 41.8507, + "loss": 109360.4375, "step": 6340 }, { "epoch": 0.012827401754222941, - "grad_norm": 296.12347412109375, + "grad_norm": 20683.271484375, "learning_rate": 1.2700000000000001e-06, - "loss": 106.3619, + "loss": 389372.85, "step": 6350 }, { "epoch": 0.012847602386906758, - "grad_norm": 297.1531982421875, + "grad_norm": 100931.78125, "learning_rate": 1.2720000000000003e-06, - "loss": 65.5945, + "loss": 232739.85, "step": 6360 }, { "epoch": 0.012867803019590573, - "grad_norm": 150.37347412109375, + "grad_norm": 5239.923828125, "learning_rate": 1.2740000000000002e-06, - "loss": 41.635, + "loss": 106887.4875, "step": 6370 }, { "epoch": 0.01288800365227439, - "grad_norm": 273.054443359375, + "grad_norm": 50467.11328125, "learning_rate": 1.276e-06, - "loss": 48.1564, + "loss": 155046.275, "step": 6380 }, { "epoch": 0.012908204284958204, - "grad_norm": 1045.2257080078125, + "grad_norm": 377263.0, "learning_rate": 1.278e-06, - "loss": 56.9141, + "loss": 186476.025, "step": 6390 }, { "epoch": 0.01292840491764202, - "grad_norm": 2872.592041015625, + "grad_norm": 886907.0625, "learning_rate": 1.28e-06, - "loss": 102.438, + "loss": 361586.175, "step": 6400 }, { "epoch": 0.012948605550325835, - "grad_norm": 486.7997741699219, + "grad_norm": 158851.6875, "learning_rate": 1.2820000000000002e-06, - "loss": 75.9217, + "loss": 248059.0, "step": 6410 }, { "epoch": 0.012968806183009652, - "grad_norm": 323.7727355957031, + "grad_norm": 122449.84375, "learning_rate": 1.284e-06, - "loss": 58.3205, + "loss": 212122.125, "step": 6420 }, { "epoch": 0.012989006815693467, - "grad_norm": 286.941650390625, + "grad_norm": 818.8440551757812, "learning_rate": 1.286e-06, - "loss": 42.9914, + "loss": 158087.6875, "step": 6430 }, { "epoch": 0.013009207448377283, - "grad_norm": 77.75645446777344, + "grad_norm": 9309.22265625, "learning_rate": 1.288e-06, - "loss": 59.1297, + "loss": 135013.3375, "step": 6440 }, { "epoch": 0.013029408081061098, - "grad_norm": 347.530517578125, + "grad_norm": 176511.71875, "learning_rate": 1.2900000000000001e-06, - "loss": 52.3165, + "loss": 190930.95, "step": 6450 }, { "epoch": 0.013049608713744915, - "grad_norm": 300.7527160644531, + "grad_norm": 127421.9375, "learning_rate": 1.2920000000000003e-06, - "loss": 76.1604, + "loss": 210179.925, "step": 6460 }, { "epoch": 0.01306980934642873, - "grad_norm": 98.0569839477539, + "grad_norm": 4826.8134765625, "learning_rate": 1.294e-06, - "loss": 55.7005, + "loss": 247025.7, "step": 6470 }, { "epoch": 0.013090009979112546, - "grad_norm": 414.9523010253906, + "grad_norm": 16342.23828125, "learning_rate": 1.296e-06, - "loss": 63.4408, + "loss": 320687.875, "step": 6480 }, { "epoch": 0.01311021061179636, - "grad_norm": 241.30056762695312, + "grad_norm": 57369.98828125, "learning_rate": 1.2980000000000001e-06, - "loss": 75.7492, + "loss": 245098.0, "step": 6490 }, { "epoch": 0.013130411244480177, - "grad_norm": 250.26959228515625, + "grad_norm": 23721.046875, "learning_rate": 1.3e-06, - "loss": 65.906, + "loss": 355139.15, "step": 6500 }, { "epoch": 0.013150611877163992, - "grad_norm": 788.426025390625, + "grad_norm": 142323.265625, "learning_rate": 1.3020000000000002e-06, - "loss": 42.9289, + "loss": 142262.75, "step": 6510 }, { "epoch": 0.013170812509847809, - "grad_norm": 789.620361328125, + "grad_norm": 443519.875, "learning_rate": 1.304e-06, - "loss": 61.2505, + "loss": 216430.85, "step": 6520 }, { "epoch": 0.013191013142531623, - "grad_norm": 239.33192443847656, + "grad_norm": 26617.560546875, "learning_rate": 1.306e-06, - "loss": 108.727, + "loss": 472621.75, "step": 6530 }, { "epoch": 0.01321121377521544, - "grad_norm": 123.12330627441406, + "grad_norm": 7654.61962890625, "learning_rate": 1.308e-06, - "loss": 43.9603, + "loss": 111619.85, "step": 6540 }, { "epoch": 0.013231414407899255, - "grad_norm": 204.96498107910156, + "grad_norm": 29514.83203125, "learning_rate": 1.3100000000000002e-06, - "loss": 59.4844, + "loss": 192930.1, "step": 6550 }, { "epoch": 0.013251615040583071, - "grad_norm": 235.30938720703125, + "grad_norm": 30480.56640625, "learning_rate": 1.3120000000000003e-06, - "loss": 69.4452, + "loss": 268452.825, "step": 6560 }, { "epoch": 0.013271815673266886, - "grad_norm": 814.299072265625, + "grad_norm": 545056.875, "learning_rate": 1.314e-06, - "loss": 86.4013, + "loss": 346121.85, "step": 6570 }, { "epoch": 0.013292016305950703, - "grad_norm": 1707.3133544921875, + "grad_norm": 1394649.125, "learning_rate": 1.316e-06, - "loss": 48.617, + "loss": 226595.8, "step": 6580 }, { "epoch": 0.013312216938634517, - "grad_norm": 476.1501770019531, + "grad_norm": 200816.625, "learning_rate": 1.3180000000000001e-06, - "loss": 69.3972, + "loss": 276027.675, "step": 6590 }, { "epoch": 0.013332417571318334, - "grad_norm": 264.0830078125, + "grad_norm": 39447.2109375, "learning_rate": 1.32e-06, - "loss": 78.6769, + "loss": 134441.475, "step": 6600 }, { "epoch": 0.013352618204002149, - "grad_norm": 153.24546813964844, + "grad_norm": 4620.87060546875, "learning_rate": 1.3220000000000002e-06, - "loss": 28.0659, + "loss": 174096.0375, "step": 6610 }, { "epoch": 0.013372818836685965, - "grad_norm": 292.97222900390625, + "grad_norm": 37517.1796875, "learning_rate": 1.324e-06, - "loss": 54.801, + "loss": 174373.45, "step": 6620 }, { "epoch": 0.01339301946936978, - "grad_norm": 224.08602905273438, + "grad_norm": 40001.0859375, "learning_rate": 1.326e-06, - "loss": 31.9852, + "loss": 111202.1, "step": 6630 }, { "epoch": 0.013413220102053596, - "grad_norm": 142.6312255859375, + "grad_norm": 17173.22265625, "learning_rate": 1.328e-06, - "loss": 70.548, + "loss": 345478.85, "step": 6640 }, { "epoch": 0.013433420734737411, - "grad_norm": 442.8785095214844, + "grad_norm": 178764.328125, "learning_rate": 1.3300000000000002e-06, - "loss": 45.6281, + "loss": 168041.4125, "step": 6650 }, { "epoch": 0.013453621367421228, - "grad_norm": 793.637451171875, + "grad_norm": 542910.625, "learning_rate": 1.3320000000000003e-06, - "loss": 60.038, + "loss": 153800.0, "step": 6660 }, { "epoch": 0.013473822000105043, - "grad_norm": 124.31806182861328, + "grad_norm": 17794.544921875, "learning_rate": 1.334e-06, - "loss": 57.615, + "loss": 192659.7, "step": 6670 }, { "epoch": 0.013494022632788859, - "grad_norm": 345.0965576171875, + "grad_norm": 70625.1484375, "learning_rate": 1.336e-06, - "loss": 87.4724, + "loss": 422425.2, "step": 6680 }, { "epoch": 0.013514223265472674, - "grad_norm": 245.18463134765625, + "grad_norm": 18070.994140625, "learning_rate": 1.3380000000000001e-06, - "loss": 34.2467, + "loss": 76139.775, "step": 6690 }, { "epoch": 0.01353442389815649, - "grad_norm": 1459.039306640625, + "grad_norm": 151185.28125, "learning_rate": 1.34e-06, - "loss": 62.9462, + "loss": 175536.225, "step": 6700 }, { "epoch": 0.013554624530840305, - "grad_norm": 68.5372085571289, + "grad_norm": 21520.4140625, "learning_rate": 1.3420000000000002e-06, - "loss": 32.351, + "loss": 123036.5, "step": 6710 }, { "epoch": 0.013574825163524122, - "grad_norm": 170.9087677001953, + "grad_norm": 98770.984375, "learning_rate": 1.344e-06, - "loss": 39.0703, + "loss": 84191.9438, "step": 6720 }, { "epoch": 0.013595025796207937, - "grad_norm": 222.70040893554688, + "grad_norm": 25659.365234375, "learning_rate": 1.3460000000000001e-06, - "loss": 99.5773, + "loss": 374248.875, "step": 6730 }, { "epoch": 0.013615226428891753, - "grad_norm": 458.9627990722656, + "grad_norm": 112567.7109375, "learning_rate": 1.348e-06, - "loss": 57.0565, + "loss": 124846.7875, "step": 6740 }, { "epoch": 0.013635427061575568, - "grad_norm": 197.03370666503906, + "grad_norm": 5139.02490234375, "learning_rate": 1.3500000000000002e-06, - "loss": 26.2809, + "loss": 54600.1937, "step": 6750 }, { "epoch": 0.013655627694259384, - "grad_norm": 479.0645751953125, + "grad_norm": 251627.25, "learning_rate": 1.352e-06, - "loss": 30.4892, + "loss": 104549.0312, "step": 6760 }, { "epoch": 0.0136758283269432, - "grad_norm": 1120.02783203125, + "grad_norm": 768132.75, "learning_rate": 1.354e-06, - "loss": 63.872, + "loss": 233092.8, "step": 6770 }, { "epoch": 0.013696028959627016, - "grad_norm": 709.2474365234375, + "grad_norm": 644533.625, "learning_rate": 1.356e-06, - "loss": 53.0257, + "loss": 200798.575, "step": 6780 }, { "epoch": 0.01371622959231083, - "grad_norm": 588.8958740234375, + "grad_norm": 147891.953125, "learning_rate": 1.3580000000000002e-06, - "loss": 37.0737, + "loss": 87579.2125, "step": 6790 }, { "epoch": 0.013736430224994647, - "grad_norm": 517.6036376953125, + "grad_norm": 44424.9375, "learning_rate": 1.3600000000000001e-06, - "loss": 57.4366, + "loss": 166592.65, "step": 6800 }, { "epoch": 0.013756630857678462, - "grad_norm": 636.5734252929688, + "grad_norm": 121960.2890625, "learning_rate": 1.362e-06, - "loss": 166.4559, + "loss": 425040.3, "step": 6810 }, { "epoch": 0.013776831490362278, - "grad_norm": 831.8811645507812, + "grad_norm": 373055.28125, "learning_rate": 1.364e-06, - "loss": 38.2919, + "loss": 93255.425, "step": 6820 }, { "epoch": 0.013797032123046093, - "grad_norm": 404.2745666503906, + "grad_norm": 65672.953125, "learning_rate": 1.3660000000000001e-06, - "loss": 72.2935, + "loss": 227529.675, "step": 6830 }, { "epoch": 0.01381723275572991, - "grad_norm": 589.676513671875, + "grad_norm": 415010.9375, "learning_rate": 1.368e-06, - "loss": 75.9369, + "loss": 460171.15, "step": 6840 }, { "epoch": 0.013837433388413724, - "grad_norm": 460.65155029296875, + "grad_norm": 268214.28125, "learning_rate": 1.3700000000000002e-06, - "loss": 72.7194, + "loss": 272207.925, "step": 6850 }, { "epoch": 0.013857634021097541, - "grad_norm": 424.5543518066406, + "grad_norm": 189850.546875, "learning_rate": 1.372e-06, - "loss": 51.3904, + "loss": 247564.55, "step": 6860 }, { "epoch": 0.013877834653781356, - "grad_norm": 166.091796875, + "grad_norm": 12004.3076171875, "learning_rate": 1.374e-06, - "loss": 33.0741, + "loss": 70316.4, "step": 6870 }, { "epoch": 0.013898035286465172, - "grad_norm": 274.93896484375, + "grad_norm": 89409.3125, "learning_rate": 1.376e-06, - "loss": 29.9831, + "loss": 116259.2625, "step": 6880 }, { "epoch": 0.013918235919148987, - "grad_norm": 334.6905517578125, + "grad_norm": 141290.4375, "learning_rate": 1.3780000000000002e-06, - "loss": 44.5172, + "loss": 193146.7375, "step": 6890 }, { "epoch": 0.013938436551832804, - "grad_norm": 276.1719665527344, + "grad_norm": 103762.84375, "learning_rate": 1.3800000000000001e-06, - "loss": 36.0919, + "loss": 117090.6375, "step": 6900 }, { "epoch": 0.013958637184516618, - "grad_norm": 325.14794921875, + "grad_norm": 284794.40625, "learning_rate": 1.382e-06, - "loss": 41.6051, + "loss": 157597.425, "step": 6910 }, { "epoch": 0.013978837817200435, - "grad_norm": 211.0586395263672, + "grad_norm": 45535.09375, "learning_rate": 1.384e-06, - "loss": 62.0934, + "loss": 388543.95, "step": 6920 }, { "epoch": 0.01399903844988425, - "grad_norm": 198.04037475585938, + "grad_norm": 61266.109375, "learning_rate": 1.3860000000000002e-06, - "loss": 39.1451, + "loss": 99332.3562, "step": 6930 }, { "epoch": 0.014019239082568066, - "grad_norm": 750.244140625, + "grad_norm": 649088.6875, "learning_rate": 1.388e-06, - "loss": 50.147, + "loss": 193117.75, "step": 6940 }, { "epoch": 0.014039439715251881, - "grad_norm": 328.3049621582031, + "grad_norm": 67062.78125, "learning_rate": 1.3900000000000002e-06, - "loss": 27.4373, + "loss": 76796.3188, "step": 6950 }, { "epoch": 0.014059640347935698, - "grad_norm": 1477.84130859375, + "grad_norm": 1006775.0, "learning_rate": 1.392e-06, - "loss": 84.3971, + "loss": 379582.75, "step": 6960 }, { "epoch": 0.014079840980619512, - "grad_norm": 462.97772216796875, + "grad_norm": 207541.71875, "learning_rate": 1.3940000000000001e-06, - "loss": 55.8385, + "loss": 241658.325, "step": 6970 }, { "epoch": 0.014100041613303329, - "grad_norm": 797.4838256835938, + "grad_norm": 430541.40625, "learning_rate": 1.396e-06, - "loss": 52.192, + "loss": 168067.6, "step": 6980 }, { "epoch": 0.014120242245987144, - "grad_norm": 156.33946228027344, + "grad_norm": 13868.9052734375, "learning_rate": 1.3980000000000002e-06, - "loss": 21.0467, + "loss": 76655.6125, "step": 6990 }, { "epoch": 0.01414044287867096, - "grad_norm": 229.4462890625, + "grad_norm": 8073.255859375, "learning_rate": 1.4000000000000001e-06, - "loss": 45.2043, + "loss": 117925.9125, "step": 7000 }, { "epoch": 0.014160643511354775, - "grad_norm": 361.8078918457031, + "grad_norm": 176640.828125, "learning_rate": 1.402e-06, - "loss": 40.1699, + "loss": 121438.325, "step": 7010 }, { "epoch": 0.014180844144038592, - "grad_norm": 297.76959228515625, + "grad_norm": 230740.59375, "learning_rate": 1.404e-06, - "loss": 48.5784, + "loss": 144349.675, "step": 7020 }, { "epoch": 0.014201044776722406, - "grad_norm": 256.14617919921875, + "grad_norm": 84460.0234375, "learning_rate": 1.4060000000000002e-06, - "loss": 45.7503, + "loss": 179634.55, "step": 7030 }, { "epoch": 0.014221245409406223, - "grad_norm": 298.47015380859375, + "grad_norm": 81148.1796875, "learning_rate": 1.4080000000000001e-06, - "loss": 51.799, + "loss": 223394.475, "step": 7040 }, { "epoch": 0.014241446042090038, - "grad_norm": 1568.6728515625, + "grad_norm": 109302.703125, "learning_rate": 1.41e-06, - "loss": 54.6099, + "loss": 116722.6625, "step": 7050 }, { "epoch": 0.014261646674773854, - "grad_norm": 192.48580932617188, + "grad_norm": 96607.40625, "learning_rate": 1.412e-06, - "loss": 24.9589, + "loss": 93284.1187, "step": 7060 }, { "epoch": 0.014281847307457669, - "grad_norm": 524.90771484375, + "grad_norm": 78391.2734375, "learning_rate": 1.4140000000000001e-06, - "loss": 39.4973, + "loss": 154028.55, "step": 7070 }, { "epoch": 0.014302047940141486, - "grad_norm": 391.25164794921875, + "grad_norm": 224134.953125, "learning_rate": 1.416e-06, - "loss": 42.5879, + "loss": 222894.625, "step": 7080 }, { "epoch": 0.0143222485728253, - "grad_norm": 183.65029907226562, + "grad_norm": 16769.279296875, "learning_rate": 1.4180000000000002e-06, - "loss": 44.8341, + "loss": 108194.325, "step": 7090 }, { "epoch": 0.014342449205509117, - "grad_norm": 338.1338806152344, + "grad_norm": 158996.6875, "learning_rate": 1.42e-06, - "loss": 35.527, + "loss": 106899.7875, "step": 7100 }, { "epoch": 0.014362649838192932, - "grad_norm": 543.2742919921875, + "grad_norm": 423994.59375, "learning_rate": 1.4220000000000001e-06, - "loss": 86.0665, + "loss": 436265.65, "step": 7110 }, { "epoch": 0.014382850470876748, - "grad_norm": 224.66915893554688, + "grad_norm": 20151.673828125, "learning_rate": 1.424e-06, - "loss": 33.3185, + "loss": 155395.7375, "step": 7120 }, { "epoch": 0.014403051103560563, - "grad_norm": 88.92449951171875, + "grad_norm": 4831.05224609375, "learning_rate": 1.4260000000000002e-06, - "loss": 34.1092, + "loss": 155968.8875, "step": 7130 }, { "epoch": 0.01442325173624438, - "grad_norm": 595.7294921875, + "grad_norm": 155692.4375, "learning_rate": 1.4280000000000001e-06, - "loss": 57.5506, + "loss": 116445.5125, "step": 7140 }, { "epoch": 0.014443452368928194, - "grad_norm": 447.6490173339844, + "grad_norm": 104348.765625, "learning_rate": 1.43e-06, - "loss": 29.7112, + "loss": 33732.4656, "step": 7150 }, { "epoch": 0.01446365300161201, - "grad_norm": 159.2474365234375, + "grad_norm": 77297.5703125, "learning_rate": 1.432e-06, - "loss": 30.5846, + "loss": 135118.7125, "step": 7160 }, { "epoch": 0.014483853634295826, - "grad_norm": 246.44580078125, + "grad_norm": 89996.6640625, "learning_rate": 1.4340000000000002e-06, - "loss": 67.1369, + "loss": 274482.0, "step": 7170 }, { "epoch": 0.014504054266979642, - "grad_norm": 54.01875305175781, + "grad_norm": 4797.0703125, "learning_rate": 1.436e-06, - "loss": 27.2304, + "loss": 134943.775, "step": 7180 }, { "epoch": 0.014524254899663457, - "grad_norm": 714.08203125, + "grad_norm": 33957.203125, "learning_rate": 1.4380000000000003e-06, - "loss": 49.9532, + "loss": 190302.0125, "step": 7190 }, { "epoch": 0.014544455532347273, - "grad_norm": 211.70220947265625, + "grad_norm": 26090.111328125, "learning_rate": 1.44e-06, - "loss": 53.6889, + "loss": 246593.75, "step": 7200 }, { "epoch": 0.014564656165031088, - "grad_norm": 68.74442291259766, + "grad_norm": 543.584228515625, "learning_rate": 1.4420000000000001e-06, - "loss": 50.0597, + "loss": 154278.2125, "step": 7210 }, { "epoch": 0.014584856797714905, - "grad_norm": 675.3694458007812, + "grad_norm": 236326.203125, "learning_rate": 1.444e-06, - "loss": 52.8109, + "loss": 243347.85, "step": 7220 }, { "epoch": 0.01460505743039872, - "grad_norm": 1048.88037109375, + "grad_norm": 596342.5, "learning_rate": 1.4460000000000002e-06, - "loss": 56.0821, + "loss": 213823.375, "step": 7230 }, { "epoch": 0.014625258063082536, - "grad_norm": 272.8748779296875, + "grad_norm": 73992.8515625, "learning_rate": 1.4480000000000002e-06, - "loss": 40.488, + "loss": 82177.8938, "step": 7240 }, { "epoch": 0.014645458695766351, - "grad_norm": 188.908447265625, + "grad_norm": 13114.7783203125, "learning_rate": 1.45e-06, - "loss": 28.6095, + "loss": 105268.85, "step": 7250 }, { "epoch": 0.014665659328450167, - "grad_norm": 178.1385955810547, + "grad_norm": 23606.841796875, "learning_rate": 1.452e-06, - "loss": 41.0875, + "loss": 160776.6, "step": 7260 }, { "epoch": 0.014685859961133982, - "grad_norm": 219.2093963623047, + "grad_norm": 209837.84375, "learning_rate": 1.4540000000000002e-06, - "loss": 61.8799, + "loss": 281638.425, "step": 7270 }, { "epoch": 0.014706060593817799, - "grad_norm": 905.444091796875, + "grad_norm": 217570.40625, "learning_rate": 1.4560000000000001e-06, - "loss": 51.7865, + "loss": 364104.35, "step": 7280 }, { "epoch": 0.014726261226501614, - "grad_norm": 118.24497985839844, + "grad_norm": 21951.427734375, "learning_rate": 1.4580000000000003e-06, - "loss": 37.4235, + "loss": 110277.275, "step": 7290 }, { "epoch": 0.01474646185918543, - "grad_norm": 231.55853271484375, + "grad_norm": 113760.4609375, "learning_rate": 1.46e-06, - "loss": 87.3908, + "loss": 266835.425, "step": 7300 }, { "epoch": 0.014766662491869245, - "grad_norm": 219.38470458984375, + "grad_norm": 66544.6953125, "learning_rate": 1.4620000000000001e-06, - "loss": 45.1549, + "loss": 282404.6, "step": 7310 }, { "epoch": 0.014786863124553061, - "grad_norm": 65.88916015625, + "grad_norm": 5108.46240234375, "learning_rate": 1.464e-06, - "loss": 62.2457, + "loss": 293096.775, "step": 7320 }, { "epoch": 0.014807063757236876, - "grad_norm": 345.18585205078125, + "grad_norm": 192183.296875, "learning_rate": 1.4660000000000002e-06, - "loss": 51.6268, + "loss": 197156.1625, "step": 7330 }, { "epoch": 0.014827264389920693, - "grad_norm": 85.6872329711914, + "grad_norm": 7179.640625, "learning_rate": 1.4680000000000002e-06, - "loss": 84.4872, + "loss": 352330.125, "step": 7340 }, { "epoch": 0.014847465022604507, - "grad_norm": 342.62701416015625, + "grad_norm": 57160.22265625, "learning_rate": 1.4700000000000001e-06, - "loss": 49.8818, + "loss": 175921.0625, "step": 7350 }, { "epoch": 0.014867665655288324, "grad_norm": 0.0, "learning_rate": 1.472e-06, - "loss": 52.9737, + "loss": 205848.9, "step": 7360 }, { "epoch": 0.014887866287972139, - "grad_norm": 78.58345794677734, + "grad_norm": 1354.9571533203125, "learning_rate": 1.4740000000000002e-06, - "loss": 50.7601, + "loss": 143368.7, "step": 7370 }, { "epoch": 0.014908066920655955, - "grad_norm": 257.71466064453125, + "grad_norm": 68485.09375, "learning_rate": 1.4760000000000001e-06, - "loss": 73.4479, + "loss": 226299.225, "step": 7380 }, { "epoch": 0.01492826755333977, - "grad_norm": 774.4879760742188, + "grad_norm": 618570.6875, "learning_rate": 1.478e-06, - "loss": 80.2251, + "loss": 180249.7625, "step": 7390 }, { "epoch": 0.014948468186023587, - "grad_norm": 672.1459350585938, + "grad_norm": 640672.0, "learning_rate": 1.48e-06, - "loss": 78.1241, + "loss": 280667.325, "step": 7400 }, { "epoch": 0.014968668818707401, - "grad_norm": 408.3189392089844, + "grad_norm": 27340.494140625, "learning_rate": 1.4820000000000002e-06, - "loss": 59.58, + "loss": 327345.45, "step": 7410 }, { "epoch": 0.014988869451391218, - "grad_norm": 576.1275634765625, + "grad_norm": 10149.701171875, "learning_rate": 1.4840000000000001e-06, - "loss": 42.909, + "loss": 142214.125, "step": 7420 }, { "epoch": 0.015009070084075033, - "grad_norm": 186.1236572265625, + "grad_norm": 9804.146484375, "learning_rate": 1.4860000000000003e-06, - "loss": 47.7544, + "loss": 159819.125, "step": 7430 }, { "epoch": 0.01502927071675885, - "grad_norm": 198.97328186035156, + "grad_norm": 10356.6875, "learning_rate": 1.488e-06, - "loss": 54.6158, + "loss": 120858.2375, "step": 7440 }, { "epoch": 0.015049471349442664, - "grad_norm": 239.58718872070312, + "grad_norm": 58825.515625, "learning_rate": 1.4900000000000001e-06, - "loss": 70.9197, + "loss": 196717.2, "step": 7450 }, { "epoch": 0.01506967198212648, - "grad_norm": 521.1500854492188, + "grad_norm": 34384.80078125, "learning_rate": 1.492e-06, - "loss": 54.2824, + "loss": 171238.75, "step": 7460 }, { "epoch": 0.015089872614810295, - "grad_norm": 133.13917541503906, + "grad_norm": 14983.0751953125, "learning_rate": 1.4940000000000002e-06, - "loss": 43.3351, + "loss": 153863.45, "step": 7470 }, { "epoch": 0.015110073247494112, - "grad_norm": 405.8093566894531, + "grad_norm": 160187.546875, "learning_rate": 1.4960000000000002e-06, - "loss": 62.4667, + "loss": 295292.35, "step": 7480 }, { "epoch": 0.015130273880177927, - "grad_norm": 255.1741485595703, + "grad_norm": 28309.86328125, "learning_rate": 1.498e-06, - "loss": 86.9749, + "loss": 329709.9, "step": 7490 }, { "epoch": 0.015150474512861743, - "grad_norm": 220.36903381347656, + "grad_norm": 44743.60546875, "learning_rate": 1.5e-06, - "loss": 89.2164, + "loss": 480984.25, "step": 7500 }, { "epoch": 0.015170675145545558, - "grad_norm": 461.5882568359375, + "grad_norm": 93355.0, "learning_rate": 1.5020000000000002e-06, - "loss": 64.1008, + "loss": 177684.75, "step": 7510 }, { "epoch": 0.015190875778229375, - "grad_norm": 325.4097595214844, + "grad_norm": 63803.03515625, "learning_rate": 1.5040000000000001e-06, - "loss": 33.0139, + "loss": 122679.175, "step": 7520 }, { "epoch": 0.01521107641091319, - "grad_norm": 234.7192840576172, + "grad_norm": 77338.3203125, "learning_rate": 1.5060000000000003e-06, - "loss": 94.2688, + "loss": 322276.725, "step": 7530 }, { "epoch": 0.015231277043597006, - "grad_norm": 1088.190185546875, + "grad_norm": 928060.75, "learning_rate": 1.508e-06, - "loss": 47.6814, + "loss": 202830.6125, "step": 7540 }, { "epoch": 0.01525147767628082, - "grad_norm": 593.3748168945312, + "grad_norm": 327036.90625, "learning_rate": 1.5100000000000002e-06, - "loss": 31.7104, + "loss": 106976.225, "step": 7550 }, { "epoch": 0.015271678308964637, - "grad_norm": 446.8435974121094, + "grad_norm": 162417.1875, "learning_rate": 1.512e-06, - "loss": 55.3187, + "loss": 197134.075, "step": 7560 }, { "epoch": 0.015291878941648452, - "grad_norm": 840.773193359375, + "grad_norm": 609896.25, "learning_rate": 1.5140000000000002e-06, - "loss": 54.7462, + "loss": 197494.9875, "step": 7570 }, { "epoch": 0.015312079574332269, - "grad_norm": 947.057373046875, + "grad_norm": 711554.75, "learning_rate": 1.5160000000000002e-06, - "loss": 57.9198, + "loss": 193812.75, "step": 7580 }, { "epoch": 0.015332280207016083, - "grad_norm": 636.2012329101562, + "grad_norm": 274021.25, "learning_rate": 1.5180000000000001e-06, - "loss": 35.4827, + "loss": 90001.8, "step": 7590 }, { "epoch": 0.0153524808396999, - "grad_norm": 373.7704162597656, + "grad_norm": 164636.296875, "learning_rate": 1.52e-06, - "loss": 31.5516, + "loss": 86268.1375, "step": 7600 }, { "epoch": 0.015372681472383715, - "grad_norm": 1601.3780517578125, + "grad_norm": 1308259.0, "learning_rate": 1.5220000000000002e-06, - "loss": 76.4885, + "loss": 353620.3, "step": 7610 }, { "epoch": 0.015392882105067531, - "grad_norm": 140.75204467773438, + "grad_norm": 22790.62109375, "learning_rate": 1.5240000000000001e-06, - "loss": 51.5626, + "loss": 142802.825, "step": 7620 }, { "epoch": 0.015413082737751346, - "grad_norm": 119.63394165039062, + "grad_norm": 22653.013671875, "learning_rate": 1.5260000000000003e-06, - "loss": 67.5972, + "loss": 272719.175, "step": 7630 }, { "epoch": 0.015433283370435162, - "grad_norm": 161.45762634277344, + "grad_norm": 34531.84375, "learning_rate": 1.528e-06, - "loss": 22.2136, + "loss": 71541.9625, "step": 7640 }, { "epoch": 0.015453484003118977, - "grad_norm": 231.3303680419922, + "grad_norm": 29007.041015625, "learning_rate": 1.5300000000000002e-06, - "loss": 47.4064, + "loss": 190821.05, "step": 7650 }, { "epoch": 0.015473684635802794, - "grad_norm": 482.3003845214844, + "grad_norm": 324881.75, "learning_rate": 1.5320000000000001e-06, - "loss": 80.9816, + "loss": 354236.45, "step": 7660 }, { "epoch": 0.015493885268486609, - "grad_norm": 168.20175170898438, + "grad_norm": 27903.099609375, "learning_rate": 1.5340000000000003e-06, - "loss": 36.5181, + "loss": 111809.4875, "step": 7670 }, { "epoch": 0.015514085901170425, - "grad_norm": 1920.1688232421875, + "grad_norm": 1334918.625, "learning_rate": 1.536e-06, - "loss": 64.7135, + "loss": 514472.3, "step": 7680 }, { "epoch": 0.01553428653385424, - "grad_norm": 313.94708251953125, + "grad_norm": 156925.0625, "learning_rate": 1.5380000000000001e-06, - "loss": 37.5383, + "loss": 217691.3, "step": 7690 }, { "epoch": 0.015554487166538056, - "grad_norm": 425.00714111328125, + "grad_norm": 165765.828125, "learning_rate": 1.54e-06, - "loss": 45.3157, + "loss": 185836.5375, "step": 7700 }, { "epoch": 0.015574687799221871, - "grad_norm": 86.49829864501953, + "grad_norm": 10360.9951171875, "learning_rate": 1.5420000000000002e-06, - "loss": 43.9076, + "loss": 155044.075, "step": 7710 }, { "epoch": 0.015594888431905688, - "grad_norm": 229.47691345214844, + "grad_norm": 91174.984375, "learning_rate": 1.5440000000000002e-06, - "loss": 66.7047, + "loss": 285316.05, "step": 7720 }, { "epoch": 0.015615089064589503, - "grad_norm": 2023.368408203125, + "grad_norm": 2422477.25, "learning_rate": 1.546e-06, - "loss": 37.3324, + "loss": 308131.1, "step": 7730 }, { "epoch": 0.01563528969727332, - "grad_norm": 101.54640197753906, + "grad_norm": 12157.2255859375, "learning_rate": 1.548e-06, - "loss": 37.2192, + "loss": 82299.3125, "step": 7740 }, { "epoch": 0.015655490329957136, - "grad_norm": 452.2474670410156, + "grad_norm": 237615.234375, "learning_rate": 1.5500000000000002e-06, - "loss": 33.3281, + "loss": 156149.475, "step": 7750 }, { "epoch": 0.01567569096264095, - "grad_norm": 153.22509765625, + "grad_norm": 17269.208984375, "learning_rate": 1.5520000000000001e-06, - "loss": 53.4306, + "loss": 240618.15, "step": 7760 }, { "epoch": 0.015695891595324765, - "grad_norm": 303.5377502441406, + "grad_norm": 93391.2890625, "learning_rate": 1.5540000000000003e-06, - "loss": 72.9343, + "loss": 365253.45, "step": 7770 }, { "epoch": 0.01571609222800858, - "grad_norm": 256.4889831542969, + "grad_norm": 205575.21875, "learning_rate": 1.556e-06, - "loss": 36.4374, + "loss": 119333.5125, "step": 7780 }, { "epoch": 0.015736292860692398, - "grad_norm": 449.4687805175781, + "grad_norm": 216630.65625, "learning_rate": 1.5580000000000002e-06, - "loss": 56.3437, + "loss": 288919.15, "step": 7790 }, { "epoch": 0.01575649349337621, - "grad_norm": 433.4064636230469, + "grad_norm": 205580.484375, "learning_rate": 1.56e-06, - "loss": 61.6731, + "loss": 306144.425, "step": 7800 }, { "epoch": 0.015776694126060028, - "grad_norm": 600.9246215820312, + "grad_norm": 154740.140625, "learning_rate": 1.5620000000000002e-06, - "loss": 63.9666, + "loss": 207686.075, "step": 7810 }, { "epoch": 0.015796894758743844, - "grad_norm": 333.37652587890625, + "grad_norm": 112742.796875, "learning_rate": 1.5640000000000002e-06, - "loss": 31.7235, + "loss": 116146.725, "step": 7820 }, { "epoch": 0.01581709539142766, - "grad_norm": 632.3251953125, + "grad_norm": 175746.265625, "learning_rate": 1.566e-06, - "loss": 23.0323, + "loss": 35449.6438, "step": 7830 }, { "epoch": 0.015837296024111474, - "grad_norm": 181.81378173828125, + "grad_norm": 53609.9609375, "learning_rate": 1.568e-06, - "loss": 56.7787, + "loss": 311988.525, "step": 7840 }, { "epoch": 0.01585749665679529, - "grad_norm": 149.82843017578125, + "grad_norm": 1675.403076171875, "learning_rate": 1.5700000000000002e-06, - "loss": 30.9736, + "loss": 118235.975, "step": 7850 }, { "epoch": 0.015877697289479107, - "grad_norm": 163.4024200439453, + "grad_norm": 79036.703125, "learning_rate": 1.5720000000000002e-06, - "loss": 66.7266, + "loss": 333110.15, "step": 7860 }, { "epoch": 0.015897897922162924, - "grad_norm": 272.0547180175781, + "grad_norm": 121041.5078125, "learning_rate": 1.5740000000000003e-06, - "loss": 47.1427, + "loss": 131227.175, "step": 7870 }, { "epoch": 0.015918098554846737, - "grad_norm": 975.4248046875, + "grad_norm": 968334.125, "learning_rate": 1.576e-06, - "loss": 43.4788, + "loss": 213540.95, "step": 7880 }, { "epoch": 0.015938299187530553, - "grad_norm": 598.1261596679688, + "grad_norm": 373632.90625, "learning_rate": 1.5780000000000002e-06, - "loss": 59.9543, + "loss": 231027.975, "step": 7890 }, { "epoch": 0.01595849982021437, - "grad_norm": 402.0766296386719, + "grad_norm": 194557.671875, "learning_rate": 1.5800000000000001e-06, - "loss": 58.3692, + "loss": 186443.0875, "step": 7900 }, { "epoch": 0.015978700452898186, - "grad_norm": 197.23599243164062, + "grad_norm": 17203.94921875, "learning_rate": 1.5820000000000003e-06, - "loss": 38.9944, + "loss": 109924.1875, "step": 7910 }, { "epoch": 0.015998901085582, - "grad_norm": 477.9730529785156, + "grad_norm": 99493.1484375, "learning_rate": 1.5840000000000002e-06, - "loss": 75.8041, + "loss": 254302.475, "step": 7920 }, { "epoch": 0.016019101718265816, - "grad_norm": 377.2072448730469, + "grad_norm": 115524.3984375, "learning_rate": 1.586e-06, - "loss": 57.0376, + "loss": 323464.5, "step": 7930 }, { "epoch": 0.016039302350949632, - "grad_norm": 113.2324447631836, + "grad_norm": 24736.4765625, "learning_rate": 1.588e-06, - "loss": 63.0597, + "loss": 344560.075, "step": 7940 }, { "epoch": 0.01605950298363345, - "grad_norm": 564.6503295898438, + "grad_norm": 389144.84375, "learning_rate": 1.5900000000000002e-06, - "loss": 78.9989, + "loss": 260230.3, "step": 7950 }, { "epoch": 0.016079703616317262, - "grad_norm": 238.32669067382812, + "grad_norm": 21696.59375, "learning_rate": 1.5920000000000002e-06, - "loss": 30.8973, + "loss": 187144.6625, "step": 7960 }, { "epoch": 0.01609990424900108, - "grad_norm": 418.0289611816406, + "grad_norm": 107240.8203125, "learning_rate": 1.594e-06, - "loss": 56.0625, + "loss": 172624.25, "step": 7970 }, { "epoch": 0.016120104881684895, - "grad_norm": 172.59120178222656, + "grad_norm": 5977.24072265625, "learning_rate": 1.596e-06, - "loss": 38.1346, + "loss": 184867.5625, "step": 7980 }, { "epoch": 0.01614030551436871, - "grad_norm": 675.3900146484375, + "grad_norm": 349368.46875, "learning_rate": 1.5980000000000002e-06, - "loss": 57.6103, + "loss": 174286.6, "step": 7990 }, { "epoch": 0.016160506147052525, - "grad_norm": 425.52471923828125, + "grad_norm": 36486.40234375, "learning_rate": 1.6000000000000001e-06, - "loss": 61.3249, + "loss": 170603.55, "step": 8000 }, { "epoch": 0.01618070677973634, - "grad_norm": 302.97198486328125, + "grad_norm": 126595.5, "learning_rate": 1.6020000000000003e-06, - "loss": 54.2579, + "loss": 253838.6, "step": 8010 }, { "epoch": 0.016200907412420158, - "grad_norm": 216.4959259033203, + "grad_norm": 29300.46875, "learning_rate": 1.604e-06, - "loss": 42.2368, + "loss": 68367.35, "step": 8020 }, { "epoch": 0.016221108045103974, - "grad_norm": 188.3519744873047, + "grad_norm": 15894.7734375, "learning_rate": 1.606e-06, - "loss": 39.4254, + "loss": 217096.0, "step": 8030 }, { "epoch": 0.016241308677787787, - "grad_norm": 997.1007080078125, + "grad_norm": 650139.0625, "learning_rate": 1.608e-06, - "loss": 79.8198, + "loss": 426402.9, "step": 8040 }, { "epoch": 0.016261509310471604, - "grad_norm": 195.25997924804688, + "grad_norm": 12715.7294921875, "learning_rate": 1.6100000000000003e-06, - "loss": 77.7452, + "loss": 329479.75, "step": 8050 }, { "epoch": 0.01628170994315542, - "grad_norm": 239.44577026367188, + "grad_norm": 253209.921875, "learning_rate": 1.6120000000000002e-06, - "loss": 43.4959, + "loss": 130440.9875, "step": 8060 }, { "epoch": 0.016301910575839237, - "grad_norm": 484.91546630859375, + "grad_norm": 247684.90625, "learning_rate": 1.614e-06, - "loss": 91.6501, + "loss": 496236.25, "step": 8070 }, { "epoch": 0.01632211120852305, - "grad_norm": 179.45724487304688, + "grad_norm": 101178.1328125, "learning_rate": 1.616e-06, - "loss": 66.2526, + "loss": 350229.925, "step": 8080 }, { "epoch": 0.016342311841206866, - "grad_norm": 298.34918212890625, + "grad_norm": 24327.1796875, "learning_rate": 1.6180000000000002e-06, - "loss": 31.2584, + "loss": 78622.45, "step": 8090 }, { "epoch": 0.016362512473890683, - "grad_norm": 783.3357543945312, + "grad_norm": 155916.9375, "learning_rate": 1.6200000000000002e-06, - "loss": 60.7814, + "loss": 345574.075, "step": 8100 }, { "epoch": 0.0163827131065745, - "grad_norm": 666.5510864257812, + "grad_norm": 442321.15625, "learning_rate": 1.6220000000000003e-06, - "loss": 96.3631, + "loss": 297430.1, "step": 8110 }, { "epoch": 0.016402913739258312, - "grad_norm": 211.1580352783203, + "grad_norm": 273138.96875, "learning_rate": 1.624e-06, - "loss": 41.1815, + "loss": 186881.4125, "step": 8120 }, { "epoch": 0.01642311437194213, - "grad_norm": 1165.19287109375, + "grad_norm": 1176609.0, "learning_rate": 1.626e-06, - "loss": 59.7263, + "loss": 269783.95, "step": 8130 }, { "epoch": 0.016443315004625945, - "grad_norm": 537.1383056640625, + "grad_norm": 86809.6796875, "learning_rate": 1.6280000000000001e-06, - "loss": 31.1325, + "loss": 93812.3687, "step": 8140 }, { "epoch": 0.016463515637309762, - "grad_norm": 219.5115966796875, + "grad_norm": 104506.09375, "learning_rate": 1.6300000000000003e-06, - "loss": 58.5545, + "loss": 291599.3, "step": 8150 }, { "epoch": 0.016483716269993575, - "grad_norm": 392.9336853027344, + "grad_norm": 61653.7890625, "learning_rate": 1.6320000000000002e-06, - "loss": 40.9983, + "loss": 94548.425, "step": 8160 }, { "epoch": 0.01650391690267739, - "grad_norm": 500.58673095703125, + "grad_norm": 267629.125, "learning_rate": 1.634e-06, - "loss": 26.4883, + "loss": 69935.0625, "step": 8170 }, { "epoch": 0.016524117535361208, - "grad_norm": 230.2061767578125, + "grad_norm": 51978.8203125, "learning_rate": 1.636e-06, - "loss": 69.0027, + "loss": 221986.55, "step": 8180 }, { "epoch": 0.016544318168045025, - "grad_norm": 139.67991638183594, + "grad_norm": 13824.529296875, "learning_rate": 1.6380000000000002e-06, - "loss": 40.0236, + "loss": 166805.5375, "step": 8190 }, { "epoch": 0.016564518800728838, - "grad_norm": 151.8848419189453, + "grad_norm": 19078.29296875, "learning_rate": 1.6400000000000002e-06, - "loss": 62.8098, + "loss": 281617.65, "step": 8200 }, { "epoch": 0.016584719433412654, - "grad_norm": 158.05601501464844, + "grad_norm": 48939.94921875, "learning_rate": 1.6420000000000003e-06, - "loss": 43.0802, + "loss": 98684.1062, "step": 8210 }, { "epoch": 0.01660492006609647, - "grad_norm": 207.73497009277344, + "grad_norm": 4527.85693359375, "learning_rate": 1.644e-06, - "loss": 41.6115, + "loss": 98289.4625, "step": 8220 }, { "epoch": 0.016625120698780287, - "grad_norm": 417.2087707519531, + "grad_norm": 73550.125, "learning_rate": 1.646e-06, - "loss": 70.0724, + "loss": 270273.175, "step": 8230 }, { "epoch": 0.0166453213314641, - "grad_norm": 93.3095932006836, + "grad_norm": 24748.099609375, "learning_rate": 1.6480000000000001e-06, - "loss": 69.3706, + "loss": 257013.325, "step": 8240 }, { "epoch": 0.016665521964147917, - "grad_norm": 329.656494140625, + "grad_norm": 2765.24365234375, "learning_rate": 1.6500000000000003e-06, - "loss": 34.4144, + "loss": 99217.75, "step": 8250 }, { "epoch": 0.016685722596831733, - "grad_norm": 126.59026336669922, + "grad_norm": 22675.955078125, "learning_rate": 1.6520000000000002e-06, - "loss": 53.5684, + "loss": 196459.475, "step": 8260 }, { "epoch": 0.01670592322951555, - "grad_norm": 140.4243621826172, + "grad_norm": 27652.763671875, "learning_rate": 1.654e-06, - "loss": 43.6177, + "loss": 129225.1, "step": 8270 }, { "epoch": 0.016726123862199363, - "grad_norm": 353.848876953125, + "grad_norm": 123883.890625, "learning_rate": 1.6560000000000001e-06, - "loss": 53.1289, + "loss": 246811.7, "step": 8280 }, { "epoch": 0.01674632449488318, - "grad_norm": 335.8575134277344, + "grad_norm": 80747.5859375, "learning_rate": 1.6580000000000003e-06, - "loss": 85.2379, + "loss": 352840.075, "step": 8290 }, { "epoch": 0.016766525127566996, - "grad_norm": 278.6214904785156, + "grad_norm": 127759.5390625, "learning_rate": 1.6600000000000002e-06, - "loss": 63.4511, + "loss": 304458.525, "step": 8300 }, { "epoch": 0.016786725760250813, - "grad_norm": 179.34422302246094, + "grad_norm": 75219.8984375, "learning_rate": 1.662e-06, - "loss": 72.0042, + "loss": 341796.325, "step": 8310 }, { "epoch": 0.016806926392934626, - "grad_norm": 93.28134155273438, + "grad_norm": 1235.9111328125, "learning_rate": 1.664e-06, - "loss": 45.7476, + "loss": 171970.0625, "step": 8320 }, { "epoch": 0.016827127025618442, - "grad_norm": 499.8504943847656, + "grad_norm": 219026.453125, "learning_rate": 1.666e-06, - "loss": 82.3932, + "loss": 249504.475, "step": 8330 }, { "epoch": 0.01684732765830226, - "grad_norm": 238.96231079101562, + "grad_norm": 51439.18359375, "learning_rate": 1.6680000000000002e-06, - "loss": 57.8808, + "loss": 242490.275, "step": 8340 }, { "epoch": 0.016867528290986075, - "grad_norm": 115.48772430419922, + "grad_norm": 128323.8984375, "learning_rate": 1.6700000000000003e-06, - "loss": 56.5755, + "loss": 268959.25, "step": 8350 }, { "epoch": 0.016887728923669888, - "grad_norm": 564.8267822265625, + "grad_norm": 258862.8125, "learning_rate": 1.672e-06, - "loss": 52.928, + "loss": 176406.275, "step": 8360 }, { "epoch": 0.016907929556353705, - "grad_norm": 166.8284454345703, + "grad_norm": 15073.0986328125, "learning_rate": 1.674e-06, - "loss": 43.1772, + "loss": 170636.275, "step": 8370 }, { "epoch": 0.01692813018903752, - "grad_norm": 146.07325744628906, + "grad_norm": 6223.2353515625, "learning_rate": 1.6760000000000001e-06, - "loss": 31.1367, + "loss": 86845.65, "step": 8380 }, { "epoch": 0.016948330821721338, - "grad_norm": 354.1470031738281, + "grad_norm": 26275.232421875, "learning_rate": 1.6780000000000003e-06, - "loss": 34.8656, + "loss": 102728.2875, "step": 8390 }, { "epoch": 0.01696853145440515, - "grad_norm": 262.3908996582031, + "grad_norm": 33051.6015625, "learning_rate": 1.6800000000000002e-06, - "loss": 32.7354, + "loss": 124043.0, "step": 8400 }, { "epoch": 0.016988732087088967, - "grad_norm": 135.3816680908203, + "grad_norm": 37339.703125, "learning_rate": 1.682e-06, - "loss": 43.6971, + "loss": 175611.0, "step": 8410 }, { "epoch": 0.017008932719772784, - "grad_norm": 307.46795654296875, + "grad_norm": 17978.5703125, "learning_rate": 1.684e-06, - "loss": 49.57, + "loss": 167716.6375, "step": 8420 }, { "epoch": 0.0170291333524566, - "grad_norm": 268.0341491699219, + "grad_norm": 147148.234375, "learning_rate": 1.686e-06, - "loss": 30.8596, + "loss": 166211.925, "step": 8430 }, { "epoch": 0.017049333985140414, - "grad_norm": 144.54171752929688, + "grad_norm": 5670.3740234375, "learning_rate": 1.6880000000000002e-06, - "loss": 37.8622, + "loss": 94768.8625, "step": 8440 }, { "epoch": 0.01706953461782423, - "grad_norm": 437.869384765625, + "grad_norm": 96811.6796875, "learning_rate": 1.6900000000000003e-06, - "loss": 76.1461, + "loss": 189864.6875, "step": 8450 }, { "epoch": 0.017089735250508047, - "grad_norm": 190.57923889160156, + "grad_norm": 125581.515625, "learning_rate": 1.692e-06, - "loss": 54.4853, + "loss": 264883.95, "step": 8460 }, { "epoch": 0.017109935883191863, - "grad_norm": 359.6463928222656, + "grad_norm": 95154.7890625, "learning_rate": 1.694e-06, - "loss": 91.8604, + "loss": 507918.55, "step": 8470 }, { "epoch": 0.017130136515875676, - "grad_norm": 144.54615783691406, + "grad_norm": 5532.56396484375, "learning_rate": 1.6960000000000002e-06, - "loss": 44.5617, + "loss": 214438.325, "step": 8480 }, { "epoch": 0.017150337148559493, - "grad_norm": 274.204345703125, + "grad_norm": 51728.6796875, "learning_rate": 1.6980000000000003e-06, - "loss": 76.0625, + "loss": 256925.9, "step": 8490 }, { "epoch": 0.01717053778124331, - "grad_norm": 633.35986328125, + "grad_norm": 274411.75, "learning_rate": 1.7000000000000002e-06, - "loss": 47.2993, + "loss": 180156.825, "step": 8500 }, { "epoch": 0.017190738413927126, - "grad_norm": 455.1701354980469, + "grad_norm": 199173.625, "learning_rate": 1.702e-06, - "loss": 61.543, + "loss": 280492.425, "step": 8510 }, { "epoch": 0.01721093904661094, - "grad_norm": 325.9155578613281, + "grad_norm": 126698.953125, "learning_rate": 1.7040000000000001e-06, - "loss": 45.0754, + "loss": 190648.025, "step": 8520 }, { "epoch": 0.017231139679294755, - "grad_norm": 97.58197784423828, + "grad_norm": 7434.88671875, "learning_rate": 1.706e-06, - "loss": 26.2271, + "loss": 95093.725, "step": 8530 }, { "epoch": 0.017251340311978572, - "grad_norm": 443.0581970214844, + "grad_norm": 398367.71875, "learning_rate": 1.7080000000000002e-06, - "loss": 78.8859, + "loss": 339804.25, "step": 8540 }, { "epoch": 0.01727154094466239, - "grad_norm": 1419.3890380859375, + "grad_norm": 1540372.0, "learning_rate": 1.7100000000000004e-06, - "loss": 93.4078, + "loss": 342819.8, "step": 8550 }, { "epoch": 0.0172917415773462, - "grad_norm": 643.1996459960938, + "grad_norm": 367394.8125, "learning_rate": 1.712e-06, - "loss": 60.312, + "loss": 203929.875, "step": 8560 }, { "epoch": 0.017311942210030018, - "grad_norm": 587.92138671875, + "grad_norm": 566595.1875, "learning_rate": 1.714e-06, - "loss": 35.0556, + "loss": 249264.175, "step": 8570 }, { "epoch": 0.017332142842713835, - "grad_norm": 157.58848571777344, + "grad_norm": 26283.34765625, "learning_rate": 1.7160000000000002e-06, - "loss": 20.5768, + "loss": 72706.6562, "step": 8580 }, { "epoch": 0.01735234347539765, - "grad_norm": 369.1510925292969, + "grad_norm": 127820.8671875, "learning_rate": 1.7180000000000003e-06, - "loss": 39.7221, + "loss": 80102.15, "step": 8590 }, { "epoch": 0.017372544108081464, - "grad_norm": 424.2974853515625, + "grad_norm": 199204.296875, "learning_rate": 1.72e-06, - "loss": 74.7076, + "loss": 335037.5, "step": 8600 }, { "epoch": 0.01739274474076528, - "grad_norm": 399.80169677734375, + "grad_norm": 74149.9765625, "learning_rate": 1.722e-06, - "loss": 59.7491, + "loss": 248309.8, "step": 8610 }, { "epoch": 0.017412945373449097, - "grad_norm": 601.986572265625, + "grad_norm": 427194.75, "learning_rate": 1.7240000000000001e-06, - "loss": 58.6193, + "loss": 320019.25, "step": 8620 }, { "epoch": 0.017433146006132914, - "grad_norm": 306.30322265625, + "grad_norm": 89093.0, "learning_rate": 1.726e-06, - "loss": 43.8858, + "loss": 221804.625, "step": 8630 }, { "epoch": 0.017453346638816727, - "grad_norm": 472.3922119140625, + "grad_norm": 345310.25, "learning_rate": 1.7280000000000002e-06, - "loss": 59.512, + "loss": 466206.65, "step": 8640 }, { "epoch": 0.017473547271500543, - "grad_norm": 228.1231689453125, + "grad_norm": 301045.59375, "learning_rate": 1.73e-06, - "loss": 30.1505, + "loss": 115214.7625, "step": 8650 }, { "epoch": 0.01749374790418436, - "grad_norm": 114.92398834228516, + "grad_norm": 21997.884765625, "learning_rate": 1.732e-06, - "loss": 42.0227, + "loss": 136823.675, "step": 8660 }, { "epoch": 0.017513948536868176, - "grad_norm": 386.7256774902344, + "grad_norm": 129498.2265625, "learning_rate": 1.734e-06, - "loss": 30.8715, + "loss": 121666.225, "step": 8670 }, { "epoch": 0.01753414916955199, - "grad_norm": 253.56845092773438, + "grad_norm": 108470.65625, "learning_rate": 1.7360000000000002e-06, - "loss": 64.0922, + "loss": 300269.675, "step": 8680 }, { "epoch": 0.017554349802235806, - "grad_norm": 123.68321990966797, + "grad_norm": 13974.2802734375, "learning_rate": 1.7380000000000003e-06, - "loss": 42.714, + "loss": 68140.5562, "step": 8690 }, { "epoch": 0.017574550434919622, - "grad_norm": 475.31451416015625, + "grad_norm": 462673.21875, "learning_rate": 1.74e-06, - "loss": 48.9452, + "loss": 273311.075, "step": 8700 }, { "epoch": 0.01759475106760344, - "grad_norm": 698.922607421875, + "grad_norm": 413977.25, "learning_rate": 1.742e-06, - "loss": 61.3517, + "loss": 202715.7875, "step": 8710 }, { "epoch": 0.017614951700287252, - "grad_norm": 499.5632019042969, + "grad_norm": 329861.15625, "learning_rate": 1.7440000000000002e-06, - "loss": 20.2815, + "loss": 61341.1875, "step": 8720 }, { "epoch": 0.01763515233297107, - "grad_norm": 156.6888427734375, + "grad_norm": 22969.1484375, "learning_rate": 1.746e-06, - "loss": 43.4921, + "loss": 111252.7875, "step": 8730 }, { "epoch": 0.017655352965654885, - "grad_norm": 238.9498748779297, + "grad_norm": 141690.265625, "learning_rate": 1.7480000000000002e-06, - "loss": 66.6753, + "loss": 185266.3, "step": 8740 }, { "epoch": 0.0176755535983387, - "grad_norm": 112.41715240478516, + "grad_norm": 11485.2763671875, "learning_rate": 1.75e-06, - "loss": 74.7779, + "loss": 233691.35, "step": 8750 }, { "epoch": 0.017695754231022515, - "grad_norm": 277.79583740234375, + "grad_norm": 29335.814453125, "learning_rate": 1.7520000000000001e-06, - "loss": 46.157, + "loss": 120279.4375, "step": 8760 }, { "epoch": 0.01771595486370633, - "grad_norm": 286.8831481933594, + "grad_norm": 130628.3125, "learning_rate": 1.754e-06, - "loss": 45.5434, + "loss": 142427.1625, "step": 8770 }, { "epoch": 0.017736155496390148, - "grad_norm": 336.81591796875, + "grad_norm": 352035.15625, "learning_rate": 1.7560000000000002e-06, - "loss": 29.6937, + "loss": 112508.2875, "step": 8780 }, { "epoch": 0.017756356129073964, - "grad_norm": 168.8451690673828, + "grad_norm": 22694.732421875, "learning_rate": 1.7580000000000004e-06, - "loss": 66.5047, + "loss": 291657.275, "step": 8790 }, { "epoch": 0.017776556761757777, - "grad_norm": 188.74234008789062, + "grad_norm": 5431.76806640625, "learning_rate": 1.76e-06, - "loss": 58.4439, + "loss": 198684.8625, "step": 8800 }, { "epoch": 0.017796757394441594, - "grad_norm": 189.65457153320312, + "grad_norm": 14876.6875, "learning_rate": 1.762e-06, - "loss": 45.4092, + "loss": 81385.4563, "step": 8810 }, { "epoch": 0.01781695802712541, - "grad_norm": 272.9226379394531, + "grad_norm": 39579.82421875, "learning_rate": 1.7640000000000002e-06, - "loss": 44.9742, + "loss": 118722.0, "step": 8820 }, { "epoch": 0.017837158659809227, - "grad_norm": 273.947509765625, + "grad_norm": 106284.8046875, "learning_rate": 1.7660000000000001e-06, - "loss": 35.8658, + "loss": 147111.0875, "step": 8830 }, { "epoch": 0.01785735929249304, - "grad_norm": 634.2229614257812, + "grad_norm": 752857.375, "learning_rate": 1.7680000000000003e-06, - "loss": 46.6809, + "loss": 203821.325, "step": 8840 }, { "epoch": 0.017877559925176856, - "grad_norm": 413.4915466308594, + "grad_norm": 111270.703125, "learning_rate": 1.77e-06, - "loss": 62.6405, + "loss": 207412.6375, "step": 8850 }, { "epoch": 0.017897760557860673, - "grad_norm": 269.6117248535156, + "grad_norm": 55082.8671875, "learning_rate": 1.7720000000000001e-06, - "loss": 33.1034, + "loss": 70750.7, "step": 8860 }, { "epoch": 0.01791796119054449, - "grad_norm": 294.054931640625, + "grad_norm": 69754.140625, "learning_rate": 1.774e-06, - "loss": 46.5627, + "loss": 137613.8625, "step": 8870 }, { "epoch": 0.017938161823228303, - "grad_norm": 448.3184509277344, + "grad_norm": 213694.59375, "learning_rate": 1.7760000000000002e-06, - "loss": 38.8997, + "loss": 109314.1125, "step": 8880 }, { "epoch": 0.01795836245591212, - "grad_norm": 393.34515380859375, + "grad_norm": 234735.375, "learning_rate": 1.7780000000000004e-06, - "loss": 45.0721, + "loss": 154708.425, "step": 8890 }, { "epoch": 0.017978563088595936, - "grad_norm": 223.7178955078125, + "grad_norm": 42629.8671875, "learning_rate": 1.7800000000000001e-06, - "loss": 38.89, + "loss": 131700.4, "step": 8900 }, { "epoch": 0.017998763721279752, - "grad_norm": 425.2696228027344, + "grad_norm": 138410.25, "learning_rate": 1.782e-06, - "loss": 38.7017, + "loss": 92197.9688, "step": 8910 }, { "epoch": 0.018018964353963565, - "grad_norm": 161.086181640625, + "grad_norm": 12872.5224609375, "learning_rate": 1.7840000000000002e-06, - "loss": 62.1248, + "loss": 209992.25, "step": 8920 }, { "epoch": 0.018039164986647382, - "grad_norm": 495.1114807128906, + "grad_norm": 124019.953125, "learning_rate": 1.7860000000000001e-06, - "loss": 55.0598, + "loss": 184136.525, "step": 8930 }, { "epoch": 0.0180593656193312, - "grad_norm": 495.57708740234375, + "grad_norm": 198214.28125, "learning_rate": 1.788e-06, - "loss": 39.2305, + "loss": 123898.775, "step": 8940 }, { "epoch": 0.018079566252015015, - "grad_norm": 80.4286880493164, + "grad_norm": 3564.497314453125, "learning_rate": 1.79e-06, - "loss": 42.2665, + "loss": 161231.75, "step": 8950 }, { "epoch": 0.018099766884698828, - "grad_norm": 471.2948913574219, + "grad_norm": 130117.265625, "learning_rate": 1.7920000000000002e-06, - "loss": 66.7034, + "loss": 293862.45, "step": 8960 }, { "epoch": 0.018119967517382644, - "grad_norm": 1077.8858642578125, + "grad_norm": 655578.875, "learning_rate": 1.794e-06, - "loss": 44.6607, + "loss": 190018.45, "step": 8970 }, { "epoch": 0.01814016815006646, - "grad_norm": 314.0081787109375, + "grad_norm": 224807.703125, "learning_rate": 1.7960000000000003e-06, - "loss": 39.0553, + "loss": 103708.875, "step": 8980 }, { "epoch": 0.018160368782750277, - "grad_norm": 1193.4112548828125, + "grad_norm": 978972.9375, "learning_rate": 1.798e-06, - "loss": 51.7662, + "loss": 189932.2875, "step": 8990 }, { "epoch": 0.01818056941543409, - "grad_norm": 170.75794982910156, + "grad_norm": 59526.234375, "learning_rate": 1.8000000000000001e-06, - "loss": 44.9323, + "loss": 243750.075, "step": 9000 }, { "epoch": 0.018200770048117907, - "grad_norm": 397.14892578125, + "grad_norm": 44576.1328125, "learning_rate": 1.802e-06, - "loss": 49.9452, + "loss": 196037.2625, "step": 9010 }, { "epoch": 0.018220970680801724, - "grad_norm": 737.1470947265625, + "grad_norm": 602477.5, "learning_rate": 1.8040000000000002e-06, - "loss": 41.7371, + "loss": 169600.575, "step": 9020 }, { "epoch": 0.01824117131348554, - "grad_norm": 250.91387939453125, + "grad_norm": 367696.65625, "learning_rate": 1.8060000000000002e-06, - "loss": 38.1328, + "loss": 258548.3, "step": 9030 }, { "epoch": 0.018261371946169353, - "grad_norm": 265.27801513671875, + "grad_norm": 159485.984375, "learning_rate": 1.808e-06, - "loss": 40.8418, + "loss": 181713.625, "step": 9040 }, { "epoch": 0.01828157257885317, - "grad_norm": 380.687255859375, + "grad_norm": 49286.4296875, "learning_rate": 1.81e-06, - "loss": 36.5224, + "loss": 95812.0, "step": 9050 }, { "epoch": 0.018301773211536986, - "grad_norm": 462.69659423828125, + "grad_norm": 192718.546875, "learning_rate": 1.8120000000000002e-06, - "loss": 54.9324, + "loss": 168421.825, "step": 9060 }, { "epoch": 0.018321973844220803, - "grad_norm": 355.5519104003906, + "grad_norm": 116852.0546875, "learning_rate": 1.8140000000000001e-06, - "loss": 56.7894, + "loss": 116111.2, "step": 9070 }, { "epoch": 0.018342174476904616, - "grad_norm": 163.27798461914062, + "grad_norm": 6888.126953125, "learning_rate": 1.8160000000000003e-06, - "loss": 22.1431, + "loss": 58578.4375, "step": 9080 }, { "epoch": 0.018362375109588432, - "grad_norm": 232.2137451171875, + "grad_norm": 119071.859375, "learning_rate": 1.818e-06, - "loss": 70.4994, + "loss": 148952.65, "step": 9090 }, { "epoch": 0.01838257574227225, - "grad_norm": 121.7843017578125, + "grad_norm": 6403.80810546875, "learning_rate": 1.8200000000000002e-06, - "loss": 78.3187, + "loss": 358409.025, "step": 9100 }, { "epoch": 0.018402776374956065, - "grad_norm": 295.08087158203125, + "grad_norm": 8376.9775390625, "learning_rate": 1.822e-06, - "loss": 41.4374, + "loss": 261476.575, "step": 9110 }, { "epoch": 0.01842297700763988, - "grad_norm": 484.17034912109375, + "grad_norm": 162831.734375, "learning_rate": 1.8240000000000002e-06, - "loss": 91.9978, + "loss": 424168.55, "step": 9120 }, { "epoch": 0.018443177640323695, "grad_norm": 0.0, "learning_rate": 1.8260000000000002e-06, - "loss": 39.86, + "loss": 204041.7875, "step": 9130 }, { "epoch": 0.01846337827300751, - "grad_norm": 408.32794189453125, + "grad_norm": 367768.71875, "learning_rate": 1.8280000000000001e-06, - "loss": 51.7569, + "loss": 204617.725, "step": 9140 }, { "epoch": 0.018483578905691325, - "grad_norm": 757.9410400390625, + "grad_norm": 396576.09375, "learning_rate": 1.83e-06, - "loss": 43.8201, + "loss": 310049.325, "step": 9150 }, { "epoch": 0.01850377953837514, - "grad_norm": 339.34014892578125, + "grad_norm": 11562.51953125, "learning_rate": 1.8320000000000002e-06, - "loss": 32.6057, + "loss": 62880.875, "step": 9160 }, { "epoch": 0.018523980171058958, - "grad_norm": 267.2773742675781, + "grad_norm": 94717.1953125, "learning_rate": 1.8340000000000001e-06, - "loss": 49.1096, + "loss": 226470.175, "step": 9170 }, { "epoch": 0.018544180803742774, - "grad_norm": 138.07858276367188, + "grad_norm": 10801.46875, "learning_rate": 1.8360000000000003e-06, - "loss": 50.861, + "loss": 144022.975, "step": 9180 }, { "epoch": 0.018564381436426587, - "grad_norm": 732.4011840820312, + "grad_norm": 456426.6875, "learning_rate": 1.838e-06, - "loss": 50.626, + "loss": 224213.525, "step": 9190 }, { "epoch": 0.018584582069110404, - "grad_norm": 459.9410705566406, + "grad_norm": 125187.3984375, "learning_rate": 1.8400000000000002e-06, - "loss": 77.7155, + "loss": 392661.075, "step": 9200 }, { "epoch": 0.01860478270179422, - "grad_norm": 361.04827880859375, + "grad_norm": 156210.359375, "learning_rate": 1.8420000000000001e-06, - "loss": 56.3962, + "loss": 306096.425, "step": 9210 }, { "epoch": 0.018624983334478037, - "grad_norm": 103.32390594482422, + "grad_norm": 29360.91015625, "learning_rate": 1.8440000000000003e-06, - "loss": 34.9536, + "loss": 182847.875, "step": 9220 }, { "epoch": 0.01864518396716185, - "grad_norm": 258.6226501464844, + "grad_norm": 35038.0078125, "learning_rate": 1.846e-06, - "loss": 40.2146, + "loss": 164709.4625, "step": 9230 }, { "epoch": 0.018665384599845666, - "grad_norm": 69.1675033569336, + "grad_norm": 2821.8828125, "learning_rate": 1.8480000000000001e-06, - "loss": 31.9552, + "loss": 133397.2875, "step": 9240 }, { "epoch": 0.018685585232529483, - "grad_norm": 538.9363403320312, + "grad_norm": 199431.71875, "learning_rate": 1.85e-06, - "loss": 39.1298, + "loss": 128709.275, "step": 9250 }, { "epoch": 0.0187057858652133, - "grad_norm": 197.433837890625, + "grad_norm": 61770.203125, "learning_rate": 1.8520000000000002e-06, - "loss": 35.7704, + "loss": 92683.5188, "step": 9260 }, { "epoch": 0.018725986497897112, - "grad_norm": 257.5333251953125, + "grad_norm": 70081.2265625, "learning_rate": 1.8540000000000002e-06, - "loss": 28.1576, + "loss": 128537.775, "step": 9270 }, { "epoch": 0.01874618713058093, - "grad_norm": 513.22314453125, + "grad_norm": 387805.875, "learning_rate": 1.856e-06, - "loss": 44.4404, + "loss": 131184.45, "step": 9280 }, { "epoch": 0.018766387763264746, - "grad_norm": 539.5836181640625, + "grad_norm": 230471.1875, "learning_rate": 1.858e-06, - "loss": 25.1197, + "loss": 67048.1625, "step": 9290 }, { "epoch": 0.018786588395948562, - "grad_norm": 821.5946655273438, + "grad_norm": 269256.9375, "learning_rate": 1.8600000000000002e-06, - "loss": 60.9803, + "loss": 280743.125, "step": 9300 }, { "epoch": 0.018806789028632375, - "grad_norm": 614.6856689453125, + "grad_norm": 196129.0, "learning_rate": 1.8620000000000001e-06, - "loss": 72.3941, + "loss": 277547.475, "step": 9310 }, { "epoch": 0.01882698966131619, - "grad_norm": 442.805908203125, + "grad_norm": 167322.703125, "learning_rate": 1.8640000000000003e-06, - "loss": 53.3565, + "loss": 226679.35, "step": 9320 }, { "epoch": 0.018847190294000008, - "grad_norm": 345.6584167480469, + "grad_norm": 105825.3203125, "learning_rate": 1.866e-06, - "loss": 58.1192, + "loss": 161138.0875, "step": 9330 }, { "epoch": 0.018867390926683825, - "grad_norm": 157.91995239257812, + "grad_norm": 74800.1953125, "learning_rate": 1.8680000000000002e-06, - "loss": 62.5507, + "loss": 320423.5, "step": 9340 }, { "epoch": 0.018887591559367638, - "grad_norm": 75.21997833251953, + "grad_norm": 14199.1162109375, "learning_rate": 1.87e-06, - "loss": 22.4524, + "loss": 65298.0563, "step": 9350 }, { "epoch": 0.018907792192051454, - "grad_norm": 256.8834228515625, + "grad_norm": 79235.25, "learning_rate": 1.8720000000000002e-06, - "loss": 37.4442, + "loss": 154080.5625, "step": 9360 }, { "epoch": 0.01892799282473527, - "grad_norm": 272.7713623046875, + "grad_norm": 49315.17578125, "learning_rate": 1.8740000000000002e-06, - "loss": 56.0822, + "loss": 208302.325, "step": 9370 }, { "epoch": 0.018948193457419087, - "grad_norm": 998.1771240234375, + "grad_norm": 766886.3125, "learning_rate": 1.8760000000000001e-06, - "loss": 31.9965, + "loss": 150688.75, "step": 9380 }, { "epoch": 0.0189683940901029, - "grad_norm": 119.96626281738281, + "grad_norm": 20780.0, "learning_rate": 1.878e-06, - "loss": 27.9579, + "loss": 138434.3875, "step": 9390 }, { "epoch": 0.018988594722786717, - "grad_norm": 696.4967651367188, + "grad_norm": 277064.71875, "learning_rate": 1.8800000000000002e-06, - "loss": 64.9537, + "loss": 228644.625, "step": 9400 }, { "epoch": 0.019008795355470533, - "grad_norm": 272.74456787109375, + "grad_norm": 136873.3125, "learning_rate": 1.8820000000000001e-06, - "loss": 55.4981, + "loss": 292500.625, "step": 9410 }, { "epoch": 0.01902899598815435, - "grad_norm": 132.8701171875, + "grad_norm": 170678.828125, "learning_rate": 1.8840000000000003e-06, - "loss": 45.7956, + "loss": 266123.575, "step": 9420 }, { "epoch": 0.019049196620838163, - "grad_norm": 301.0440368652344, + "grad_norm": 68716.0390625, "learning_rate": 1.886e-06, - "loss": 54.8154, + "loss": 277233.6, "step": 9430 }, { "epoch": 0.01906939725352198, - "grad_norm": 0.0, + "grad_norm": 1533938.625, "learning_rate": 1.8880000000000002e-06, - "loss": 39.3911, + "loss": 267523.6, "step": 9440 }, { "epoch": 0.019089597886205796, - "grad_norm": 126.65670013427734, + "grad_norm": 17387.728515625, "learning_rate": 1.8900000000000001e-06, - "loss": 33.7083, + "loss": 118217.85, "step": 9450 }, { "epoch": 0.019109798518889613, - "grad_norm": 363.18896484375, + "grad_norm": 75370.25, "learning_rate": 1.8920000000000003e-06, - "loss": 35.3334, + "loss": 107929.3125, "step": 9460 }, { "epoch": 0.019129999151573426, - "grad_norm": 409.2659606933594, + "grad_norm": 198404.15625, "learning_rate": 1.8940000000000002e-06, - "loss": 50.183, + "loss": 234778.525, "step": 9470 }, { "epoch": 0.019150199784257242, - "grad_norm": 589.9176025390625, + "grad_norm": 359127.84375, "learning_rate": 1.8960000000000001e-06, - "loss": 47.419, + "loss": 177440.8125, "step": 9480 }, { "epoch": 0.01917040041694106, - "grad_norm": 1499.8275146484375, + "grad_norm": 1569546.625, "learning_rate": 1.898e-06, - "loss": 62.8974, + "loss": 367530.6, "step": 9490 }, { "epoch": 0.019190601049624875, - "grad_norm": 370.2489318847656, + "grad_norm": 193981.125, "learning_rate": 1.9000000000000002e-06, - "loss": 49.096, + "loss": 189880.7625, "step": 9500 }, { "epoch": 0.01921080168230869, - "grad_norm": 108.92589569091797, + "grad_norm": 33809.28515625, "learning_rate": 1.9020000000000002e-06, - "loss": 31.4124, + "loss": 138347.9625, "step": 9510 }, { "epoch": 0.019231002314992505, - "grad_norm": 317.8792419433594, + "grad_norm": 60968.015625, "learning_rate": 1.9040000000000003e-06, - "loss": 65.8406, + "loss": 259975.275, "step": 9520 }, { "epoch": 0.01925120294767632, - "grad_norm": 227.07623291015625, + "grad_norm": 79259.734375, "learning_rate": 1.906e-06, - "loss": 49.0239, + "loss": 211220.425, "step": 9530 }, { "epoch": 0.019271403580360138, - "grad_norm": 235.9082794189453, + "grad_norm": 88129.421875, "learning_rate": 1.908e-06, - "loss": 42.3599, + "loss": 128965.1375, "step": 9540 }, { "epoch": 0.01929160421304395, - "grad_norm": 496.91766357421875, + "grad_norm": 200650.171875, "learning_rate": 1.9100000000000003e-06, - "loss": 39.7962, + "loss": 171273.3, "step": 9550 }, { "epoch": 0.019311804845727767, - "grad_norm": 622.4961547851562, + "grad_norm": 110858.8515625, "learning_rate": 1.912e-06, - "loss": 51.8609, + "loss": 187053.725, "step": 9560 }, { "epoch": 0.019332005478411584, - "grad_norm": 177.71238708496094, + "grad_norm": 8799.7587890625, "learning_rate": 1.9140000000000002e-06, - "loss": 27.1392, + "loss": 91186.3, "step": 9570 }, { "epoch": 0.0193522061110954, - "grad_norm": 122.27787780761719, + "grad_norm": 18846.958984375, "learning_rate": 1.916e-06, - "loss": 36.2883, + "loss": 90929.15, "step": 9580 }, { "epoch": 0.019372406743779214, "grad_norm": 0.0, "learning_rate": 1.918e-06, - "loss": 53.6395, + "loss": 217471.675, "step": 9590 }, { "epoch": 0.01939260737646303, - "grad_norm": 430.7233581542969, + "grad_norm": 70123.9140625, "learning_rate": 1.9200000000000003e-06, - "loss": 27.4208, + "loss": 96851.6313, "step": 9600 }, { "epoch": 0.019412808009146847, - "grad_norm": 460.03204345703125, + "grad_norm": 269410.8125, "learning_rate": 1.9220000000000004e-06, - "loss": 35.7222, + "loss": 138341.3, "step": 9610 }, { "epoch": 0.019433008641830663, - "grad_norm": 351.02545166015625, + "grad_norm": 69017.5859375, "learning_rate": 1.924e-06, - "loss": 27.4592, + "loss": 88187.5625, "step": 9620 }, { "epoch": 0.019453209274514476, - "grad_norm": 139.60052490234375, + "grad_norm": 30629.16796875, "learning_rate": 1.9260000000000003e-06, - "loss": 100.1594, + "loss": 288017.35, "step": 9630 }, { "epoch": 0.019473409907198293, - "grad_norm": 538.0855102539062, + "grad_norm": 436847.375, "learning_rate": 1.928e-06, - "loss": 33.5064, + "loss": 149871.825, "step": 9640 }, { "epoch": 0.01949361053988211, - "grad_norm": 55.12922668457031, + "grad_norm": 3049.2880859375, "learning_rate": 1.93e-06, - "loss": 43.873, + "loss": 141081.45, "step": 9650 }, { "epoch": 0.019513811172565926, - "grad_norm": 787.0375366210938, + "grad_norm": 753976.5625, "learning_rate": 1.9320000000000003e-06, - "loss": 50.113, + "loss": 248549.3, "step": 9660 }, { "epoch": 0.01953401180524974, - "grad_norm": 229.5850372314453, + "grad_norm": 88150.890625, "learning_rate": 1.934e-06, - "loss": 39.9231, + "loss": 117371.15, "step": 9670 }, { "epoch": 0.019554212437933555, - "grad_norm": 602.0407104492188, + "grad_norm": 180739.5625, "learning_rate": 1.936e-06, - "loss": 30.7054, + "loss": 92679.7625, "step": 9680 }, { "epoch": 0.019574413070617372, - "grad_norm": 451.3493957519531, + "grad_norm": 12266.8837890625, "learning_rate": 1.9380000000000003e-06, - "loss": 60.3465, + "loss": 277797.85, "step": 9690 }, { "epoch": 0.01959461370330119, - "grad_norm": 376.4764099121094, + "grad_norm": 63206.10546875, "learning_rate": 1.94e-06, - "loss": 71.5062, + "loss": 340878.0, "step": 9700 }, { "epoch": 0.019614814335985, - "grad_norm": 235.15089416503906, + "grad_norm": 29524.6796875, "learning_rate": 1.942e-06, - "loss": 34.5826, + "loss": 87905.9875, "step": 9710 }, { "epoch": 0.019635014968668818, - "grad_norm": 28.095537185668945, + "grad_norm": 2980.3837890625, "learning_rate": 1.944e-06, - "loss": 23.4616, + "loss": 100447.4937, "step": 9720 }, { "epoch": 0.019655215601352635, - "grad_norm": 751.23193359375, + "grad_norm": 63209.9375, "learning_rate": 1.946e-06, - "loss": 45.8394, + "loss": 152497.9125, "step": 9730 }, { "epoch": 0.01967541623403645, - "grad_norm": 1215.552734375, + "grad_norm": 714168.5625, "learning_rate": 1.9480000000000002e-06, - "loss": 69.7944, + "loss": 206707.5875, "step": 9740 }, { "epoch": 0.019695616866720264, - "grad_norm": 299.5130615234375, + "grad_norm": 116997.9765625, "learning_rate": 1.9500000000000004e-06, - "loss": 27.4116, + "loss": 91741.55, "step": 9750 }, { "epoch": 0.01971581749940408, - "grad_norm": 189.2348175048828, + "grad_norm": 23982.330078125, "learning_rate": 1.952e-06, - "loss": 38.8738, + "loss": 116716.925, "step": 9760 }, { "epoch": 0.019736018132087897, - "grad_norm": 193.2762908935547, + "grad_norm": 10120.0703125, "learning_rate": 1.9540000000000003e-06, - "loss": 44.1289, + "loss": 140564.675, "step": 9770 }, { "epoch": 0.019756218764771714, - "grad_norm": 157.08790588378906, + "grad_norm": 129205.390625, "learning_rate": 1.956e-06, - "loss": 25.5136, + "loss": 98927.9625, "step": 9780 }, { "epoch": 0.019776419397455527, - "grad_norm": 276.8238525390625, + "grad_norm": 18854.40234375, "learning_rate": 1.958e-06, - "loss": 75.9879, + "loss": 455151.9, "step": 9790 }, { "epoch": 0.019796620030139343, - "grad_norm": 173.24652099609375, + "grad_norm": 30223.154296875, "learning_rate": 1.9600000000000003e-06, - "loss": 60.5741, + "loss": 151692.825, "step": 9800 }, { "epoch": 0.01981682066282316, - "grad_norm": 514.0528564453125, + "grad_norm": 207403.171875, "learning_rate": 1.9620000000000004e-06, - "loss": 35.3093, + "loss": 111132.25, "step": 9810 }, { "epoch": 0.019837021295506976, - "grad_norm": 699.653076171875, + "grad_norm": 246225.703125, "learning_rate": 1.964e-06, - "loss": 43.2077, + "loss": 246638.15, "step": 9820 }, { "epoch": 0.01985722192819079, - "grad_norm": 358.9642028808594, + "grad_norm": 170809.28125, "learning_rate": 1.9660000000000003e-06, - "loss": 18.847, + "loss": 42310.4812, "step": 9830 }, { "epoch": 0.019877422560874606, - "grad_norm": 82.87079620361328, + "grad_norm": 9566.841796875, "learning_rate": 1.968e-06, - "loss": 41.4592, + "loss": 188284.4125, "step": 9840 }, { "epoch": 0.019897623193558422, - "grad_norm": 327.3822326660156, + "grad_norm": 98163.96875, "learning_rate": 1.97e-06, - "loss": 42.3351, + "loss": 155659.175, "step": 9850 }, { "epoch": 0.01991782382624224, - "grad_norm": 414.3736267089844, + "grad_norm": 262884.5, "learning_rate": 1.972e-06, - "loss": 70.6372, + "loss": 231699.225, "step": 9860 }, { "epoch": 0.019938024458926052, - "grad_norm": 453.106201171875, + "grad_norm": 110923.5, "learning_rate": 1.974e-06, - "loss": 53.4148, + "loss": 209592.35, "step": 9870 }, { "epoch": 0.01995822509160987, - "grad_norm": 313.9532775878906, + "grad_norm": 107043.8515625, "learning_rate": 1.9760000000000002e-06, - "loss": 66.0835, + "loss": 274136.15, "step": 9880 }, { "epoch": 0.019978425724293685, - "grad_norm": 423.7301025390625, + "grad_norm": 66834.2109375, "learning_rate": 1.9780000000000004e-06, - "loss": 45.8556, + "loss": 186796.9875, "step": 9890 }, { "epoch": 0.0199986263569775, - "grad_norm": 169.80453491210938, + "grad_norm": 9290.884765625, "learning_rate": 1.98e-06, - "loss": 39.9943, + "loss": 177494.725, "step": 9900 }, { "epoch": 0.020018826989661315, - "grad_norm": 731.8103637695312, + "grad_norm": 599032.1875, "learning_rate": 1.982e-06, - "loss": 25.0085, + "loss": 111563.4875, "step": 9910 }, { "epoch": 0.02003902762234513, - "grad_norm": 529.6599731445312, + "grad_norm": 72662.015625, "learning_rate": 1.984e-06, - "loss": 36.5183, + "loss": 73317.3625, "step": 9920 }, { "epoch": 0.020059228255028948, - "grad_norm": 213.07713317871094, + "grad_norm": 10192.505859375, "learning_rate": 1.986e-06, - "loss": 58.5261, + "loss": 294030.8, "step": 9930 }, { "epoch": 0.020079428887712764, - "grad_norm": 309.75762939453125, + "grad_norm": 43543.66015625, "learning_rate": 1.9880000000000003e-06, - "loss": 46.0943, + "loss": 511361.6, "step": 9940 }, { "epoch": 0.020099629520396577, - "grad_norm": 264.3821716308594, + "grad_norm": 39661.44921875, "learning_rate": 1.9900000000000004e-06, - "loss": 28.8483, + "loss": 92557.65, "step": 9950 }, { "epoch": 0.020119830153080394, - "grad_norm": 172.79559326171875, + "grad_norm": 9985.0087890625, "learning_rate": 1.992e-06, - "loss": 40.6877, + "loss": 128515.75, "step": 9960 }, { "epoch": 0.02014003078576421, - "grad_norm": 689.9242553710938, + "grad_norm": 1029568.75, "learning_rate": 1.9940000000000003e-06, - "loss": 53.548, + "loss": 308160.925, "step": 9970 }, { "epoch": 0.020160231418448027, - "grad_norm": 223.47425842285156, + "grad_norm": 116495.5703125, "learning_rate": 1.996e-06, - "loss": 52.1972, + "loss": 239910.725, "step": 9980 }, { "epoch": 0.02018043205113184, "grad_norm": 0.0, "learning_rate": 1.998e-06, - "loss": 39.6995, + "loss": 168247.3, "step": 9990 }, { "epoch": 0.020200632683815656, - "grad_norm": 368.7276306152344, + "grad_norm": 308445.5625, "learning_rate": 2.0000000000000003e-06, - "loss": 34.0271, + "loss": 165289.625, "step": 10000 }, { "epoch": 0.020220833316499473, - "grad_norm": 332.8701477050781, + "grad_norm": 250755.9375, "learning_rate": 2.002e-06, - "loss": 34.1079, + "loss": 94914.2688, "step": 10010 }, { "epoch": 0.02024103394918329, - "grad_norm": 132.14431762695312, + "grad_norm": 122606.5703125, "learning_rate": 2.004e-06, - "loss": 51.5085, + "loss": 275573.05, "step": 10020 }, { "epoch": 0.020261234581867103, - "grad_norm": 640.9298706054688, + "grad_norm": 692375.875, "learning_rate": 2.0060000000000004e-06, - "loss": 32.4422, + "loss": 132832.875, "step": 10030 }, { "epoch": 0.02028143521455092, - "grad_norm": 231.58595275878906, + "grad_norm": 92652.0078125, "learning_rate": 2.008e-06, - "loss": 24.953, + "loss": 57588.075, "step": 10040 }, { "epoch": 0.020301635847234736, - "grad_norm": 244.0689697265625, + "grad_norm": 94873.296875, "learning_rate": 2.0100000000000002e-06, - "loss": 47.1794, + "loss": 219276.325, "step": 10050 }, { "epoch": 0.020321836479918552, - "grad_norm": 739.2943725585938, + "grad_norm": 370621.03125, "learning_rate": 2.012e-06, - "loss": 62.5894, + "loss": 199631.15, "step": 10060 }, { "epoch": 0.020342037112602365, - "grad_norm": 266.4908752441406, + "grad_norm": 110412.3828125, "learning_rate": 2.014e-06, - "loss": 54.2614, + "loss": 194975.5125, "step": 10070 }, { "epoch": 0.020362237745286182, - "grad_norm": 477.4635314941406, + "grad_norm": 61865.3984375, "learning_rate": 2.0160000000000003e-06, - "loss": 54.2506, + "loss": 243265.275, "step": 10080 }, { "epoch": 0.02038243837797, - "grad_norm": 156.4163818359375, + "grad_norm": 14634.5927734375, "learning_rate": 2.0180000000000004e-06, - "loss": 25.1236, + "loss": 66251.7875, "step": 10090 }, { "epoch": 0.020402639010653815, - "grad_norm": 609.5767822265625, + "grad_norm": 53267.6796875, "learning_rate": 2.02e-06, - "loss": 58.6514, + "loss": 239047.4, "step": 10100 }, { "epoch": 0.020422839643337628, - "grad_norm": 595.8328247070312, + "grad_norm": 203086.296875, "learning_rate": 2.022e-06, - "loss": 45.1517, + "loss": 127655.95, "step": 10110 }, { "epoch": 0.020443040276021444, "grad_norm": 0.0, "learning_rate": 2.024e-06, - "loss": 54.5122, + "loss": 239519.5, "step": 10120 }, { "epoch": 0.02046324090870526, - "grad_norm": 164.32199096679688, + "grad_norm": 16439.02734375, "learning_rate": 2.026e-06, - "loss": 34.128, + "loss": 90778.0063, "step": 10130 }, { "epoch": 0.020483441541389077, - "grad_norm": 569.0836181640625, + "grad_norm": 566007.0625, "learning_rate": 2.0280000000000003e-06, - "loss": 36.4883, + "loss": 215094.975, "step": 10140 }, { "epoch": 0.02050364217407289, - "grad_norm": 134.37161254882812, + "grad_norm": 6451.05322265625, "learning_rate": 2.0300000000000005e-06, - "loss": 53.0233, + "loss": 200342.075, "step": 10150 }, { "epoch": 0.020523842806756707, - "grad_norm": 370.50897216796875, + "grad_norm": 24278.5234375, "learning_rate": 2.032e-06, - "loss": 55.7171, + "loss": 314518.825, "step": 10160 }, { "epoch": 0.020544043439440524, - "grad_norm": 181.24600219726562, + "grad_norm": 42175.890625, "learning_rate": 2.0340000000000003e-06, - "loss": 72.5106, + "loss": 369188.475, "step": 10170 }, { "epoch": 0.02056424407212434, - "grad_norm": 224.57296752929688, + "grad_norm": 53275.93359375, "learning_rate": 2.036e-06, - "loss": 57.2052, + "loss": 230838.725, "step": 10180 }, { "epoch": 0.020584444704808153, - "grad_norm": 468.5647277832031, + "grad_norm": 277483.125, "learning_rate": 2.0380000000000002e-06, - "loss": 67.7405, + "loss": 295991.675, "step": 10190 }, { "epoch": 0.02060464533749197, - "grad_norm": 224.03176879882812, + "grad_norm": 51335.8359375, "learning_rate": 2.04e-06, - "loss": 20.6499, + "loss": 54281.5875, "step": 10200 }, { "epoch": 0.020624845970175786, - "grad_norm": 91.05741119384766, + "grad_norm": 10198.130859375, "learning_rate": 2.042e-06, - "loss": 34.0487, + "loss": 257343.825, "step": 10210 }, { "epoch": 0.020645046602859603, - "grad_norm": 317.8121337890625, + "grad_norm": 74369.859375, "learning_rate": 2.0440000000000003e-06, - "loss": 76.1383, + "loss": 281244.425, "step": 10220 }, { "epoch": 0.020665247235543416, - "grad_norm": 171.6361846923828, + "grad_norm": 13320.4033203125, "learning_rate": 2.0460000000000004e-06, - "loss": 26.1916, + "loss": 77547.6313, "step": 10230 }, { "epoch": 0.020685447868227232, - "grad_norm": 221.1346435546875, + "grad_norm": 21368.619140625, "learning_rate": 2.048e-06, - "loss": 55.9204, + "loss": 336336.375, "step": 10240 }, { "epoch": 0.02070564850091105, - "grad_norm": 358.2643737792969, + "grad_norm": 128212.5546875, "learning_rate": 2.05e-06, - "loss": 30.2119, + "loss": 132494.4375, "step": 10250 }, { "epoch": 0.020725849133594865, - "grad_norm": 496.249267578125, + "grad_norm": 246439.109375, "learning_rate": 2.052e-06, - "loss": 41.935, + "loss": 222081.95, "step": 10260 }, { "epoch": 0.02074604976627868, - "grad_norm": 232.7117156982422, + "grad_norm": 19149.560546875, "learning_rate": 2.054e-06, - "loss": 45.4198, + "loss": 182048.4125, "step": 10270 }, { "epoch": 0.020766250398962495, - "grad_norm": 302.9405212402344, + "grad_norm": 133771.453125, "learning_rate": 2.0560000000000003e-06, - "loss": 30.6429, + "loss": 84358.5125, "step": 10280 }, { "epoch": 0.02078645103164631, - "grad_norm": 375.2894592285156, + "grad_norm": 108240.0625, "learning_rate": 2.0580000000000005e-06, - "loss": 28.4448, + "loss": 88887.4312, "step": 10290 }, { "epoch": 0.020806651664330128, - "grad_norm": 440.4918212890625, + "grad_norm": 11865.091796875, "learning_rate": 2.06e-06, - "loss": 40.1408, + "loss": 113050.05, "step": 10300 }, { "epoch": 0.02082685229701394, - "grad_norm": 173.5092315673828, + "grad_norm": 34138.80859375, "learning_rate": 2.062e-06, - "loss": 49.3462, + "loss": 246108.8, "step": 10310 }, { "epoch": 0.020847052929697758, - "grad_norm": 158.9846954345703, + "grad_norm": 8049.29248046875, "learning_rate": 2.064e-06, - "loss": 35.6647, + "loss": 104872.3875, "step": 10320 }, { "epoch": 0.020867253562381574, - "grad_norm": 96.53382110595703, + "grad_norm": 18735.966796875, "learning_rate": 2.066e-06, - "loss": 38.4712, + "loss": 119218.9875, "step": 10330 }, { "epoch": 0.02088745419506539, - "grad_norm": 387.89825439453125, + "grad_norm": 50239.6875, "learning_rate": 2.0680000000000004e-06, - "loss": 61.0449, + "loss": 175393.375, "step": 10340 }, { "epoch": 0.020907654827749204, - "grad_norm": 350.28350830078125, + "grad_norm": 218727.484375, "learning_rate": 2.07e-06, - "loss": 55.0126, + "loss": 218702.7, "step": 10350 }, { "epoch": 0.02092785546043302, - "grad_norm": 448.67596435546875, + "grad_norm": 175430.21875, "learning_rate": 2.0720000000000002e-06, - "loss": 35.3812, + "loss": 116825.525, "step": 10360 }, { "epoch": 0.020948056093116837, - "grad_norm": 268.1210021972656, + "grad_norm": 31505.91796875, "learning_rate": 2.0740000000000004e-06, - "loss": 63.0736, + "loss": 420594.7, "step": 10370 }, { "epoch": 0.020968256725800653, - "grad_norm": 345.1285400390625, + "grad_norm": 172105.78125, "learning_rate": 2.076e-06, - "loss": 38.5725, + "loss": 225520.975, "step": 10380 }, { "epoch": 0.020988457358484466, - "grad_norm": 358.0052185058594, + "grad_norm": 198758.921875, "learning_rate": 2.0780000000000003e-06, - "loss": 58.1518, + "loss": 324260.125, "step": 10390 }, { "epoch": 0.021008657991168283, - "grad_norm": 282.6187744140625, + "grad_norm": 24845.619140625, "learning_rate": 2.08e-06, - "loss": 41.3022, + "loss": 110448.4625, "step": 10400 }, { "epoch": 0.0210288586238521, - "grad_norm": 1270.18896484375, + "grad_norm": 1252559.25, "learning_rate": 2.082e-06, - "loss": 44.5932, + "loss": 277885.2, "step": 10410 }, { "epoch": 0.021049059256535916, - "grad_norm": 359.8027038574219, + "grad_norm": 206544.1875, "learning_rate": 2.0840000000000003e-06, - "loss": 48.8673, + "loss": 187192.475, "step": 10420 }, { "epoch": 0.02106925988921973, "grad_norm": 0.0, "learning_rate": 2.0860000000000004e-06, - "loss": 13.4491, + "loss": 41883.3281, "step": 10430 }, { "epoch": 0.021089460521903546, - "grad_norm": 228.76043701171875, + "grad_norm": 13673.1083984375, "learning_rate": 2.088e-06, - "loss": 63.8478, + "loss": 308043.725, "step": 10440 }, { "epoch": 0.021109661154587362, - "grad_norm": 292.0460510253906, + "grad_norm": 35814.21484375, "learning_rate": 2.09e-06, - "loss": 34.3013, + "loss": 144294.15, "step": 10450 }, { "epoch": 0.02112986178727118, - "grad_norm": 149.258056640625, + "grad_norm": 21655.98046875, "learning_rate": 2.092e-06, - "loss": 68.5596, + "loss": 281482.125, "step": 10460 }, { "epoch": 0.02115006241995499, - "grad_norm": 932.386962890625, + "grad_norm": 866236.8125, "learning_rate": 2.094e-06, - "loss": 74.2682, + "loss": 455088.95, "step": 10470 }, { "epoch": 0.021170263052638808, - "grad_norm": 432.47869873046875, + "grad_norm": 103840.0625, "learning_rate": 2.0960000000000003e-06, - "loss": 57.0165, + "loss": 343161.0, "step": 10480 }, { "epoch": 0.021190463685322625, - "grad_norm": 314.38153076171875, + "grad_norm": 20129.98828125, "learning_rate": 2.098e-06, - "loss": 33.2006, + "loss": 110661.5, "step": 10490 }, { "epoch": 0.02121066431800644, - "grad_norm": 437.1272277832031, + "grad_norm": 297649.4375, "learning_rate": 2.1000000000000002e-06, - "loss": 74.5867, + "loss": 334905.65, "step": 10500 }, { "epoch": 0.021230864950690254, - "grad_norm": 413.80389404296875, + "grad_norm": 181036.484375, "learning_rate": 2.102e-06, - "loss": 34.2584, + "loss": 142711.7, "step": 10510 }, { "epoch": 0.02125106558337407, - "grad_norm": 835.7724609375, + "grad_norm": 670847.625, "learning_rate": 2.104e-06, - "loss": 56.4592, + "loss": 233665.75, "step": 10520 }, { "epoch": 0.021271266216057887, - "grad_norm": 220.36672973632812, + "grad_norm": 148834.40625, "learning_rate": 2.1060000000000002e-06, - "loss": 23.0872, + "loss": 51089.4875, "step": 10530 }, { "epoch": 0.021291466848741704, - "grad_norm": 350.02117919921875, + "grad_norm": 76084.984375, "learning_rate": 2.108e-06, - "loss": 66.5229, + "loss": 182276.25, "step": 10540 }, { "epoch": 0.021311667481425517, - "grad_norm": 535.962158203125, + "grad_norm": 493170.875, "learning_rate": 2.11e-06, - "loss": 47.7854, + "loss": 288297.25, "step": 10550 }, { "epoch": 0.021331868114109333, - "grad_norm": 148.34683227539062, + "grad_norm": 33045.69921875, "learning_rate": 2.1120000000000003e-06, - "loss": 62.6502, + "loss": 296550.85, "step": 10560 }, { "epoch": 0.02135206874679315, - "grad_norm": 401.2169494628906, + "grad_norm": 39155.62890625, "learning_rate": 2.1140000000000004e-06, - "loss": 32.9045, + "loss": 143287.85, "step": 10570 }, { "epoch": 0.021372269379476967, - "grad_norm": 556.7566528320312, + "grad_norm": 515418.71875, "learning_rate": 2.116e-06, - "loss": 72.2003, + "loss": 365617.4, "step": 10580 }, { "epoch": 0.02139247001216078, - "grad_norm": 655.905029296875, + "grad_norm": 235497.40625, "learning_rate": 2.118e-06, - "loss": 45.2873, + "loss": 225068.05, "step": 10590 }, { "epoch": 0.021412670644844596, - "grad_norm": 165.234130859375, + "grad_norm": 64708.01171875, "learning_rate": 2.12e-06, - "loss": 52.1777, + "loss": 262285.0, "step": 10600 }, { "epoch": 0.021432871277528413, - "grad_norm": 218.18524169921875, + "grad_norm": 19995.99609375, "learning_rate": 2.122e-06, - "loss": 67.2895, + "loss": 339691.95, "step": 10610 }, { "epoch": 0.02145307191021223, - "grad_norm": 1015.1307983398438, + "grad_norm": 448805.875, "learning_rate": 2.1240000000000003e-06, - "loss": 48.3785, + "loss": 197892.625, "step": 10620 }, { "epoch": 0.021473272542896042, - "grad_norm": 163.9772186279297, + "grad_norm": 16386.470703125, "learning_rate": 2.1260000000000005e-06, - "loss": 33.712, + "loss": 133360.5, "step": 10630 }, { "epoch": 0.02149347317557986, - "grad_norm": 384.0441589355469, + "grad_norm": 291070.21875, "learning_rate": 2.128e-06, - "loss": 50.0235, + "loss": 344133.1, "step": 10640 }, { "epoch": 0.021513673808263675, - "grad_norm": 316.5508728027344, + "grad_norm": 19442.341796875, "learning_rate": 2.13e-06, - "loss": 44.1556, + "loss": 212195.15, "step": 10650 }, { "epoch": 0.021533874440947492, - "grad_norm": 281.0931396484375, + "grad_norm": 102768.0859375, "learning_rate": 2.132e-06, - "loss": 61.2343, + "loss": 280881.25, "step": 10660 }, { "epoch": 0.021554075073631305, - "grad_norm": 201.84912109375, + "grad_norm": 109993.0390625, "learning_rate": 2.1340000000000002e-06, - "loss": 40.26, + "loss": 128113.3875, "step": 10670 }, { "epoch": 0.02157427570631512, - "grad_norm": 197.72352600097656, + "grad_norm": 36109.28515625, "learning_rate": 2.1360000000000004e-06, - "loss": 42.9183, + "loss": 334705.375, "step": 10680 }, { "epoch": 0.021594476338998938, - "grad_norm": 256.32012939453125, + "grad_norm": 89997.0625, "learning_rate": 2.138e-06, - "loss": 48.9452, + "loss": 170194.3, "step": 10690 }, { "epoch": 0.021614676971682754, - "grad_norm": 298.1128845214844, + "grad_norm": 274441.5, "learning_rate": 2.1400000000000003e-06, - "loss": 43.0314, + "loss": 237795.05, "step": 10700 }, { "epoch": 0.021634877604366567, - "grad_norm": 612.3408813476562, + "grad_norm": 582226.6875, "learning_rate": 2.142e-06, - "loss": 69.2272, + "loss": 437209.1, "step": 10710 }, { "epoch": 0.021655078237050384, - "grad_norm": 111.99131774902344, + "grad_norm": 63613.77734375, "learning_rate": 2.144e-06, - "loss": 60.5289, + "loss": 244160.6, "step": 10720 }, { "epoch": 0.0216752788697342, - "grad_norm": 750.7109985351562, + "grad_norm": 468912.875, "learning_rate": 2.1460000000000003e-06, - "loss": 48.5465, + "loss": 264586.7, "step": 10730 }, { "epoch": 0.021695479502418017, - "grad_norm": 216.7166748046875, + "grad_norm": 30696.8828125, "learning_rate": 2.148e-06, - "loss": 54.5388, + "loss": 249193.675, "step": 10740 }, { "epoch": 0.02171568013510183, - "grad_norm": 344.44561767578125, + "grad_norm": 40559.4296875, "learning_rate": 2.15e-06, - "loss": 50.0201, + "loss": 139253.6375, "step": 10750 }, { "epoch": 0.021735880767785647, - "grad_norm": 162.8369598388672, + "grad_norm": 5625.7021484375, "learning_rate": 2.1520000000000003e-06, - "loss": 54.5186, + "loss": 143224.35, "step": 10760 }, { "epoch": 0.021756081400469463, - "grad_norm": 215.80905151367188, + "grad_norm": 43339.375, "learning_rate": 2.1540000000000005e-06, - "loss": 46.4014, + "loss": 177658.575, "step": 10770 }, { "epoch": 0.02177628203315328, - "grad_norm": 521.4719848632812, + "grad_norm": 207810.09375, "learning_rate": 2.156e-06, - "loss": 61.8271, + "loss": 205456.525, "step": 10780 }, { "epoch": 0.021796482665837093, - "grad_norm": 225.6836700439453, + "grad_norm": 93350.328125, "learning_rate": 2.158e-06, - "loss": 74.675, + "loss": 436111.45, "step": 10790 }, { "epoch": 0.02181668329852091, - "grad_norm": 883.0526123046875, + "grad_norm": 893509.0, "learning_rate": 2.16e-06, - "loss": 57.7596, + "loss": 335985.575, "step": 10800 }, { "epoch": 0.021836883931204726, - "grad_norm": 123.62247467041016, + "grad_norm": 5209.62841796875, "learning_rate": 2.1620000000000002e-06, - "loss": 32.6825, + "loss": 112050.1625, "step": 10810 }, { "epoch": 0.021857084563888542, - "grad_norm": 310.66766357421875, + "grad_norm": 7849.24267578125, "learning_rate": 2.1640000000000004e-06, - "loss": 49.481, + "loss": 131839.275, "step": 10820 }, { "epoch": 0.021877285196572355, - "grad_norm": 124.57794189453125, + "grad_norm": 17504.818359375, "learning_rate": 2.166e-06, - "loss": 40.2806, + "loss": 199185.2125, "step": 10830 }, { "epoch": 0.021897485829256172, - "grad_norm": 623.1845092773438, + "grad_norm": 464575.25, "learning_rate": 2.1680000000000002e-06, - "loss": 72.5964, + "loss": 361266.35, "step": 10840 }, { "epoch": 0.02191768646193999, - "grad_norm": 310.37078857421875, + "grad_norm": 30842.9296875, "learning_rate": 2.17e-06, - "loss": 26.9713, + "loss": 53671.5188, "step": 10850 }, { "epoch": 0.021937887094623805, "grad_norm": 0.0, "learning_rate": 2.172e-06, - "loss": 49.7367, + "loss": 212229.375, "step": 10860 }, { "epoch": 0.021958087727307618, - "grad_norm": 1510.5750732421875, + "grad_norm": 1209301.0, "learning_rate": 2.1740000000000003e-06, - "loss": 51.1697, + "loss": 279155.35, "step": 10870 }, { "epoch": 0.021978288359991435, - "grad_norm": 145.9217529296875, + "grad_norm": 15978.1728515625, "learning_rate": 2.176e-06, - "loss": 60.085, + "loss": 209689.975, "step": 10880 }, { "epoch": 0.02199848899267525, - "grad_norm": 96.63705444335938, + "grad_norm": 39182.4375, "learning_rate": 2.178e-06, - "loss": 49.9026, + "loss": 148013.65, "step": 10890 }, { "epoch": 0.022018689625359068, - "grad_norm": 1135.82763671875, + "grad_norm": 1493931.75, "learning_rate": 2.1800000000000003e-06, - "loss": 60.5735, + "loss": 503753.1, "step": 10900 }, { "epoch": 0.02203889025804288, - "grad_norm": 387.2878112792969, + "grad_norm": 135613.25, "learning_rate": 2.182e-06, - "loss": 62.8734, + "loss": 287817.475, "step": 10910 }, { "epoch": 0.022059090890726697, - "grad_norm": 375.1517333984375, + "grad_norm": 87817.140625, "learning_rate": 2.184e-06, - "loss": 63.5932, + "loss": 374785.3, "step": 10920 }, { "epoch": 0.022079291523410514, - "grad_norm": 284.25970458984375, + "grad_norm": 69477.1015625, "learning_rate": 2.186e-06, - "loss": 52.8156, + "loss": 231013.75, "step": 10930 }, { "epoch": 0.02209949215609433, - "grad_norm": 618.7918701171875, + "grad_norm": 335719.34375, "learning_rate": 2.188e-06, - "loss": 35.2665, + "loss": 170333.55, "step": 10940 }, { "epoch": 0.022119692788778143, "grad_norm": 0.0, "learning_rate": 2.19e-06, - "loss": 48.9761, + "loss": 195188.5875, "step": 10950 }, { "epoch": 0.02213989342146196, - "grad_norm": 557.6535034179688, + "grad_norm": 85045.2734375, "learning_rate": 2.1920000000000004e-06, - "loss": 36.8412, + "loss": 117404.225, "step": 10960 }, { "epoch": 0.022160094054145776, - "grad_norm": 103.97280883789062, + "grad_norm": 4724.892578125, "learning_rate": 2.194e-06, - "loss": 40.2107, + "loss": 321286.15, "step": 10970 }, { "epoch": 0.022180294686829593, - "grad_norm": 813.8086547851562, + "grad_norm": 844928.75, "learning_rate": 2.1960000000000002e-06, - "loss": 45.7711, + "loss": 234327.35, "step": 10980 }, { "epoch": 0.022200495319513406, - "grad_norm": 743.0590209960938, + "grad_norm": 787134.5, "learning_rate": 2.198e-06, - "loss": 66.9321, + "loss": 438021.1, "step": 10990 }, { "epoch": 0.022220695952197222, - "grad_norm": 760.7230834960938, + "grad_norm": 769362.5, "learning_rate": 2.2e-06, - "loss": 44.689, + "loss": 189649.075, "step": 11000 }, { "epoch": 0.02224089658488104, - "grad_norm": 94.33094787597656, + "grad_norm": 21264.240234375, "learning_rate": 2.2020000000000003e-06, - "loss": 26.6486, + "loss": 61169.7688, "step": 11010 }, { "epoch": 0.022261097217564856, - "grad_norm": 480.4449157714844, + "grad_norm": 46419.70703125, "learning_rate": 2.2040000000000004e-06, - "loss": 51.6362, + "loss": 134348.3625, "step": 11020 }, { "epoch": 0.02228129785024867, - "grad_norm": 244.3746337890625, + "grad_norm": 39749.52734375, "learning_rate": 2.206e-06, - "loss": 46.9043, + "loss": 219679.125, "step": 11030 }, { "epoch": 0.022301498482932485, - "grad_norm": 175.8813018798828, + "grad_norm": 6323.9912109375, "learning_rate": 2.2080000000000003e-06, - "loss": 43.7451, + "loss": 294979.1, "step": 11040 }, { "epoch": 0.0223216991156163, - "grad_norm": 320.31201171875, + "grad_norm": 88953.78125, "learning_rate": 2.21e-06, - "loss": 33.0492, + "loss": 55952.9625, "step": 11050 }, { "epoch": 0.022341899748300118, - "grad_norm": 297.73944091796875, + "grad_norm": 138993.4375, "learning_rate": 2.212e-06, - "loss": 46.3043, + "loss": 171653.4125, "step": 11060 }, { "epoch": 0.02236210038098393, - "grad_norm": 283.83697509765625, + "grad_norm": 20015.61328125, "learning_rate": 2.2140000000000003e-06, - "loss": 47.5, + "loss": 184142.2125, "step": 11070 }, { "epoch": 0.022382301013667748, - "grad_norm": 2311.3134765625, + "grad_norm": 2490858.0, "learning_rate": 2.216e-06, - "loss": 64.3231, + "loss": 476982.6, "step": 11080 }, { "epoch": 0.022402501646351564, - "grad_norm": 422.9956359863281, + "grad_norm": 183483.171875, "learning_rate": 2.218e-06, - "loss": 75.2266, + "loss": 410949.925, "step": 11090 }, { "epoch": 0.02242270227903538, - "grad_norm": 290.9288024902344, + "grad_norm": 87295.6953125, "learning_rate": 2.2200000000000003e-06, - "loss": 32.0426, + "loss": 71145.375, "step": 11100 }, { "epoch": 0.022442902911719194, - "grad_norm": 519.488037109375, + "grad_norm": 88965.734375, "learning_rate": 2.222e-06, - "loss": 27.0542, + "loss": 78855.8125, "step": 11110 }, { "epoch": 0.02246310354440301, - "grad_norm": 255.41062927246094, + "grad_norm": 6047.06494140625, "learning_rate": 2.2240000000000002e-06, - "loss": 56.4743, + "loss": 214548.775, "step": 11120 }, { "epoch": 0.022483304177086827, - "grad_norm": 176.76251220703125, + "grad_norm": 87858.5234375, "learning_rate": 2.226e-06, - "loss": 21.9631, + "loss": 130519.675, "step": 11130 }, { "epoch": 0.022503504809770643, - "grad_norm": 248.34571838378906, + "grad_norm": 22104.552734375, "learning_rate": 2.228e-06, - "loss": 23.0657, + "loss": 83149.2375, "step": 11140 }, { "epoch": 0.022523705442454457, - "grad_norm": 154.7519073486328, + "grad_norm": 16682.693359375, "learning_rate": 2.2300000000000002e-06, - "loss": 37.2831, + "loss": 150480.4, "step": 11150 }, { "epoch": 0.022543906075138273, - "grad_norm": 68.35366821289062, + "grad_norm": 8341.7353515625, "learning_rate": 2.2320000000000004e-06, - "loss": 58.1159, + "loss": 272830.675, "step": 11160 }, { "epoch": 0.02256410670782209, - "grad_norm": 325.9544677734375, + "grad_norm": 215770.828125, "learning_rate": 2.234e-06, - "loss": 62.9366, + "loss": 265484.55, "step": 11170 }, { "epoch": 0.022584307340505906, - "grad_norm": 250.54490661621094, + "grad_norm": 25720.90234375, "learning_rate": 2.2360000000000003e-06, - "loss": 43.2179, + "loss": 104988.075, "step": 11180 }, { "epoch": 0.02260450797318972, - "grad_norm": 538.22412109375, + "grad_norm": 351475.90625, "learning_rate": 2.238e-06, - "loss": 35.7263, + "loss": 124979.6875, "step": 11190 }, { "epoch": 0.022624708605873536, - "grad_norm": 331.21099853515625, + "grad_norm": 65903.28125, "learning_rate": 2.24e-06, - "loss": 42.7651, + "loss": 151977.8625, "step": 11200 }, { "epoch": 0.022644909238557352, - "grad_norm": 368.0136413574219, + "grad_norm": 75081.3671875, "learning_rate": 2.2420000000000003e-06, - "loss": 25.3137, + "loss": 106738.7875, "step": 11210 }, { "epoch": 0.02266510987124117, - "grad_norm": 2070.708740234375, + "grad_norm": 2420445.75, "learning_rate": 2.244e-06, - "loss": 68.0299, + "loss": 485389.25, "step": 11220 }, { "epoch": 0.022685310503924982, - "grad_norm": 180.6357879638672, + "grad_norm": 25445.6953125, "learning_rate": 2.246e-06, - "loss": 43.6809, + "loss": 169792.35, "step": 11230 }, { "epoch": 0.0227055111366088, "grad_norm": 0.0, "learning_rate": 2.2480000000000003e-06, - "loss": 31.6172, + "loss": 59886.125, "step": 11240 }, { "epoch": 0.022725711769292615, - "grad_norm": 306.71124267578125, + "grad_norm": 123477.890625, "learning_rate": 2.25e-06, - "loss": 99.2189, + "loss": 596789.5, "step": 11250 }, { "epoch": 0.02274591240197643, - "grad_norm": 367.4490966796875, + "grad_norm": 86213.984375, "learning_rate": 2.252e-06, - "loss": 22.6287, + "loss": 116559.925, "step": 11260 }, { "epoch": 0.022766113034660244, - "grad_norm": 158.46063232421875, + "grad_norm": 33373.98046875, "learning_rate": 2.254e-06, - "loss": 37.753, + "loss": 149166.5125, "step": 11270 }, { "epoch": 0.02278631366734406, - "grad_norm": 131.4368438720703, + "grad_norm": 14531.958984375, "learning_rate": 2.256e-06, - "loss": 57.2574, + "loss": 528277.95, "step": 11280 }, { "epoch": 0.022806514300027877, - "grad_norm": 308.2001647949219, + "grad_norm": 85663.5625, "learning_rate": 2.2580000000000002e-06, - "loss": 50.8366, + "loss": 389429.575, "step": 11290 }, { "epoch": 0.022826714932711694, - "grad_norm": 974.0122680664062, + "grad_norm": 569617.6875, "learning_rate": 2.2600000000000004e-06, - "loss": 49.5046, + "loss": 187540.5, "step": 11300 }, { "epoch": 0.022846915565395507, - "grad_norm": 119.5244140625, + "grad_norm": 21138.814453125, "learning_rate": 2.262e-06, - "loss": 42.4719, + "loss": 276540.7, "step": 11310 }, { "epoch": 0.022867116198079324, - "grad_norm": 331.5387878417969, + "grad_norm": 132398.03125, "learning_rate": 2.2640000000000003e-06, - "loss": 59.0774, + "loss": 192192.6, "step": 11320 }, { "epoch": 0.02288731683076314, - "grad_norm": 268.9253845214844, + "grad_norm": 267026.875, "learning_rate": 2.266e-06, - "loss": 24.6483, + "loss": 110597.925, "step": 11330 }, { "epoch": 0.022907517463446957, - "grad_norm": 414.32568359375, + "grad_norm": 525164.75, "learning_rate": 2.268e-06, - "loss": 50.0243, + "loss": 305098.4, "step": 11340 }, { "epoch": 0.02292771809613077, - "grad_norm": 109.05892944335938, + "grad_norm": 14395.517578125, "learning_rate": 2.2700000000000003e-06, - "loss": 36.9603, + "loss": 134904.8875, "step": 11350 }, { "epoch": 0.022947918728814586, - "grad_norm": 81.1783218383789, + "grad_norm": 12587.4853515625, "learning_rate": 2.2720000000000004e-06, - "loss": 32.3198, + "loss": 125746.9125, "step": 11360 }, { "epoch": 0.022968119361498403, - "grad_norm": 259.58740234375, + "grad_norm": 10431.8115234375, "learning_rate": 2.274e-06, - "loss": 53.6075, + "loss": 248374.875, "step": 11370 }, { "epoch": 0.02298831999418222, - "grad_norm": 1292.6165771484375, + "grad_norm": 685441.0625, "learning_rate": 2.2760000000000003e-06, - "loss": 63.8573, + "loss": 210242.5, "step": 11380 }, { "epoch": 0.023008520626866032, - "grad_norm": 288.04559326171875, + "grad_norm": 106268.7734375, "learning_rate": 2.278e-06, - "loss": 53.7196, + "loss": 204168.8875, "step": 11390 }, { "epoch": 0.02302872125954985, - "grad_norm": 287.5912780761719, + "grad_norm": 115173.6484375, "learning_rate": 2.28e-06, - "loss": 30.822, + "loss": 177847.175, "step": 11400 }, { "epoch": 0.023048921892233665, - "grad_norm": 1172.4749755859375, + "grad_norm": 458476.96875, "learning_rate": 2.282e-06, - "loss": 49.8892, + "loss": 203967.6, "step": 11410 }, { "epoch": 0.023069122524917482, - "grad_norm": 301.4336242675781, + "grad_norm": 68717.6171875, "learning_rate": 2.284e-06, - "loss": 35.3512, + "loss": 177917.4875, "step": 11420 }, { "epoch": 0.023089323157601295, - "grad_norm": 81.34664154052734, + "grad_norm": 6851.28564453125, "learning_rate": 2.2860000000000002e-06, - "loss": 48.5327, + "loss": 226488.375, "step": 11430 }, { "epoch": 0.02310952379028511, - "grad_norm": 102.46014404296875, + "grad_norm": 26431.19140625, "learning_rate": 2.2880000000000004e-06, - "loss": 31.259, + "loss": 96566.725, "step": 11440 }, { "epoch": 0.023129724422968928, - "grad_norm": 502.70574951171875, + "grad_norm": 153582.34375, "learning_rate": 2.29e-06, - "loss": 48.86, + "loss": 187648.6375, "step": 11450 }, { "epoch": 0.023149925055652745, - "grad_norm": 226.1034698486328, + "grad_norm": 98495.109375, "learning_rate": 2.2920000000000002e-06, - "loss": 19.4871, + "loss": 90046.1375, "step": 11460 }, { "epoch": 0.023170125688336558, - "grad_norm": 446.7929992675781, + "grad_norm": 273788.375, "learning_rate": 2.294e-06, - "loss": 42.4573, + "loss": 191117.825, "step": 11470 }, { "epoch": 0.023190326321020374, - "grad_norm": 73.54280090332031, + "grad_norm": 2182.5947265625, "learning_rate": 2.296e-06, - "loss": 40.967, + "loss": 214038.725, "step": 11480 }, { "epoch": 0.02321052695370419, - "grad_norm": 433.07720947265625, + "grad_norm": 127439.90625, "learning_rate": 2.2980000000000003e-06, - "loss": 51.1418, + "loss": 220722.1, "step": 11490 }, { "epoch": 0.023230727586388007, - "grad_norm": 206.02415466308594, + "grad_norm": 311651.0625, "learning_rate": 2.3000000000000004e-06, - "loss": 50.15, + "loss": 264769.4, "step": 11500 }, { "epoch": 0.02325092821907182, - "grad_norm": 502.6234130859375, + "grad_norm": 360631.59375, "learning_rate": 2.302e-06, - "loss": 68.2567, + "loss": 229427.825, "step": 11510 }, { "epoch": 0.023271128851755637, - "grad_norm": 1012.3300170898438, + "grad_norm": 894920.25, "learning_rate": 2.3040000000000003e-06, - "loss": 69.7107, + "loss": 318937.875, "step": 11520 }, { "epoch": 0.023291329484439453, - "grad_norm": 201.14218139648438, + "grad_norm": 54542.90234375, "learning_rate": 2.306e-06, - "loss": 30.1701, + "loss": 152383.975, "step": 11530 }, { "epoch": 0.02331153011712327, - "grad_norm": 700.2841186523438, + "grad_norm": 143595.0, "learning_rate": 2.308e-06, - "loss": 54.2419, + "loss": 179013.825, "step": 11540 }, { "epoch": 0.023331730749807083, - "grad_norm": 234.2910614013672, + "grad_norm": 49814.3046875, "learning_rate": 2.3100000000000003e-06, - "loss": 29.051, + "loss": 76869.8687, "step": 11550 }, { "epoch": 0.0233519313824909, - "grad_norm": 174.03155517578125, + "grad_norm": 36735.5, "learning_rate": 2.312e-06, - "loss": 38.0417, + "loss": 86621.8188, "step": 11560 }, { "epoch": 0.023372132015174716, - "grad_norm": 316.1815490722656, + "grad_norm": 192586.234375, "learning_rate": 2.314e-06, - "loss": 50.405, + "loss": 351408.35, "step": 11570 }, { "epoch": 0.023392332647858533, - "grad_norm": 117.5355224609375, + "grad_norm": 33397.43359375, "learning_rate": 2.3160000000000004e-06, - "loss": 60.2684, + "loss": 259296.35, "step": 11580 }, { "epoch": 0.023412533280542346, - "grad_norm": 135.48684692382812, + "grad_norm": 27275.26953125, "learning_rate": 2.318e-06, - "loss": 37.1933, + "loss": 337401.65, "step": 11590 }, { "epoch": 0.023432733913226162, - "grad_norm": 149.25283813476562, + "grad_norm": 66797.65625, "learning_rate": 2.3200000000000002e-06, - "loss": 50.3472, + "loss": 179736.4625, "step": 11600 }, { "epoch": 0.02345293454590998, - "grad_norm": 357.5829772949219, + "grad_norm": 5149.1572265625, "learning_rate": 2.322e-06, - "loss": 40.448, + "loss": 121597.05, "step": 11610 }, { "epoch": 0.023473135178593795, - "grad_norm": 516.7957153320312, + "grad_norm": 291694.625, "learning_rate": 2.324e-06, - "loss": 30.6153, + "loss": 199722.75, "step": 11620 }, { "epoch": 0.023493335811277608, - "grad_norm": 361.15252685546875, + "grad_norm": 87956.21875, "learning_rate": 2.3260000000000003e-06, - "loss": 42.5947, + "loss": 204191.85, "step": 11630 }, { "epoch": 0.023513536443961425, - "grad_norm": 608.917236328125, + "grad_norm": 11760.2724609375, "learning_rate": 2.3280000000000004e-06, - "loss": 59.6969, + "loss": 261996.675, "step": 11640 }, { "epoch": 0.02353373707664524, - "grad_norm": 313.3769836425781, + "grad_norm": 73485.765625, "learning_rate": 2.33e-06, - "loss": 30.6715, + "loss": 91844.8, "step": 11650 }, { "epoch": 0.023553937709329058, - "grad_norm": 150.16448974609375, + "grad_norm": 24635.0234375, "learning_rate": 2.3320000000000003e-06, - "loss": 34.5272, + "loss": 143534.5375, "step": 11660 }, { "epoch": 0.02357413834201287, - "grad_norm": 89.28950500488281, + "grad_norm": 6367.73193359375, "learning_rate": 2.334e-06, - "loss": 28.3635, + "loss": 95617.075, "step": 11670 }, { "epoch": 0.023594338974696687, - "grad_norm": 381.5764465332031, + "grad_norm": 141970.5, "learning_rate": 2.336e-06, - "loss": 52.9642, + "loss": 220427.275, "step": 11680 }, { "epoch": 0.023614539607380504, - "grad_norm": 280.15704345703125, + "grad_norm": 175935.09375, "learning_rate": 2.3380000000000003e-06, - "loss": 40.3236, + "loss": 106615.35, "step": 11690 }, { "epoch": 0.02363474024006432, - "grad_norm": 355.76104736328125, + "grad_norm": 273929.96875, "learning_rate": 2.3400000000000005e-06, - "loss": 78.9774, + "loss": 359533.1, "step": 11700 }, { "epoch": 0.023654940872748133, - "grad_norm": 905.1873779296875, + "grad_norm": 1051199.75, "learning_rate": 2.342e-06, - "loss": 59.1007, + "loss": 280341.925, "step": 11710 }, { "epoch": 0.02367514150543195, - "grad_norm": 515.1763305664062, + "grad_norm": 249443.5, "learning_rate": 2.3440000000000003e-06, - "loss": 55.5727, + "loss": 292792.75, "step": 11720 }, { "epoch": 0.023695342138115767, - "grad_norm": 401.4289855957031, + "grad_norm": 200450.4375, "learning_rate": 2.346e-06, - "loss": 38.942, + "loss": 191450.1625, "step": 11730 }, { "epoch": 0.023715542770799583, - "grad_norm": 683.9542846679688, + "grad_norm": 815182.75, "learning_rate": 2.3480000000000002e-06, - "loss": 49.5397, + "loss": 279827.775, "step": 11740 }, { "epoch": 0.023735743403483396, - "grad_norm": 247.4552459716797, + "grad_norm": 61530.3203125, "learning_rate": 2.35e-06, - "loss": 44.7048, + "loss": 225271.175, "step": 11750 }, { "epoch": 0.023755944036167213, - "grad_norm": 208.85464477539062, + "grad_norm": 904.901611328125, "learning_rate": 2.352e-06, - "loss": 35.0484, + "loss": 122831.9125, "step": 11760 }, { "epoch": 0.02377614466885103, - "grad_norm": 102.29475402832031, + "grad_norm": 27425.634765625, "learning_rate": 2.3540000000000002e-06, - "loss": 33.6253, + "loss": 103569.7812, "step": 11770 }, { "epoch": 0.023796345301534846, - "grad_norm": 601.3646850585938, + "grad_norm": 180426.203125, "learning_rate": 2.3560000000000004e-06, - "loss": 53.7536, + "loss": 193780.425, "step": 11780 }, { "epoch": 0.02381654593421866, - "grad_norm": 1281.22412109375, + "grad_norm": 134209.078125, "learning_rate": 2.358e-06, - "loss": 72.3357, + "loss": 332309.175, "step": 11790 }, { "epoch": 0.023836746566902475, - "grad_norm": 877.3131103515625, + "grad_norm": 779654.0, "learning_rate": 2.3600000000000003e-06, - "loss": 67.5423, + "loss": 272454.45, "step": 11800 }, { "epoch": 0.023856947199586292, - "grad_norm": 1316.6104736328125, + "grad_norm": 1488059.5, "learning_rate": 2.362e-06, - "loss": 58.9611, + "loss": 354767.7, "step": 11810 }, { "epoch": 0.02387714783227011, - "grad_norm": 209.12490844726562, + "grad_norm": 34111.63671875, "learning_rate": 2.364e-06, - "loss": 40.0051, + "loss": 211576.5, "step": 11820 }, { "epoch": 0.02389734846495392, - "grad_norm": 171.822265625, + "grad_norm": 40656.6484375, "learning_rate": 2.3660000000000003e-06, - "loss": 25.1099, + "loss": 121734.1375, "step": 11830 }, { "epoch": 0.023917549097637738, - "grad_norm": 432.1847229003906, + "grad_norm": 259538.421875, "learning_rate": 2.3680000000000005e-06, - "loss": 24.7392, + "loss": 123128.125, "step": 11840 }, { "epoch": 0.023937749730321554, - "grad_norm": 236.37118530273438, + "grad_norm": 9259.552734375, "learning_rate": 2.37e-06, - "loss": 37.9927, + "loss": 100060.4875, "step": 11850 }, { "epoch": 0.02395795036300537, - "grad_norm": 279.443115234375, + "grad_norm": 50963.30078125, "learning_rate": 2.3720000000000003e-06, - "loss": 39.5327, + "loss": 172604.075, "step": 11860 }, { "epoch": 0.023978150995689184, - "grad_norm": 194.41497802734375, + "grad_norm": 4804.1044921875, "learning_rate": 2.374e-06, - "loss": 41.6783, + "loss": 151507.225, "step": 11870 }, { "epoch": 0.023998351628373, - "grad_norm": 143.53030395507812, + "grad_norm": 3759.806884765625, "learning_rate": 2.376e-06, - "loss": 27.8356, + "loss": 129653.525, "step": 11880 }, { "epoch": 0.024018552261056817, - "grad_norm": 330.04986572265625, + "grad_norm": 378769.25, "learning_rate": 2.3780000000000004e-06, - "loss": 57.2515, + "loss": 327088.325, "step": 11890 }, { "epoch": 0.024038752893740634, - "grad_norm": 278.6073913574219, + "grad_norm": 98594.15625, "learning_rate": 2.38e-06, - "loss": 32.8321, + "loss": 181859.3125, "step": 11900 }, { "epoch": 0.024058953526424447, - "grad_norm": 942.486328125, + "grad_norm": 748660.875, "learning_rate": 2.3820000000000002e-06, - "loss": 47.1595, + "loss": 249386.975, "step": 11910 }, { "epoch": 0.024079154159108263, - "grad_norm": 322.2868957519531, + "grad_norm": 139820.703125, "learning_rate": 2.3840000000000004e-06, - "loss": 32.1692, + "loss": 75960.6, "step": 11920 }, { "epoch": 0.02409935479179208, - "grad_norm": 194.4140625, + "grad_norm": 125340.671875, "learning_rate": 2.386e-06, - "loss": 62.3565, + "loss": 260291.25, "step": 11930 }, { "epoch": 0.024119555424475896, - "grad_norm": 90.10770416259766, + "grad_norm": 4665.6103515625, "learning_rate": 2.3880000000000003e-06, - "loss": 32.4736, + "loss": 169067.6125, "step": 11940 }, { "epoch": 0.02413975605715971, - "grad_norm": 335.71942138671875, + "grad_norm": 147073.484375, "learning_rate": 2.39e-06, - "loss": 57.2216, + "loss": 343347.425, "step": 11950 }, { "epoch": 0.024159956689843526, - "grad_norm": 205.7191619873047, + "grad_norm": 19220.353515625, "learning_rate": 2.392e-06, - "loss": 36.9119, + "loss": 191275.8625, "step": 11960 }, { "epoch": 0.024180157322527342, - "grad_norm": 185.11715698242188, + "grad_norm": 7799.826171875, "learning_rate": 2.3940000000000003e-06, - "loss": 21.545, + "loss": 83292.3438, "step": 11970 }, { "epoch": 0.02420035795521116, - "grad_norm": 383.2213439941406, + "grad_norm": 210596.65625, "learning_rate": 2.3960000000000004e-06, - "loss": 24.6921, + "loss": 90913.8313, "step": 11980 }, { "epoch": 0.024220558587894972, - "grad_norm": 471.1438293457031, + "grad_norm": 403373.8125, "learning_rate": 2.398e-06, - "loss": 46.5666, + "loss": 307446.075, "step": 11990 }, { "epoch": 0.02424075922057879, - "grad_norm": 251.3881378173828, + "grad_norm": 79116.9765625, "learning_rate": 2.4000000000000003e-06, - "loss": 41.0322, + "loss": 121713.4375, "step": 12000 }, { "epoch": 0.024260959853262605, - "grad_norm": 165.10269165039062, + "grad_norm": 14068.7265625, "learning_rate": 2.402e-06, - "loss": 38.1161, + "loss": 153937.875, "step": 12010 }, { "epoch": 0.02428116048594642, - "grad_norm": 147.99966430664062, + "grad_norm": 15601.5244140625, "learning_rate": 2.404e-06, - "loss": 35.0694, + "loss": 103802.4563, "step": 12020 }, { "epoch": 0.024301361118630235, - "grad_norm": 291.3413391113281, + "grad_norm": 334320.5, "learning_rate": 2.4060000000000003e-06, - "loss": 41.1185, + "loss": 171589.1375, "step": 12030 }, { "epoch": 0.02432156175131405, - "grad_norm": 508.818359375, + "grad_norm": 213970.09375, "learning_rate": 2.408e-06, - "loss": 39.5748, + "loss": 133031.4625, "step": 12040 }, { "epoch": 0.024341762383997868, - "grad_norm": 440.5616455078125, + "grad_norm": 376684.03125, "learning_rate": 2.4100000000000002e-06, - "loss": 57.0476, + "loss": 261667.3, "step": 12050 }, { "epoch": 0.024361963016681684, - "grad_norm": 466.8944396972656, + "grad_norm": 271932.71875, "learning_rate": 2.4120000000000004e-06, - "loss": 37.9086, + "loss": 162669.1, "step": 12060 }, { "epoch": 0.024382163649365497, - "grad_norm": 310.8055419921875, + "grad_norm": 43465.80859375, "learning_rate": 2.414e-06, - "loss": 25.3148, + "loss": 95097.175, "step": 12070 }, { "epoch": 0.024402364282049314, - "grad_norm": 117.59268951416016, + "grad_norm": 1140.7071533203125, "learning_rate": 2.4160000000000002e-06, - "loss": 22.3649, + "loss": 121821.575, "step": 12080 }, { "epoch": 0.02442256491473313, - "grad_norm": 155.5489959716797, + "grad_norm": 17136.4765625, "learning_rate": 2.418e-06, - "loss": 60.5863, + "loss": 286343.3, "step": 12090 }, { "epoch": 0.024442765547416947, - "grad_norm": 205.6082305908203, + "grad_norm": 45697.9609375, "learning_rate": 2.42e-06, - "loss": 31.7363, + "loss": 160785.1125, "step": 12100 }, { "epoch": 0.02446296618010076, - "grad_norm": 162.174560546875, + "grad_norm": 3274.23388671875, "learning_rate": 2.4220000000000003e-06, - "loss": 48.4217, + "loss": 337672.625, "step": 12110 }, { "epoch": 0.024483166812784576, - "grad_norm": 184.87391662597656, + "grad_norm": 14420.8173828125, "learning_rate": 2.4240000000000004e-06, - "loss": 30.011, + "loss": 99232.1562, "step": 12120 }, { "epoch": 0.024503367445468393, - "grad_norm": 53.217567443847656, + "grad_norm": 12052.5966796875, "learning_rate": 2.426e-06, - "loss": 48.7947, + "loss": 125702.6625, "step": 12130 }, { "epoch": 0.02452356807815221, - "grad_norm": 357.15142822265625, + "grad_norm": 454793.375, "learning_rate": 2.428e-06, - "loss": 50.6773, + "loss": 302800.4, "step": 12140 }, { "epoch": 0.024543768710836023, - "grad_norm": 363.4394836425781, + "grad_norm": 12679.9873046875, "learning_rate": 2.43e-06, - "loss": 37.6455, + "loss": 201041.0, "step": 12150 }, { "epoch": 0.02456396934351984, - "grad_norm": 984.3299560546875, + "grad_norm": 753563.5625, "learning_rate": 2.432e-06, - "loss": 36.9793, + "loss": 179384.0, "step": 12160 }, { "epoch": 0.024584169976203656, - "grad_norm": 74.2818374633789, + "grad_norm": 13656.064453125, "learning_rate": 2.4340000000000003e-06, - "loss": 58.5528, + "loss": 547631.0, "step": 12170 }, { "epoch": 0.02460437060888747, - "grad_norm": 286.1300048828125, + "grad_norm": 171678.546875, "learning_rate": 2.4360000000000005e-06, - "loss": 34.9469, + "loss": 115608.5875, "step": 12180 }, { "epoch": 0.024624571241571285, - "grad_norm": 367.5870361328125, + "grad_norm": 119285.859375, "learning_rate": 2.438e-06, - "loss": 51.9979, + "loss": 217795.25, "step": 12190 }, { "epoch": 0.0246447718742551, - "grad_norm": 281.93695068359375, + "grad_norm": 283171.09375, "learning_rate": 2.4400000000000004e-06, - "loss": 37.0231, + "loss": 179440.2875, "step": 12200 }, { "epoch": 0.024664972506938918, - "grad_norm": 172.1575469970703, + "grad_norm": 29404.98046875, "learning_rate": 2.442e-06, - "loss": 57.3167, + "loss": 158527.5625, "step": 12210 }, { "epoch": 0.02468517313962273, - "grad_norm": 232.27503967285156, + "grad_norm": 29963.169921875, "learning_rate": 2.4440000000000002e-06, - "loss": 40.503, + "loss": 144625.2125, "step": 12220 }, { "epoch": 0.024705373772306548, - "grad_norm": 243.8904571533203, + "grad_norm": 100343.5078125, "learning_rate": 2.4460000000000004e-06, - "loss": 94.956, + "loss": 688233.2, "step": 12230 }, { "epoch": 0.024725574404990364, - "grad_norm": 246.08596801757812, + "grad_norm": 45991.91015625, "learning_rate": 2.448e-06, - "loss": 40.0518, + "loss": 104723.2688, "step": 12240 }, { "epoch": 0.02474577503767418, - "grad_norm": 480.3242492675781, + "grad_norm": 492821.5, "learning_rate": 2.4500000000000003e-06, - "loss": 51.3203, + "loss": 222283.5, "step": 12250 }, { "epoch": 0.024765975670357994, - "grad_norm": 403.8663330078125, + "grad_norm": 107598.3046875, "learning_rate": 2.4520000000000004e-06, - "loss": 37.7848, + "loss": 166167.1, "step": 12260 }, { "epoch": 0.02478617630304181, - "grad_norm": 435.7867431640625, + "grad_norm": 64114.5859375, "learning_rate": 2.454e-06, - "loss": 35.7, + "loss": 120290.3125, "step": 12270 }, { "epoch": 0.024806376935725627, - "grad_norm": 219.42881774902344, + "grad_norm": 27179.6171875, "learning_rate": 2.4560000000000003e-06, - "loss": 40.8003, + "loss": 191246.675, "step": 12280 }, { "epoch": 0.024826577568409443, - "grad_norm": 381.6520690917969, + "grad_norm": 423731.59375, "learning_rate": 2.458e-06, - "loss": 57.4115, + "loss": 264406.225, "step": 12290 }, { "epoch": 0.024846778201093257, - "grad_norm": 152.27432250976562, + "grad_norm": 52689.24609375, "learning_rate": 2.46e-06, - "loss": 33.6104, + "loss": 93010.55, "step": 12300 }, { "epoch": 0.024866978833777073, - "grad_norm": 177.92501831054688, + "grad_norm": 10245.447265625, "learning_rate": 2.4620000000000003e-06, - "loss": 37.5311, + "loss": 139247.0375, "step": 12310 }, { "epoch": 0.02488717946646089, - "grad_norm": 256.8657531738281, + "grad_norm": 66352.796875, "learning_rate": 2.4640000000000005e-06, - "loss": 50.5635, + "loss": 194832.3125, "step": 12320 }, { "epoch": 0.024907380099144706, - "grad_norm": 335.3462219238281, + "grad_norm": 144975.296875, "learning_rate": 2.466e-06, - "loss": 50.6268, + "loss": 195349.5, "step": 12330 }, { "epoch": 0.02492758073182852, - "grad_norm": 262.1639709472656, + "grad_norm": 64961.83203125, "learning_rate": 2.468e-06, - "loss": 39.8741, + "loss": 211813.35, "step": 12340 }, { "epoch": 0.024947781364512336, - "grad_norm": 622.4638061523438, + "grad_norm": 555392.3125, "learning_rate": 2.47e-06, - "loss": 58.4955, + "loss": 354107.85, "step": 12350 }, { "epoch": 0.024967981997196152, - "grad_norm": 143.38784790039062, + "grad_norm": 4147.486328125, "learning_rate": 2.4720000000000002e-06, - "loss": 36.4286, + "loss": 126441.3125, "step": 12360 }, { "epoch": 0.02498818262987997, - "grad_norm": 171.88291931152344, + "grad_norm": 50746.5078125, "learning_rate": 2.4740000000000004e-06, - "loss": 52.9979, + "loss": 241331.825, "step": 12370 }, { "epoch": 0.025008383262563782, - "grad_norm": 323.2747497558594, + "grad_norm": 15823.841796875, "learning_rate": 2.476e-06, - "loss": 34.0012, + "loss": 126644.775, "step": 12380 }, { "epoch": 0.0250285838952476, - "grad_norm": 582.0142211914062, + "grad_norm": 606753.5625, "learning_rate": 2.4780000000000002e-06, - "loss": 38.0104, + "loss": 181456.375, "step": 12390 }, { "epoch": 0.025048784527931415, - "grad_norm": 280.22613525390625, + "grad_norm": 88115.5546875, "learning_rate": 2.4800000000000004e-06, - "loss": 68.14, + "loss": 263885.25, "step": 12400 }, { "epoch": 0.02506898516061523, - "grad_norm": 256.53253173828125, + "grad_norm": 35972.06640625, "learning_rate": 2.482e-06, - "loss": 38.5101, + "loss": 180270.9375, "step": 12410 }, { "epoch": 0.025089185793299044, "grad_norm": 0.0, "learning_rate": 2.4840000000000003e-06, - "loss": 37.3352, + "loss": 146052.7, "step": 12420 }, { "epoch": 0.02510938642598286, - "grad_norm": 988.313232421875, + "grad_norm": 1285006.375, "learning_rate": 2.486e-06, - "loss": 59.9187, + "loss": 330159.45, "step": 12430 }, { "epoch": 0.025129587058666678, - "grad_norm": 127.60823059082031, + "grad_norm": 20915.482421875, "learning_rate": 2.488e-06, - "loss": 35.5049, + "loss": 216495.575, "step": 12440 }, { "epoch": 0.025149787691350494, - "grad_norm": 363.1122131347656, + "grad_norm": 219756.25, "learning_rate": 2.4900000000000003e-06, - "loss": 33.9354, + "loss": 142711.375, "step": 12450 }, { "epoch": 0.025169988324034307, - "grad_norm": 98.77279663085938, + "grad_norm": 88686.0703125, "learning_rate": 2.4920000000000005e-06, - "loss": 35.16, + "loss": 192655.8875, "step": 12460 }, { "epoch": 0.025190188956718124, - "grad_norm": 126.35530090332031, + "grad_norm": 16942.857421875, "learning_rate": 2.494e-06, - "loss": 38.8848, + "loss": 252825.05, "step": 12470 }, { "epoch": 0.02521038958940194, - "grad_norm": 247.77597045898438, + "grad_norm": 3451.429931640625, "learning_rate": 2.496e-06, - "loss": 34.5122, + "loss": 148746.8, "step": 12480 }, { "epoch": 0.025230590222085757, - "grad_norm": 1060.6580810546875, + "grad_norm": 132759.234375, "learning_rate": 2.498e-06, - "loss": 61.8685, + "loss": 205976.4375, "step": 12490 }, { "epoch": 0.02525079085476957, - "grad_norm": 54.816871643066406, + "grad_norm": 4270.912109375, "learning_rate": 2.5e-06, - "loss": 52.8173, + "loss": 137452.1125, "step": 12500 }, { "epoch": 0.025270991487453386, - "grad_norm": 120.01831817626953, + "grad_norm": 710.3927001953125, "learning_rate": 2.502e-06, - "loss": 31.829, + "loss": 197826.0125, "step": 12510 }, { "epoch": 0.025291192120137203, - "grad_norm": 242.2848663330078, + "grad_norm": 9483.5673828125, "learning_rate": 2.5040000000000005e-06, - "loss": 67.7217, + "loss": 279608.325, "step": 12520 }, { "epoch": 0.02531139275282102, - "grad_norm": 329.4706115722656, + "grad_norm": 83680.125, "learning_rate": 2.5060000000000002e-06, - "loss": 79.2915, + "loss": 495682.2, "step": 12530 }, { "epoch": 0.025331593385504832, - "grad_norm": 640.5140991210938, + "grad_norm": 98745.1484375, "learning_rate": 2.5080000000000004e-06, - "loss": 61.1286, + "loss": 360862.45, "step": 12540 }, { "epoch": 0.02535179401818865, - "grad_norm": 700.0884399414062, + "grad_norm": 350357.78125, "learning_rate": 2.51e-06, - "loss": 48.8919, + "loss": 245406.175, "step": 12550 }, { "epoch": 0.025371994650872465, - "grad_norm": 746.3092041015625, + "grad_norm": 470874.0625, "learning_rate": 2.512e-06, - "loss": 45.3646, + "loss": 156642.75, "step": 12560 }, { "epoch": 0.025392195283556282, - "grad_norm": 481.09521484375, + "grad_norm": 196388.78125, "learning_rate": 2.5140000000000004e-06, - "loss": 25.3038, + "loss": 89663.8687, "step": 12570 }, { "epoch": 0.025412395916240095, - "grad_norm": 428.625244140625, + "grad_norm": 4324.07470703125, "learning_rate": 2.516e-06, - "loss": 26.8353, + "loss": 80865.2125, "step": 12580 }, { "epoch": 0.02543259654892391, - "grad_norm": 2997.267822265625, + "grad_norm": 0.0, "learning_rate": 2.5180000000000003e-06, - "loss": 81.1764, + "loss": 156998.2875, "step": 12590 }, { "epoch": 0.025452797181607728, - "grad_norm": 481.7459716796875, + "grad_norm": 316922.75, "learning_rate": 2.52e-06, - "loss": 45.9044, + "loss": 235043.75, "step": 12600 }, { "epoch": 0.025472997814291545, - "grad_norm": 192.6971893310547, + "grad_norm": 95972.4296875, "learning_rate": 2.522e-06, - "loss": 34.4935, + "loss": 90616.4, "step": 12610 }, { "epoch": 0.025493198446975358, - "grad_norm": 116.75853729248047, + "grad_norm": 11718.0166015625, "learning_rate": 2.5240000000000003e-06, - "loss": 31.1347, + "loss": 119019.9125, "step": 12620 }, { "epoch": 0.025513399079659174, - "grad_norm": 515.5565795898438, + "grad_norm": 159589.578125, "learning_rate": 2.526e-06, - "loss": 37.4098, + "loss": 145857.85, "step": 12630 }, { "epoch": 0.02553359971234299, - "grad_norm": 781.3652954101562, + "grad_norm": 1553523.375, "learning_rate": 2.5280000000000006e-06, - "loss": 62.6506, + "loss": 392228.175, "step": 12640 }, { "epoch": 0.025553800345026807, - "grad_norm": 145.27789306640625, + "grad_norm": 3714.459716796875, "learning_rate": 2.5300000000000003e-06, - "loss": 57.6778, + "loss": 175377.825, "step": 12650 }, { "epoch": 0.02557400097771062, - "grad_norm": 824.5062866210938, + "grad_norm": 535725.5625, "learning_rate": 2.532e-06, - "loss": 62.9039, + "loss": 350313.65, "step": 12660 }, { "epoch": 0.025594201610394437, - "grad_norm": 199.32632446289062, + "grad_norm": 62224.11328125, "learning_rate": 2.5340000000000002e-06, - "loss": 34.6479, + "loss": 119582.9, "step": 12670 }, { "epoch": 0.025614402243078253, - "grad_norm": 247.83538818359375, + "grad_norm": 65295.234375, "learning_rate": 2.536e-06, - "loss": 66.0479, + "loss": 329832.325, "step": 12680 }, { "epoch": 0.02563460287576207, - "grad_norm": 413.0625, + "grad_norm": 415219.65625, "learning_rate": 2.5380000000000005e-06, - "loss": 61.9256, + "loss": 353404.35, "step": 12690 }, { "epoch": 0.025654803508445883, - "grad_norm": 251.6697235107422, + "grad_norm": 10813.3779296875, "learning_rate": 2.5400000000000002e-06, - "loss": 49.3416, + "loss": 213613.875, "step": 12700 }, { "epoch": 0.0256750041411297, - "grad_norm": 234.376220703125, + "grad_norm": 82043.7890625, "learning_rate": 2.542e-06, - "loss": 33.7977, + "loss": 93695.2, "step": 12710 }, { "epoch": 0.025695204773813516, - "grad_norm": 566.3567504882812, + "grad_norm": 167439.375, "learning_rate": 2.5440000000000005e-06, - "loss": 48.0008, + "loss": 169041.075, "step": 12720 }, { "epoch": 0.025715405406497333, - "grad_norm": 352.84539794921875, + "grad_norm": 22605.080078125, "learning_rate": 2.5460000000000003e-06, - "loss": 33.9545, + "loss": 83927.3188, "step": 12730 }, { "epoch": 0.025735606039181146, - "grad_norm": 237.5641632080078, + "grad_norm": 61118.73828125, "learning_rate": 2.5480000000000004e-06, - "loss": 47.3736, + "loss": 142744.4875, "step": 12740 }, { "epoch": 0.025755806671864962, - "grad_norm": 186.34222412109375, + "grad_norm": 67260.140625, "learning_rate": 2.55e-06, - "loss": 63.3698, + "loss": 423287.7, "step": 12750 }, { "epoch": 0.02577600730454878, - "grad_norm": 363.53424072265625, + "grad_norm": 16536.447265625, "learning_rate": 2.552e-06, - "loss": 52.3423, + "loss": 290804.5, "step": 12760 }, { "epoch": 0.025796207937232595, - "grad_norm": 557.5015869140625, + "grad_norm": 549269.8125, "learning_rate": 2.5540000000000004e-06, - "loss": 79.621, + "loss": 315619.2, "step": 12770 }, { "epoch": 0.025816408569916408, - "grad_norm": 1093.6610107421875, + "grad_norm": 251196.859375, "learning_rate": 2.556e-06, - "loss": 66.2924, + "loss": 447576.25, "step": 12780 }, { "epoch": 0.025836609202600225, - "grad_norm": 196.23626708984375, + "grad_norm": 4308.90478515625, "learning_rate": 2.5580000000000003e-06, - "loss": 31.8605, + "loss": 197188.9875, "step": 12790 }, { "epoch": 0.02585680983528404, - "grad_norm": 116.64302062988281, + "grad_norm": 7606.18359375, "learning_rate": 2.56e-06, - "loss": 18.7056, + "loss": 34475.7, "step": 12800 }, { "epoch": 0.025877010467967858, - "grad_norm": 373.5444030761719, + "grad_norm": 156312.046875, "learning_rate": 2.562e-06, - "loss": 55.1945, + "loss": 278944.85, "step": 12810 }, { "epoch": 0.02589721110065167, - "grad_norm": 111.37442779541016, + "grad_norm": 72017.09375, "learning_rate": 2.5640000000000004e-06, - "loss": 22.0, + "loss": 89403.1562, "step": 12820 }, { "epoch": 0.025917411733335487, - "grad_norm": 304.47882080078125, + "grad_norm": 23867.05078125, "learning_rate": 2.566e-06, - "loss": 35.3111, + "loss": 146503.7625, "step": 12830 }, { "epoch": 0.025937612366019304, - "grad_norm": 916.466796875, + "grad_norm": 732485.8125, "learning_rate": 2.568e-06, - "loss": 40.8868, + "loss": 177056.475, "step": 12840 }, { "epoch": 0.02595781299870312, - "grad_norm": 418.1251220703125, + "grad_norm": 168610.25, "learning_rate": 2.5700000000000004e-06, - "loss": 36.8459, + "loss": 115464.8625, "step": 12850 }, { "epoch": 0.025978013631386934, - "grad_norm": 137.85354614257812, + "grad_norm": 34616.625, "learning_rate": 2.572e-06, - "loss": 49.6207, + "loss": 176824.75, "step": 12860 }, { "epoch": 0.02599821426407075, - "grad_norm": 420.1042175292969, + "grad_norm": 49692.5, "learning_rate": 2.5740000000000003e-06, - "loss": 49.9788, + "loss": 148882.0, "step": 12870 }, { "epoch": 0.026018414896754567, - "grad_norm": 531.953369140625, + "grad_norm": 370747.46875, "learning_rate": 2.576e-06, - "loss": 64.118, + "loss": 258544.275, "step": 12880 }, { "epoch": 0.026038615529438383, - "grad_norm": 1573.395751953125, + "grad_norm": 1599164.75, "learning_rate": 2.578e-06, - "loss": 69.8523, + "loss": 354787.925, "step": 12890 }, { "epoch": 0.026058816162122196, - "grad_norm": 736.00927734375, + "grad_norm": 319394.96875, "learning_rate": 2.5800000000000003e-06, - "loss": 42.5783, + "loss": 145127.375, "step": 12900 }, { "epoch": 0.026079016794806013, - "grad_norm": 432.5742492675781, + "grad_norm": 317170.25, "learning_rate": 2.582e-06, - "loss": 30.5095, + "loss": 112154.6125, "step": 12910 }, { "epoch": 0.02609921742748983, - "grad_norm": 325.77752685546875, + "grad_norm": 31690.107421875, "learning_rate": 2.5840000000000006e-06, - "loss": 33.1769, + "loss": 77772.8375, "step": 12920 }, { "epoch": 0.026119418060173646, - "grad_norm": 127.23358917236328, + "grad_norm": 18707.58984375, "learning_rate": 2.5860000000000003e-06, - "loss": 42.7986, + "loss": 204297.05, "step": 12930 }, { "epoch": 0.02613961869285746, - "grad_norm": 112.5145034790039, + "grad_norm": 12216.0205078125, "learning_rate": 2.588e-06, - "loss": 28.0613, + "loss": 148160.975, "step": 12940 }, { "epoch": 0.026159819325541275, - "grad_norm": 219.8375701904297, + "grad_norm": 65060.0859375, "learning_rate": 2.59e-06, - "loss": 60.5869, + "loss": 310978.325, "step": 12950 }, { "epoch": 0.026180019958225092, - "grad_norm": 290.2958068847656, + "grad_norm": 54450.765625, "learning_rate": 2.592e-06, - "loss": 49.7006, + "loss": 173472.075, "step": 12960 }, { "epoch": 0.02620022059090891, - "grad_norm": 420.064697265625, + "grad_norm": 151907.609375, "learning_rate": 2.5940000000000005e-06, - "loss": 48.0076, + "loss": 252305.275, "step": 12970 }, { "epoch": 0.02622042122359272, - "grad_norm": 394.51751708984375, + "grad_norm": 202176.1875, "learning_rate": 2.5960000000000002e-06, - "loss": 39.4181, + "loss": 215993.95, "step": 12980 }, { "epoch": 0.026240621856276538, - "grad_norm": 216.67689514160156, + "grad_norm": 68138.6875, "learning_rate": 2.598e-06, - "loss": 49.3929, + "loss": 202861.9125, "step": 12990 }, { "epoch": 0.026260822488960354, - "grad_norm": 212.09375, + "grad_norm": 5375.91015625, "learning_rate": 2.6e-06, - "loss": 52.2523, + "loss": 248852.7, "step": 13000 }, { "epoch": 0.02628102312164417, - "grad_norm": 471.0820007324219, + "grad_norm": 49989.7265625, "learning_rate": 2.6020000000000002e-06, - "loss": 49.3459, + "loss": 246261.675, "step": 13010 }, { "epoch": 0.026301223754327984, - "grad_norm": 131.7678985595703, + "grad_norm": 36027.0078125, "learning_rate": 2.6040000000000004e-06, - "loss": 46.5295, + "loss": 242685.525, "step": 13020 }, { "epoch": 0.0263214243870118, - "grad_norm": 555.260009765625, + "grad_norm": 91047.1328125, "learning_rate": 2.606e-06, - "loss": 50.6274, + "loss": 113200.1125, "step": 13030 }, { "epoch": 0.026341625019695617, - "grad_norm": 338.2981872558594, + "grad_norm": 224550.75, "learning_rate": 2.608e-06, - "loss": 35.9854, + "loss": 189645.7, "step": 13040 }, { "epoch": 0.026361825652379434, - "grad_norm": 925.1130981445312, + "grad_norm": 388784.03125, "learning_rate": 2.6100000000000004e-06, - "loss": 57.2883, + "loss": 145915.075, "step": 13050 }, { "epoch": 0.026382026285063247, - "grad_norm": 697.80859375, + "grad_norm": 750773.125, "learning_rate": 2.612e-06, - "loss": 49.0037, + "loss": 213651.8, "step": 13060 }, { "epoch": 0.026402226917747063, - "grad_norm": 241.2378692626953, + "grad_norm": 67873.4375, "learning_rate": 2.6140000000000003e-06, - "loss": 36.8014, + "loss": 229508.925, "step": 13070 }, { "epoch": 0.02642242755043088, - "grad_norm": 388.745849609375, + "grad_norm": 108934.921875, "learning_rate": 2.616e-06, - "loss": 62.7974, + "loss": 280183.15, "step": 13080 }, { "epoch": 0.026442628183114696, - "grad_norm": 1160.81787109375, + "grad_norm": 135194.6875, "learning_rate": 2.618e-06, - "loss": 27.3127, + "loss": 49307.6562, "step": 13090 }, { "epoch": 0.02646282881579851, - "grad_norm": 946.897705078125, + "grad_norm": 728505.1875, "learning_rate": 2.6200000000000003e-06, - "loss": 43.0482, + "loss": 181283.425, "step": 13100 }, { "epoch": 0.026483029448482326, - "grad_norm": 315.8953552246094, + "grad_norm": 24442.486328125, "learning_rate": 2.622e-06, - "loss": 39.7804, + "loss": 165358.8125, "step": 13110 }, { "epoch": 0.026503230081166142, - "grad_norm": 214.36895751953125, + "grad_norm": 84922.5859375, "learning_rate": 2.6240000000000006e-06, - "loss": 36.6853, + "loss": 160699.8875, "step": 13120 }, { "epoch": 0.02652343071384996, - "grad_norm": 186.8365478515625, + "grad_norm": 7053.833984375, "learning_rate": 2.6260000000000004e-06, - "loss": 26.8904, + "loss": 72775.6562, "step": 13130 }, { "epoch": 0.026543631346533772, - "grad_norm": 316.1466064453125, + "grad_norm": 282715.84375, "learning_rate": 2.628e-06, - "loss": 44.0668, + "loss": 191573.1, "step": 13140 }, { "epoch": 0.02656383197921759, - "grad_norm": 470.8910827636719, + "grad_norm": 184384.640625, "learning_rate": 2.6300000000000002e-06, - "loss": 52.1296, + "loss": 169818.925, "step": 13150 }, { "epoch": 0.026584032611901405, - "grad_norm": 299.4209899902344, + "grad_norm": 75991.1875, "learning_rate": 2.632e-06, - "loss": 34.1994, + "loss": 141872.625, "step": 13160 }, { "epoch": 0.02660423324458522, - "grad_norm": 273.2069396972656, + "grad_norm": 404547.8125, "learning_rate": 2.6340000000000005e-06, - "loss": 29.3161, + "loss": 239427.8, "step": 13170 }, { "epoch": 0.026624433877269035, - "grad_norm": 225.34320068359375, + "grad_norm": 158694.703125, "learning_rate": 2.6360000000000003e-06, - "loss": 28.9166, + "loss": 153427.7, "step": 13180 }, { "epoch": 0.02664463450995285, - "grad_norm": 238.6172637939453, + "grad_norm": 51968.3984375, "learning_rate": 2.638e-06, - "loss": 43.5059, + "loss": 147358.425, "step": 13190 }, { "epoch": 0.026664835142636668, - "grad_norm": 449.2897033691406, + "grad_norm": 594575.0625, "learning_rate": 2.64e-06, - "loss": 63.3421, + "loss": 393068.0, "step": 13200 }, { "epoch": 0.026685035775320484, - "grad_norm": 643.7606811523438, + "grad_norm": 346472.34375, "learning_rate": 2.6420000000000003e-06, - "loss": 41.4277, + "loss": 239122.2, "step": 13210 }, { "epoch": 0.026705236408004297, - "grad_norm": 244.51620483398438, + "grad_norm": 18441.890625, "learning_rate": 2.6440000000000004e-06, - "loss": 61.338, + "loss": 309610.475, "step": 13220 }, { "epoch": 0.026725437040688114, - "grad_norm": 519.582763671875, + "grad_norm": 229130.84375, "learning_rate": 2.646e-06, - "loss": 31.4954, + "loss": 319517.875, "step": 13230 }, { "epoch": 0.02674563767337193, - "grad_norm": 203.0201416015625, + "grad_norm": 83272.0625, "learning_rate": 2.648e-06, - "loss": 41.8135, + "loss": 150310.15, "step": 13240 }, { "epoch": 0.026765838306055747, - "grad_norm": 729.3497314453125, + "grad_norm": 643834.5625, "learning_rate": 2.6500000000000005e-06, - "loss": 60.1033, + "loss": 221928.8, "step": 13250 }, { "epoch": 0.02678603893873956, - "grad_norm": 62.9359130859375, + "grad_norm": 1964.906982421875, "learning_rate": 2.652e-06, - "loss": 58.7239, + "loss": 149136.525, "step": 13260 }, { "epoch": 0.026806239571423376, - "grad_norm": 394.38897705078125, + "grad_norm": 47228.32421875, "learning_rate": 2.6540000000000003e-06, - "loss": 33.6383, + "loss": 97385.8, "step": 13270 }, { "epoch": 0.026826440204107193, - "grad_norm": 251.70309448242188, + "grad_norm": 55893.14453125, "learning_rate": 2.656e-06, - "loss": 34.8139, + "loss": 159288.8, "step": 13280 }, { "epoch": 0.02684664083679101, - "grad_norm": 370.7093811035156, + "grad_norm": 55055.984375, "learning_rate": 2.6580000000000002e-06, - "loss": 36.3786, + "loss": 156919.2125, "step": 13290 }, { "epoch": 0.026866841469474823, - "grad_norm": 168.7527313232422, + "grad_norm": 19144.462890625, "learning_rate": 2.6600000000000004e-06, - "loss": 28.804, + "loss": 154129.9625, "step": 13300 }, { "epoch": 0.02688704210215864, - "grad_norm": 129.06845092773438, + "grad_norm": 28927.0625, "learning_rate": 2.662e-06, - "loss": 26.3878, + "loss": 46856.2937, "step": 13310 }, { "epoch": 0.026907242734842456, - "grad_norm": 417.08258056640625, + "grad_norm": 200117.03125, "learning_rate": 2.6640000000000007e-06, - "loss": 59.2538, + "loss": 300504.725, "step": 13320 }, { "epoch": 0.026927443367526272, - "grad_norm": 39.629695892333984, + "grad_norm": 77701.6015625, "learning_rate": 2.6660000000000004e-06, - "loss": 29.9545, + "loss": 73052.3438, "step": 13330 }, { "epoch": 0.026947644000210085, - "grad_norm": 308.0406188964844, + "grad_norm": 39690.42578125, "learning_rate": 2.668e-06, - "loss": 58.3024, + "loss": 334828.075, "step": 13340 }, { "epoch": 0.0269678446328939, - "grad_norm": 479.51123046875, + "grad_norm": 485238.46875, "learning_rate": 2.6700000000000003e-06, - "loss": 33.3494, + "loss": 150326.275, "step": 13350 }, { "epoch": 0.026988045265577718, - "grad_norm": 402.8848876953125, + "grad_norm": 5101.42041015625, "learning_rate": 2.672e-06, - "loss": 33.9898, + "loss": 141135.075, "step": 13360 }, { "epoch": 0.027008245898261535, - "grad_norm": 99.56165313720703, + "grad_norm": 13557.31640625, "learning_rate": 2.6740000000000006e-06, - "loss": 56.0542, + "loss": 332415.325, "step": 13370 }, { "epoch": 0.027028446530945348, - "grad_norm": 157.0609588623047, + "grad_norm": 56187.375, "learning_rate": 2.6760000000000003e-06, - "loss": 32.6149, + "loss": 133593.325, "step": 13380 }, { "epoch": 0.027048647163629164, - "grad_norm": 111.570068359375, + "grad_norm": 3896.21826171875, "learning_rate": 2.678e-06, - "loss": 57.6188, + "loss": 308714.8, "step": 13390 }, { "epoch": 0.02706884779631298, - "grad_norm": 594.5881958007812, + "grad_norm": 259424.59375, "learning_rate": 2.68e-06, - "loss": 61.9989, + "loss": 194434.45, "step": 13400 }, { "epoch": 0.027089048428996797, - "grad_norm": 245.7981719970703, + "grad_norm": 16564.037109375, "learning_rate": 2.6820000000000003e-06, - "loss": 44.8963, + "loss": 144421.725, "step": 13410 }, { "epoch": 0.02710924906168061, - "grad_norm": 402.9765319824219, + "grad_norm": 100085.234375, "learning_rate": 2.6840000000000005e-06, - "loss": 44.9606, + "loss": 266614.825, "step": 13420 }, { "epoch": 0.027129449694364427, - "grad_norm": 460.35906982421875, + "grad_norm": 177809.25, "learning_rate": 2.686e-06, - "loss": 37.3816, + "loss": 97101.5312, "step": 13430 }, { "epoch": 0.027149650327048244, - "grad_norm": 370.83734130859375, + "grad_norm": 46690.40234375, "learning_rate": 2.688e-06, - "loss": 32.1722, + "loss": 117287.2125, "step": 13440 }, { "epoch": 0.02716985095973206, - "grad_norm": 429.22320556640625, + "grad_norm": 41938.43359375, "learning_rate": 2.6900000000000005e-06, - "loss": 49.1246, + "loss": 222886.15, "step": 13450 }, { "epoch": 0.027190051592415873, - "grad_norm": 1014.9495849609375, + "grad_norm": 988650.25, "learning_rate": 2.6920000000000002e-06, - "loss": 72.7646, + "loss": 294837.625, "step": 13460 }, { "epoch": 0.02721025222509969, - "grad_norm": 349.65869140625, + "grad_norm": 74933.9140625, "learning_rate": 2.694e-06, - "loss": 107.6079, + "loss": 323212.4, "step": 13470 }, { "epoch": 0.027230452857783506, - "grad_norm": 258.6490783691406, + "grad_norm": 55363.234375, "learning_rate": 2.696e-06, - "loss": 20.8284, + "loss": 68329.5, "step": 13480 }, { "epoch": 0.027250653490467323, - "grad_norm": 282.52734375, + "grad_norm": 103458.7265625, "learning_rate": 2.6980000000000003e-06, - "loss": 66.564, + "loss": 342298.025, "step": 13490 }, { "epoch": 0.027270854123151136, - "grad_norm": 685.1763305664062, + "grad_norm": 146524.390625, "learning_rate": 2.7000000000000004e-06, - "loss": 68.7896, + "loss": 379509.5, "step": 13500 }, { "epoch": 0.027291054755834952, - "grad_norm": 507.5338134765625, + "grad_norm": 418331.15625, "learning_rate": 2.702e-06, - "loss": 41.0158, + "loss": 143212.25, "step": 13510 }, { "epoch": 0.02731125538851877, - "grad_norm": 325.1944274902344, + "grad_norm": 258388.046875, "learning_rate": 2.704e-06, - "loss": 48.0721, + "loss": 190038.9625, "step": 13520 }, { "epoch": 0.027331456021202585, - "grad_norm": 719.0826416015625, + "grad_norm": 391506.8125, "learning_rate": 2.7060000000000004e-06, - "loss": 32.8811, + "loss": 106681.275, "step": 13530 }, { "epoch": 0.0273516566538864, - "grad_norm": 210.51300048828125, + "grad_norm": 23211.62890625, "learning_rate": 2.708e-06, - "loss": 42.3936, + "loss": 144036.275, "step": 13540 }, { "epoch": 0.027371857286570215, - "grad_norm": 415.20587158203125, + "grad_norm": 130882.3984375, "learning_rate": 2.7100000000000003e-06, - "loss": 51.9527, + "loss": 239263.725, "step": 13550 }, { "epoch": 0.02739205791925403, - "grad_norm": 192.0109405517578, + "grad_norm": 68519.4296875, "learning_rate": 2.712e-06, - "loss": 42.2937, + "loss": 239261.55, "step": 13560 }, { "epoch": 0.027412258551937848, - "grad_norm": 646.5797119140625, + "grad_norm": 315476.71875, "learning_rate": 2.7139999999999998e-06, - "loss": 52.3339, + "loss": 219653.45, "step": 13570 }, { "epoch": 0.02743245918462166, - "grad_norm": 1235.9935302734375, + "grad_norm": 148792.71875, "learning_rate": 2.7160000000000003e-06, - "loss": 37.6989, + "loss": 83102.8875, "step": 13580 }, { "epoch": 0.027452659817305478, - "grad_norm": 368.6409912109375, + "grad_norm": 326682.15625, "learning_rate": 2.718e-06, - "loss": 63.2326, + "loss": 182029.25, "step": 13590 }, { "epoch": 0.027472860449989294, - "grad_norm": 777.1726684570312, + "grad_norm": 512411.625, "learning_rate": 2.7200000000000002e-06, - "loss": 44.6262, + "loss": 191528.4, "step": 13600 }, { "epoch": 0.02749306108267311, - "grad_norm": 335.92901611328125, + "grad_norm": 48669.47265625, "learning_rate": 2.7220000000000004e-06, - "loss": 47.3544, + "loss": 185166.325, "step": 13610 }, { "epoch": 0.027513261715356924, - "grad_norm": 227.70870971679688, + "grad_norm": 123075.2265625, "learning_rate": 2.724e-06, - "loss": 33.9673, + "loss": 130493.6625, "step": 13620 }, { "epoch": 0.02753346234804074, - "grad_norm": 506.89971923828125, + "grad_norm": 5909.2236328125, "learning_rate": 2.7260000000000002e-06, - "loss": 41.7608, + "loss": 154217.825, "step": 13630 }, { "epoch": 0.027553662980724557, - "grad_norm": 178.0671844482422, + "grad_norm": 70212.953125, "learning_rate": 2.728e-06, - "loss": 33.8887, + "loss": 74899.15, "step": 13640 }, { "epoch": 0.027573863613408373, - "grad_norm": 195.4427490234375, + "grad_norm": 42520.6484375, "learning_rate": 2.7300000000000005e-06, - "loss": 34.899, + "loss": 211227.775, "step": 13650 }, { "epoch": 0.027594064246092186, "grad_norm": 0.0, "learning_rate": 2.7320000000000003e-06, - "loss": 39.6042, + "loss": 258958.775, "step": 13660 }, { "epoch": 0.027614264878776003, - "grad_norm": 564.3861694335938, + "grad_norm": 280872.78125, "learning_rate": 2.734e-06, - "loss": 28.0871, + "loss": 166719.35, "step": 13670 }, { "epoch": 0.02763446551145982, - "grad_norm": 165.40550231933594, + "grad_norm": 18884.0625, "learning_rate": 2.736e-06, - "loss": 31.166, + "loss": 87596.75, "step": 13680 }, { "epoch": 0.027654666144143636, - "grad_norm": 253.98341369628906, + "grad_norm": 46441.75390625, "learning_rate": 2.7380000000000003e-06, - "loss": 49.0308, + "loss": 196788.95, "step": 13690 }, { "epoch": 0.02767486677682745, - "grad_norm": 182.1416015625, + "grad_norm": 151080.140625, "learning_rate": 2.7400000000000004e-06, - "loss": 58.7787, + "loss": 256222.85, "step": 13700 }, { "epoch": 0.027695067409511265, - "grad_norm": 223.27035522460938, + "grad_norm": 48123.23046875, "learning_rate": 2.742e-06, - "loss": 21.958, + "loss": 139289.8125, "step": 13710 }, { "epoch": 0.027715268042195082, - "grad_norm": 616.810302734375, + "grad_norm": 573012.6875, "learning_rate": 2.744e-06, - "loss": 43.3995, + "loss": 188277.2875, "step": 13720 }, { "epoch": 0.0277354686748789, - "grad_norm": 484.90972900390625, + "grad_norm": 474536.90625, "learning_rate": 2.7460000000000005e-06, - "loss": 37.2025, + "loss": 138396.275, "step": 13730 }, { "epoch": 0.02775566930756271, - "grad_norm": 605.7279052734375, + "grad_norm": 373820.5, "learning_rate": 2.748e-06, - "loss": 42.081, + "loss": 184620.425, "step": 13740 }, { "epoch": 0.027775869940246528, - "grad_norm": 629.0067749023438, + "grad_norm": 405169.90625, "learning_rate": 2.7500000000000004e-06, - "loss": 58.638, + "loss": 389902.75, "step": 13750 }, { "epoch": 0.027796070572930345, - "grad_norm": 275.5758361816406, + "grad_norm": 200350.671875, "learning_rate": 2.752e-06, - "loss": 37.6694, + "loss": 191346.275, "step": 13760 }, { "epoch": 0.02781627120561416, - "grad_norm": 243.70538330078125, + "grad_norm": 39455.7578125, "learning_rate": 2.754e-06, - "loss": 81.809, + "loss": 416350.625, "step": 13770 }, { "epoch": 0.027836471838297974, - "grad_norm": 561.8217163085938, + "grad_norm": 539520.0, "learning_rate": 2.7560000000000004e-06, - "loss": 34.3817, + "loss": 162306.8875, "step": 13780 }, { "epoch": 0.02785667247098179, - "grad_norm": 222.61399841308594, + "grad_norm": 13432.5703125, "learning_rate": 2.758e-06, - "loss": 28.4402, + "loss": 111777.1625, "step": 13790 }, { "epoch": 0.027876873103665607, - "grad_norm": 495.9284973144531, + "grad_norm": 568653.1875, "learning_rate": 2.7600000000000003e-06, - "loss": 36.2353, + "loss": 157801.8, "step": 13800 }, { "epoch": 0.027897073736349424, - "grad_norm": 553.8525390625, + "grad_norm": 441053.90625, "learning_rate": 2.7620000000000004e-06, - "loss": 52.706, + "loss": 176150.75, "step": 13810 }, { "epoch": 0.027917274369033237, - "grad_norm": 382.0885925292969, + "grad_norm": 242922.65625, "learning_rate": 2.764e-06, - "loss": 28.0362, + "loss": 115603.925, "step": 13820 }, { "epoch": 0.027937475001717053, - "grad_norm": 266.66009521484375, + "grad_norm": 47298.59765625, "learning_rate": 2.7660000000000003e-06, - "loss": 49.2993, + "loss": 211474.75, "step": 13830 }, { "epoch": 0.02795767563440087, - "grad_norm": 138.1078643798828, + "grad_norm": 7067.0458984375, "learning_rate": 2.768e-06, - "loss": 29.4495, + "loss": 91767.6687, "step": 13840 }, { "epoch": 0.027977876267084686, - "grad_norm": 206.65438842773438, + "grad_norm": 58497.015625, "learning_rate": 2.7700000000000006e-06, - "loss": 32.5288, + "loss": 157646.3875, "step": 13850 }, { "epoch": 0.0279980768997685, - "grad_norm": 440.5274963378906, + "grad_norm": 190329.15625, "learning_rate": 2.7720000000000003e-06, - "loss": 33.0266, + "loss": 118876.1375, "step": 13860 }, { "epoch": 0.028018277532452316, - "grad_norm": 428.0820007324219, + "grad_norm": 261989.4375, "learning_rate": 2.774e-06, - "loss": 35.1125, + "loss": 130787.5, "step": 13870 }, { "epoch": 0.028038478165136133, - "grad_norm": 447.42236328125, + "grad_norm": 229527.203125, "learning_rate": 2.776e-06, - "loss": 41.1532, + "loss": 212467.275, "step": 13880 }, { "epoch": 0.02805867879781995, - "grad_norm": 330.5121154785156, + "grad_norm": 56823.625, "learning_rate": 2.7780000000000003e-06, - "loss": 41.4558, + "loss": 146946.675, "step": 13890 }, { "epoch": 0.028078879430503762, - "grad_norm": 280.7071228027344, + "grad_norm": 15605.9990234375, "learning_rate": 2.7800000000000005e-06, - "loss": 36.8122, + "loss": 177866.45, "step": 13900 }, { "epoch": 0.02809908006318758, - "grad_norm": 197.0652313232422, + "grad_norm": 4409.02197265625, "learning_rate": 2.7820000000000002e-06, - "loss": 23.8722, + "loss": 112937.9, "step": 13910 }, { "epoch": 0.028119280695871395, - "grad_norm": 448.60772705078125, + "grad_norm": 145569.828125, "learning_rate": 2.784e-06, - "loss": 36.7757, + "loss": 128146.1, "step": 13920 }, { "epoch": 0.02813948132855521, - "grad_norm": 238.12721252441406, + "grad_norm": 154532.734375, "learning_rate": 2.7860000000000005e-06, - "loss": 21.7785, + "loss": 97974.125, "step": 13930 }, { "epoch": 0.028159681961239025, "grad_norm": 0.0, "learning_rate": 2.7880000000000002e-06, - "loss": 34.4197, + "loss": 249973.125, "step": 13940 }, { "epoch": 0.02817988259392284, - "grad_norm": 431.6346435546875, + "grad_norm": 63832.11328125, "learning_rate": 2.7900000000000004e-06, - "loss": 52.4934, + "loss": 187202.8125, "step": 13950 }, { "epoch": 0.028200083226606658, "grad_norm": 0.0, "learning_rate": 2.792e-06, - "loss": 31.0159, + "loss": 85916.0312, "step": 13960 }, { "epoch": 0.028220283859290474, - "grad_norm": 578.4807739257812, + "grad_norm": 315816.84375, "learning_rate": 2.794e-06, - "loss": 38.3501, + "loss": 147589.1875, "step": 13970 }, { "epoch": 0.028240484491974287, - "grad_norm": 248.0089569091797, + "grad_norm": 82316.390625, "learning_rate": 2.7960000000000004e-06, - "loss": 44.0154, + "loss": 200450.275, "step": 13980 }, { "epoch": 0.028260685124658104, - "grad_norm": 320.9745178222656, + "grad_norm": 163517.359375, "learning_rate": 2.798e-06, - "loss": 47.829, + "loss": 177798.6, "step": 13990 }, { "epoch": 0.02828088575734192, - "grad_norm": 224.57095336914062, + "grad_norm": 112001.078125, "learning_rate": 2.8000000000000003e-06, - "loss": 41.1967, + "loss": 165175.7375, "step": 14000 }, { "epoch": 0.028301086390025737, - "grad_norm": 123.32061767578125, + "grad_norm": 15330.88671875, "learning_rate": 2.8020000000000004e-06, - "loss": 46.885, + "loss": 197765.8625, "step": 14010 }, { "epoch": 0.02832128702270955, - "grad_norm": 307.6172180175781, + "grad_norm": 29542.140625, "learning_rate": 2.804e-06, - "loss": 37.5566, + "loss": 129525.5125, "step": 14020 }, { "epoch": 0.028341487655393367, - "grad_norm": 207.47947692871094, + "grad_norm": 12811.4306640625, "learning_rate": 2.8060000000000003e-06, - "loss": 57.1262, + "loss": 262166.65, "step": 14030 }, { "epoch": 0.028361688288077183, - "grad_norm": 419.0329284667969, + "grad_norm": 284514.125, "learning_rate": 2.808e-06, - "loss": 46.6623, + "loss": 226236.1, "step": 14040 }, { "epoch": 0.028381888920761, - "grad_norm": 177.60508728027344, + "grad_norm": 26744.359375, "learning_rate": 2.8100000000000006e-06, - "loss": 40.1285, + "loss": 204284.5, "step": 14050 }, { "epoch": 0.028402089553444813, - "grad_norm": 194.23019409179688, + "grad_norm": 18203.32421875, "learning_rate": 2.8120000000000004e-06, - "loss": 25.2963, + "loss": 76285.525, "step": 14060 }, { "epoch": 0.02842229018612863, - "grad_norm": 302.81036376953125, + "grad_norm": 27585.607421875, "learning_rate": 2.814e-06, - "loss": 37.4227, + "loss": 159604.9375, "step": 14070 }, { "epoch": 0.028442490818812446, - "grad_norm": 491.2273864746094, + "grad_norm": 430177.03125, "learning_rate": 2.8160000000000002e-06, - "loss": 30.6079, + "loss": 209726.5, "step": 14080 }, { "epoch": 0.028462691451496262, - "grad_norm": 604.860107421875, + "grad_norm": 504127.78125, "learning_rate": 2.8180000000000004e-06, - "loss": 42.9775, + "loss": 160018.675, "step": 14090 }, { "epoch": 0.028482892084180075, - "grad_norm": 283.94232177734375, + "grad_norm": 154604.0625, "learning_rate": 2.82e-06, - "loss": 50.5379, + "loss": 261694.55, "step": 14100 }, { "epoch": 0.028503092716863892, - "grad_norm": 268.5148620605469, + "grad_norm": 39492.37890625, "learning_rate": 2.8220000000000003e-06, - "loss": 41.9522, + "loss": 286116.625, "step": 14110 }, { "epoch": 0.02852329334954771, - "grad_norm": 196.3582763671875, + "grad_norm": 3938.881591796875, "learning_rate": 2.824e-06, - "loss": 52.5252, + "loss": 307835.875, "step": 14120 }, { "epoch": 0.028543493982231525, - "grad_norm": 121.21099090576172, + "grad_norm": 20167.61328125, "learning_rate": 2.8260000000000006e-06, - "loss": 14.3761, + "loss": 62568.35, "step": 14130 }, { "epoch": 0.028563694614915338, - "grad_norm": 354.08563232421875, + "grad_norm": 108122.75, "learning_rate": 2.8280000000000003e-06, - "loss": 46.1404, + "loss": 135593.525, "step": 14140 }, { "epoch": 0.028583895247599155, - "grad_norm": 183.05775451660156, + "grad_norm": 28992.70703125, "learning_rate": 2.83e-06, - "loss": 57.8285, + "loss": 329385.1, "step": 14150 }, { "epoch": 0.02860409588028297, - "grad_norm": 493.5589599609375, + "grad_norm": 434756.53125, "learning_rate": 2.832e-06, - "loss": 26.6448, + "loss": 111054.7625, "step": 14160 }, { "epoch": 0.028624296512966788, - "grad_norm": 64.14993286132812, + "grad_norm": 15267.380859375, "learning_rate": 2.834e-06, - "loss": 41.9451, + "loss": 209976.05, "step": 14170 }, { "epoch": 0.0286444971456506, - "grad_norm": 757.1309204101562, + "grad_norm": 571489.4375, "learning_rate": 2.8360000000000005e-06, - "loss": 47.0823, + "loss": 221856.625, "step": 14180 }, { "epoch": 0.028664697778334417, - "grad_norm": 608.0563354492188, + "grad_norm": 289114.8125, "learning_rate": 2.838e-06, - "loss": 57.3736, + "loss": 397767.475, "step": 14190 }, { "epoch": 0.028684898411018234, - "grad_norm": 236.17376708984375, + "grad_norm": 10996.333984375, "learning_rate": 2.84e-06, - "loss": 59.1326, + "loss": 328260.425, "step": 14200 }, { "epoch": 0.02870509904370205, - "grad_norm": 281.8260498046875, + "grad_norm": 170341.015625, "learning_rate": 2.8420000000000005e-06, - "loss": 50.2808, + "loss": 356347.3, "step": 14210 }, { "epoch": 0.028725299676385863, - "grad_norm": 173.17092895507812, + "grad_norm": 47909.96875, "learning_rate": 2.8440000000000002e-06, - "loss": 45.471, + "loss": 236474.5, "step": 14220 }, { "epoch": 0.02874550030906968, - "grad_norm": 157.9864959716797, + "grad_norm": 56036.6171875, "learning_rate": 2.8460000000000004e-06, - "loss": 53.8844, + "loss": 217826.775, "step": 14230 }, { "epoch": 0.028765700941753496, - "grad_norm": 162.28421020507812, + "grad_norm": 3117.171142578125, "learning_rate": 2.848e-06, - "loss": 43.7406, + "loss": 261306.475, "step": 14240 }, { "epoch": 0.028785901574437313, - "grad_norm": 693.994140625, + "grad_norm": 298129.96875, "learning_rate": 2.85e-06, - "loss": 50.75, + "loss": 268035.6, "step": 14250 }, { "epoch": 0.028806102207121126, - "grad_norm": 369.9490051269531, + "grad_norm": 94149.4453125, "learning_rate": 2.8520000000000004e-06, - "loss": 16.5204, + "loss": 34013.1562, "step": 14260 }, { "epoch": 0.028826302839804942, - "grad_norm": 135.34271240234375, + "grad_norm": 36427.9375, "learning_rate": 2.854e-06, - "loss": 37.1662, + "loss": 120268.4, "step": 14270 }, { "epoch": 0.02884650347248876, - "grad_norm": 149.12693786621094, + "grad_norm": 18915.083984375, "learning_rate": 2.8560000000000003e-06, - "loss": 62.8583, + "loss": 152732.2, "step": 14280 }, { "epoch": 0.028866704105172575, - "grad_norm": 259.700439453125, + "grad_norm": 75662.015625, "learning_rate": 2.8580000000000004e-06, - "loss": 35.1545, + "loss": 367288.0, "step": 14290 }, { "epoch": 0.02888690473785639, - "grad_norm": 567.5841674804688, + "grad_norm": 300413.5625, "learning_rate": 2.86e-06, - "loss": 66.6835, + "loss": 365527.45, "step": 14300 }, { "epoch": 0.028907105370540205, - "grad_norm": 286.7736511230469, + "grad_norm": 125549.7109375, "learning_rate": 2.8620000000000003e-06, - "loss": 52.2008, + "loss": 123016.825, "step": 14310 }, { "epoch": 0.02892730600322402, - "grad_norm": 507.8511962890625, + "grad_norm": 295946.90625, "learning_rate": 2.864e-06, - "loss": 59.1863, + "loss": 297653.225, "step": 14320 }, { "epoch": 0.028947506635907838, - "grad_norm": 450.0293273925781, + "grad_norm": 248871.703125, "learning_rate": 2.8660000000000006e-06, - "loss": 29.9697, + "loss": 116111.7, "step": 14330 }, { "epoch": 0.02896770726859165, - "grad_norm": 414.4989929199219, + "grad_norm": 482062.5, "learning_rate": 2.8680000000000003e-06, - "loss": 46.7118, + "loss": 312678.2, "step": 14340 }, { "epoch": 0.028987907901275468, - "grad_norm": 217.35693359375, + "grad_norm": 18638.46484375, "learning_rate": 2.87e-06, - "loss": 46.7959, + "loss": 234721.075, "step": 14350 }, { "epoch": 0.029008108533959284, - "grad_norm": 4409.21142578125, + "grad_norm": 0.0, "learning_rate": 2.872e-06, - "loss": 70.4103, + "loss": 285269.125, "step": 14360 }, { "epoch": 0.0290283091666431, - "grad_norm": 138.0399932861328, + "grad_norm": 47649.84375, "learning_rate": 2.874e-06, - "loss": 45.9032, + "loss": 156893.45, "step": 14370 }, { "epoch": 0.029048509799326914, - "grad_norm": 349.3572692871094, + "grad_norm": 179392.03125, "learning_rate": 2.8760000000000005e-06, - "loss": 51.5446, + "loss": 369039.375, "step": 14380 }, { "epoch": 0.02906871043201073, - "grad_norm": 427.65301513671875, + "grad_norm": 54099.44140625, "learning_rate": 2.8780000000000002e-06, - "loss": 23.6891, + "loss": 82225.7, "step": 14390 }, { "epoch": 0.029088911064694547, - "grad_norm": 1886.9266357421875, + "grad_norm": 0.0, "learning_rate": 2.88e-06, - "loss": 49.1044, + "loss": 119562.0375, "step": 14400 }, { "epoch": 0.029109111697378363, - "grad_norm": 296.0573425292969, + "grad_norm": 28313.111328125, "learning_rate": 2.8820000000000005e-06, - "loss": 25.1567, + "loss": 93878.0875, "step": 14410 }, { "epoch": 0.029129312330062176, - "grad_norm": 630.2744750976562, + "grad_norm": 396946.78125, "learning_rate": 2.8840000000000003e-06, - "loss": 37.2466, + "loss": 157276.725, "step": 14420 }, { "epoch": 0.029149512962745993, - "grad_norm": 167.78372192382812, + "grad_norm": 10863.1396484375, "learning_rate": 2.8860000000000004e-06, - "loss": 29.5391, + "loss": 119972.8875, "step": 14430 }, { "epoch": 0.02916971359542981, - "grad_norm": 144.763916015625, + "grad_norm": 29016.71875, "learning_rate": 2.888e-06, - "loss": 54.5164, + "loss": 284166.675, "step": 14440 }, { "epoch": 0.029189914228113626, - "grad_norm": 465.1449279785156, + "grad_norm": 334181.09375, "learning_rate": 2.89e-06, - "loss": 41.7386, + "loss": 149158.1, "step": 14450 }, { "epoch": 0.02921011486079744, - "grad_norm": 213.40628051757812, + "grad_norm": 44297.32421875, "learning_rate": 2.8920000000000004e-06, - "loss": 51.0694, + "loss": 368739.425, "step": 14460 }, { "epoch": 0.029230315493481256, - "grad_norm": 164.39657592773438, + "grad_norm": 2383.174560546875, "learning_rate": 2.894e-06, - "loss": 29.1384, + "loss": 116177.85, "step": 14470 }, { "epoch": 0.029250516126165072, - "grad_norm": 358.837646484375, + "grad_norm": 244489.640625, "learning_rate": 2.8960000000000003e-06, - "loss": 40.9785, + "loss": 326912.475, "step": 14480 }, { "epoch": 0.02927071675884889, - "grad_norm": 1034.663330078125, + "grad_norm": 834426.5625, "learning_rate": 2.8980000000000005e-06, - "loss": 58.5203, + "loss": 330364.05, "step": 14490 }, { "epoch": 0.029290917391532702, - "grad_norm": 121.1289291381836, + "grad_norm": 11286.1025390625, "learning_rate": 2.9e-06, - "loss": 42.9185, + "loss": 221363.45, "step": 14500 }, { "epoch": 0.02931111802421652, - "grad_norm": 165.76988220214844, + "grad_norm": 49631.109375, "learning_rate": 2.9020000000000003e-06, - "loss": 34.2855, + "loss": 173083.4125, "step": 14510 }, { "epoch": 0.029331318656900335, - "grad_norm": 544.7271118164062, + "grad_norm": 632601.25, "learning_rate": 2.904e-06, - "loss": 27.5391, + "loss": 155177.1125, "step": 14520 }, { "epoch": 0.02935151928958415, - "grad_norm": 201.6785430908203, + "grad_norm": 100950.5234375, "learning_rate": 2.9060000000000006e-06, - "loss": 29.0436, + "loss": 159663.925, "step": 14530 }, { "epoch": 0.029371719922267964, "grad_norm": 0.0, "learning_rate": 2.9080000000000004e-06, - "loss": 35.3046, + "loss": 159424.9125, "step": 14540 }, { "epoch": 0.02939192055495178, - "grad_norm": 423.13543701171875, + "grad_norm": 219858.828125, "learning_rate": 2.91e-06, - "loss": 39.1192, + "loss": 101631.475, "step": 14550 }, { "epoch": 0.029412121187635597, - "grad_norm": 303.783935546875, + "grad_norm": 11918.8212890625, "learning_rate": 2.9120000000000002e-06, - "loss": 45.4243, + "loss": 255535.45, "step": 14560 }, { "epoch": 0.029432321820319414, "grad_norm": 0.0, "learning_rate": 2.914e-06, - "loss": 27.2965, + "loss": 199540.775, "step": 14570 }, { "epoch": 0.029452522453003227, - "grad_norm": 199.57769775390625, + "grad_norm": 28887.00390625, "learning_rate": 2.9160000000000005e-06, - "loss": 60.3706, + "loss": 58442.7312, "step": 14580 }, { "epoch": 0.029472723085687044, - "grad_norm": 487.9112548828125, + "grad_norm": 88626.5546875, "learning_rate": 2.9180000000000003e-06, - "loss": 29.3064, + "loss": 81718.8875, "step": 14590 }, { "epoch": 0.02949292371837086, - "grad_norm": 652.5009765625, + "grad_norm": 24962.04296875, "learning_rate": 2.92e-06, - "loss": 36.7811, + "loss": 145301.6, "step": 14600 }, { "epoch": 0.029513124351054677, - "grad_norm": 147.73748779296875, + "grad_norm": 41230.578125, "learning_rate": 2.9220000000000006e-06, - "loss": 47.3381, + "loss": 99076.9563, "step": 14610 }, { "epoch": 0.02953332498373849, - "grad_norm": 523.43994140625, + "grad_norm": 204194.46875, "learning_rate": 2.9240000000000003e-06, - "loss": 45.4851, + "loss": 152933.3, "step": 14620 }, { "epoch": 0.029553525616422306, - "grad_norm": 569.4759521484375, + "grad_norm": 293958.09375, "learning_rate": 2.9260000000000004e-06, - "loss": 47.2319, + "loss": 190300.75, "step": 14630 }, { "epoch": 0.029573726249106123, - "grad_norm": 424.2410888671875, + "grad_norm": 291722.78125, "learning_rate": 2.928e-06, - "loss": 33.0208, + "loss": 189291.5375, "step": 14640 }, { "epoch": 0.02959392688178994, - "grad_norm": 616.6305541992188, + "grad_norm": 628343.125, "learning_rate": 2.93e-06, - "loss": 37.5734, + "loss": 169368.85, "step": 14650 }, { "epoch": 0.029614127514473752, - "grad_norm": 46.8990364074707, + "grad_norm": 4085.1396484375, "learning_rate": 2.9320000000000005e-06, - "loss": 57.7725, + "loss": 285031.75, "step": 14660 }, { "epoch": 0.02963432814715757, - "grad_norm": 533.009765625, + "grad_norm": 280526.90625, "learning_rate": 2.934e-06, - "loss": 38.208, + "loss": 97504.45, "step": 14670 }, { "epoch": 0.029654528779841385, - "grad_norm": 576.1964721679688, + "grad_norm": 345619.34375, "learning_rate": 2.9360000000000003e-06, - "loss": 46.621, + "loss": 190223.875, "step": 14680 }, { "epoch": 0.029674729412525202, - "grad_norm": 815.6459350585938, + "grad_norm": 294009.21875, "learning_rate": 2.9380000000000005e-06, - "loss": 39.1253, + "loss": 149748.7875, "step": 14690 }, { "epoch": 0.029694930045209015, - "grad_norm": 390.7784423828125, + "grad_norm": 47693.9375, "learning_rate": 2.9400000000000002e-06, - "loss": 17.9608, + "loss": 36859.3, "step": 14700 }, { "epoch": 0.02971513067789283, "grad_norm": 0.0, "learning_rate": 2.9420000000000004e-06, - "loss": 15.4903, + "loss": 67179.3625, "step": 14710 }, { "epoch": 0.029735331310576648, - "grad_norm": 207.04660034179688, + "grad_norm": 76314.5390625, "learning_rate": 2.944e-06, - "loss": 53.6643, + "loss": 366291.925, "step": 14720 }, { "epoch": 0.029755531943260465, - "grad_norm": 1821.5599365234375, + "grad_norm": 1301867.5, "learning_rate": 2.946e-06, - "loss": 31.776, + "loss": 213070.525, "step": 14730 }, { "epoch": 0.029775732575944278, - "grad_norm": 338.0540771484375, + "grad_norm": 42087.6171875, "learning_rate": 2.9480000000000004e-06, - "loss": 32.3723, + "loss": 116627.15, "step": 14740 }, { "epoch": 0.029795933208628094, - "grad_norm": 513.3118896484375, + "grad_norm": 213249.765625, "learning_rate": 2.95e-06, - "loss": 48.9721, + "loss": 188447.3875, "step": 14750 }, { "epoch": 0.02981613384131191, - "grad_norm": 262.4870300292969, + "grad_norm": 48263.4609375, "learning_rate": 2.9520000000000003e-06, - "loss": 33.7834, + "loss": 121147.125, "step": 14760 }, { "epoch": 0.029836334473995727, - "grad_norm": 559.300048828125, + "grad_norm": 94284.609375, "learning_rate": 2.954e-06, - "loss": 44.6366, + "loss": 107571.8875, "step": 14770 }, { "epoch": 0.02985653510667954, - "grad_norm": 817.7669067382812, + "grad_norm": 619601.75, "learning_rate": 2.956e-06, - "loss": 50.5415, + "loss": 312122.65, "step": 14780 }, { "epoch": 0.029876735739363357, - "grad_norm": 240.45497131347656, + "grad_norm": 13890.99609375, "learning_rate": 2.9580000000000003e-06, - "loss": 55.6818, + "loss": 225089.675, "step": 14790 }, { "epoch": 0.029896936372047173, - "grad_norm": 395.85089111328125, + "grad_norm": 395131.40625, "learning_rate": 2.96e-06, - "loss": 72.9545, + "loss": 379139.45, "step": 14800 }, { "epoch": 0.02991713700473099, - "grad_norm": 559.7106323242188, + "grad_norm": 334524.53125, "learning_rate": 2.9620000000000006e-06, - "loss": 38.409, + "loss": 197060.45, "step": 14810 }, { "epoch": 0.029937337637414803, - "grad_norm": 297.4722900390625, + "grad_norm": 79704.9375, "learning_rate": 2.9640000000000003e-06, - "loss": 41.8604, + "loss": 133238.525, "step": 14820 }, { "epoch": 0.02995753827009862, - "grad_norm": 503.2807312011719, + "grad_norm": 158291.8125, "learning_rate": 2.966e-06, - "loss": 48.5068, + "loss": 149411.1, "step": 14830 }, { "epoch": 0.029977738902782436, - "grad_norm": 398.6009521484375, + "grad_norm": 153806.0625, "learning_rate": 2.9680000000000002e-06, - "loss": 46.2417, + "loss": 225935.65, "step": 14840 }, { "epoch": 0.029997939535466252, - "grad_norm": 203.42825317382812, + "grad_norm": 6139.8017578125, "learning_rate": 2.97e-06, - "loss": 20.6895, + "loss": 64459.5125, "step": 14850 }, { "epoch": 0.030018140168150065, - "grad_norm": 663.2717895507812, + "grad_norm": 224546.5, "learning_rate": 2.9720000000000005e-06, - "loss": 36.1233, + "loss": 124724.2875, "step": 14860 }, { "epoch": 0.030038340800833882, - "grad_norm": 279.4349670410156, + "grad_norm": 46594.82421875, "learning_rate": 2.9740000000000002e-06, - "loss": 25.341, + "loss": 95226.0375, "step": 14870 }, { "epoch": 0.0300585414335177, - "grad_norm": 922.9779052734375, + "grad_norm": 307646.75, "learning_rate": 2.976e-06, - "loss": 42.0328, + "loss": 136582.7, "step": 14880 }, { "epoch": 0.030078742066201515, - "grad_norm": 205.27992248535156, + "grad_norm": 47158.9609375, "learning_rate": 2.9780000000000005e-06, - "loss": 52.5941, + "loss": 430235.1, "step": 14890 }, { "epoch": 0.030098942698885328, - "grad_norm": 301.19012451171875, + "grad_norm": 112286.328125, "learning_rate": 2.9800000000000003e-06, - "loss": 35.1652, + "loss": 112897.6375, "step": 14900 }, { "epoch": 0.030119143331569145, - "grad_norm": 284.6502380371094, + "grad_norm": 47442.18359375, "learning_rate": 2.9820000000000004e-06, - "loss": 47.6817, + "loss": 103870.1875, "step": 14910 }, { "epoch": 0.03013934396425296, - "grad_norm": 374.0673522949219, + "grad_norm": 273993.0625, "learning_rate": 2.984e-06, - "loss": 52.5961, + "loss": 260075.25, "step": 14920 }, { "epoch": 0.030159544596936778, - "grad_norm": 330.4620666503906, + "grad_norm": 140369.65625, "learning_rate": 2.986e-06, - "loss": 27.8789, + "loss": 76347.3687, "step": 14930 }, { "epoch": 0.03017974522962059, - "grad_norm": 617.0927734375, + "grad_norm": 239196.765625, "learning_rate": 2.9880000000000004e-06, - "loss": 50.3701, + "loss": 281558.8, "step": 14940 }, { "epoch": 0.030199945862304407, - "grad_norm": 89.50720977783203, + "grad_norm": 22460.666015625, "learning_rate": 2.99e-06, - "loss": 62.0245, + "loss": 189699.625, "step": 14950 }, { "epoch": 0.030220146494988224, - "grad_norm": 191.6051788330078, + "grad_norm": 9826.7998046875, "learning_rate": 2.9920000000000003e-06, - "loss": 31.4841, + "loss": 76610.6812, "step": 14960 }, { "epoch": 0.03024034712767204, - "grad_norm": 1258.4844970703125, + "grad_norm": 0.0, "learning_rate": 2.994e-06, - "loss": 51.323, + "loss": 174526.075, "step": 14970 }, { "epoch": 0.030260547760355853, - "grad_norm": 277.320068359375, + "grad_norm": 157236.484375, "learning_rate": 2.996e-06, - "loss": 51.4817, + "loss": 449372.05, "step": 14980 }, { "epoch": 0.03028074839303967, - "grad_norm": 68.63617706298828, + "grad_norm": 5685.97705078125, "learning_rate": 2.9980000000000003e-06, - "loss": 47.7153, + "loss": 225450.7, "step": 14990 }, { "epoch": 0.030300949025723486, - "grad_norm": 559.943359375, + "grad_norm": 696371.25, "learning_rate": 3e-06, - "loss": 48.4758, + "loss": 190982.3125, "step": 15000 }, { "epoch": 0.030321149658407303, - "grad_norm": 259.10101318359375, + "grad_norm": 150510.859375, "learning_rate": 3.0020000000000006e-06, - "loss": 47.9744, + "loss": 200957.075, "step": 15010 }, { "epoch": 0.030341350291091116, - "grad_norm": 147.1034698486328, + "grad_norm": 13057.84375, "learning_rate": 3.0040000000000004e-06, - "loss": 42.128, + "loss": 230866.75, "step": 15020 }, { "epoch": 0.030361550923774933, - "grad_norm": 605.0328979492188, + "grad_norm": 491994.40625, "learning_rate": 3.006e-06, - "loss": 40.8692, + "loss": 199639.6625, "step": 15030 }, { "epoch": 0.03038175155645875, - "grad_norm": 479.86383056640625, + "grad_norm": 379678.5, "learning_rate": 3.0080000000000003e-06, - "loss": 42.5204, + "loss": 235427.85, "step": 15040 }, { "epoch": 0.030401952189142566, - "grad_norm": 1080.4114990234375, + "grad_norm": 966072.625, "learning_rate": 3.01e-06, - "loss": 42.668, + "loss": 199396.65, "step": 15050 }, { "epoch": 0.03042215282182638, - "grad_norm": 212.96617126464844, + "grad_norm": 20642.115234375, "learning_rate": 3.0120000000000006e-06, - "loss": 38.3446, + "loss": 111723.7375, "step": 15060 }, { "epoch": 0.030442353454510195, - "grad_norm": 238.01953125, + "grad_norm": 21906.8125, "learning_rate": 3.0140000000000003e-06, - "loss": 47.0416, + "loss": 144883.8625, "step": 15070 }, { "epoch": 0.030462554087194012, - "grad_norm": 387.8447265625, + "grad_norm": 78640.8515625, "learning_rate": 3.016e-06, - "loss": 43.489, + "loss": 361553.65, "step": 15080 }, { "epoch": 0.03048275471987783, - "grad_norm": 168.1655731201172, + "grad_norm": 42411.1328125, "learning_rate": 3.0180000000000006e-06, - "loss": 26.384, + "loss": 122691.0125, "step": 15090 }, { "epoch": 0.03050295535256164, - "grad_norm": 1647.8919677734375, + "grad_norm": 1536951.25, "learning_rate": 3.0200000000000003e-06, - "loss": 38.7624, + "loss": 222863.45, "step": 15100 }, { "epoch": 0.030523155985245458, - "grad_norm": 257.6524658203125, + "grad_norm": 144936.578125, "learning_rate": 3.0220000000000005e-06, - "loss": 32.5818, + "loss": 249402.0, "step": 15110 }, { "epoch": 0.030543356617929274, - "grad_norm": 221.60166931152344, + "grad_norm": 67133.6640625, "learning_rate": 3.024e-06, - "loss": 26.2076, + "loss": 119949.4375, "step": 15120 }, { "epoch": 0.03056355725061309, - "grad_norm": 339.908447265625, + "grad_norm": 193259.875, "learning_rate": 3.026e-06, - "loss": 22.8417, + "loss": 65447.8812, "step": 15130 }, { "epoch": 0.030583757883296904, - "grad_norm": 379.72918701171875, + "grad_norm": 131465.75, "learning_rate": 3.0280000000000005e-06, - "loss": 26.9128, + "loss": 89526.125, "step": 15140 }, { "epoch": 0.03060395851598072, - "grad_norm": 402.3966369628906, + "grad_norm": 286535.125, "learning_rate": 3.0300000000000002e-06, - "loss": 58.0108, + "loss": 329051.675, "step": 15150 }, { "epoch": 0.030624159148664537, - "grad_norm": 639.5358276367188, + "grad_norm": 741642.1875, "learning_rate": 3.0320000000000004e-06, - "loss": 57.3743, + "loss": 302163.225, "step": 15160 }, { "epoch": 0.030644359781348354, - "grad_norm": 576.1785888671875, + "grad_norm": 270819.96875, "learning_rate": 3.034e-06, - "loss": 67.3909, + "loss": 224264.225, "step": 15170 }, { "epoch": 0.030664560414032167, - "grad_norm": 220.1876983642578, + "grad_norm": 23599.216796875, "learning_rate": 3.0360000000000002e-06, - "loss": 51.4226, + "loss": 180308.8625, "step": 15180 }, { "epoch": 0.030684761046715983, - "grad_norm": 364.67254638671875, + "grad_norm": 195541.90625, "learning_rate": 3.0380000000000004e-06, - "loss": 46.9077, + "loss": 218088.175, "step": 15190 }, { "epoch": 0.0307049616793998, - "grad_norm": 616.2078247070312, + "grad_norm": 292197.5625, "learning_rate": 3.04e-06, - "loss": 71.0844, + "loss": 359849.275, "step": 15200 }, { "epoch": 0.030725162312083613, - "grad_norm": 1710.62548828125, + "grad_norm": 1074418.5, "learning_rate": 3.0420000000000007e-06, - "loss": 86.3848, + "loss": 497262.3, "step": 15210 }, { "epoch": 0.03074536294476743, - "grad_norm": 251.0055389404297, + "grad_norm": 63500.83203125, "learning_rate": 3.0440000000000004e-06, - "loss": 31.3573, + "loss": 195592.9625, "step": 15220 }, { "epoch": 0.030765563577451246, - "grad_norm": 190.64108276367188, + "grad_norm": 59849.8828125, "learning_rate": 3.046e-06, - "loss": 39.0943, + "loss": 211622.4, "step": 15230 }, { "epoch": 0.030785764210135062, - "grad_norm": 393.06182861328125, + "grad_norm": 186580.203125, "learning_rate": 3.0480000000000003e-06, - "loss": 34.1155, + "loss": 121559.525, "step": 15240 }, { "epoch": 0.030805964842818875, - "grad_norm": 186.20863342285156, + "grad_norm": 20653.802734375, "learning_rate": 3.05e-06, - "loss": 60.104, + "loss": 484752.5, "step": 15250 }, { "epoch": 0.030826165475502692, - "grad_norm": 119.16156005859375, + "grad_norm": 1352.395751953125, "learning_rate": 3.0520000000000006e-06, - "loss": 29.3792, + "loss": 95537.4312, "step": 15260 }, { "epoch": 0.03084636610818651, - "grad_norm": 159.13607788085938, + "grad_norm": 6147.41455078125, "learning_rate": 3.0540000000000003e-06, - "loss": 31.66, + "loss": 125418.875, "step": 15270 }, { "epoch": 0.030866566740870325, - "grad_norm": 158.65013122558594, + "grad_norm": 37095.921875, "learning_rate": 3.056e-06, - "loss": 27.5204, + "loss": 95549.675, "step": 15280 }, { "epoch": 0.030886767373554138, - "grad_norm": 1228.241455078125, + "grad_norm": 971254.0625, "learning_rate": 3.0580000000000006e-06, - "loss": 40.2045, + "loss": 285949.925, "step": 15290 }, { "epoch": 0.030906968006237955, - "grad_norm": 446.7908020019531, + "grad_norm": 148390.765625, "learning_rate": 3.0600000000000003e-06, - "loss": 40.633, + "loss": 193213.7625, "step": 15300 }, { "epoch": 0.03092716863892177, - "grad_norm": 139.8188018798828, + "grad_norm": 52275.71875, "learning_rate": 3.0620000000000005e-06, - "loss": 37.843, + "loss": 76483.3125, "step": 15310 }, { "epoch": 0.030947369271605588, - "grad_norm": 285.90350341796875, + "grad_norm": 266445.59375, "learning_rate": 3.0640000000000002e-06, - "loss": 25.3681, + "loss": 99587.8875, "step": 15320 }, { "epoch": 0.0309675699042894, - "grad_norm": 202.5151824951172, + "grad_norm": 45814.265625, "learning_rate": 3.066e-06, - "loss": 45.6903, + "loss": 241233.1, "step": 15330 }, { "epoch": 0.030987770536973217, - "grad_norm": 403.198974609375, + "grad_norm": 112992.4921875, "learning_rate": 3.0680000000000005e-06, - "loss": 28.2395, + "loss": 166470.675, "step": 15340 }, { "epoch": 0.031007971169657034, - "grad_norm": 402.5394592285156, + "grad_norm": 546427.4375, "learning_rate": 3.0700000000000003e-06, - "loss": 70.7948, + "loss": 308355.25, "step": 15350 }, { "epoch": 0.03102817180234085, - "grad_norm": 492.13983154296875, + "grad_norm": 135486.40625, "learning_rate": 3.072e-06, - "loss": 19.5293, + "loss": 72084.1875, "step": 15360 }, { "epoch": 0.031048372435024663, - "grad_norm": 367.2123107910156, + "grad_norm": 145048.078125, "learning_rate": 3.074e-06, - "loss": 60.0171, + "loss": 230300.075, "step": 15370 }, { "epoch": 0.03106857306770848, - "grad_norm": 573.36083984375, + "grad_norm": 59876.515625, "learning_rate": 3.0760000000000003e-06, - "loss": 35.7587, + "loss": 148150.9375, "step": 15380 }, { "epoch": 0.031088773700392296, - "grad_norm": 432.49298095703125, + "grad_norm": 100640.1640625, "learning_rate": 3.0780000000000004e-06, - "loss": 50.5653, + "loss": 241763.025, "step": 15390 }, { "epoch": 0.031108974333076113, - "grad_norm": 334.3650817871094, + "grad_norm": 95582.78125, "learning_rate": 3.08e-06, - "loss": 29.9017, + "loss": 130287.7875, "step": 15400 }, { "epoch": 0.031129174965759926, - "grad_norm": 131.07797241210938, + "grad_norm": 1897.7166748046875, "learning_rate": 3.082e-06, - "loss": 34.834, + "loss": 131254.7625, "step": 15410 }, { "epoch": 0.031149375598443742, - "grad_norm": 2225.18115234375, + "grad_norm": 141134.078125, "learning_rate": 3.0840000000000005e-06, - "loss": 55.1716, + "loss": 187417.825, "step": 15420 }, { "epoch": 0.03116957623112756, - "grad_norm": 166.94090270996094, + "grad_norm": 22351.767578125, "learning_rate": 3.086e-06, - "loss": 30.1353, + "loss": 147228.2125, "step": 15430 }, { "epoch": 0.031189776863811376, - "grad_norm": 347.8710632324219, + "grad_norm": 70009.0703125, "learning_rate": 3.0880000000000003e-06, - "loss": 37.0008, + "loss": 152209.7375, "step": 15440 }, { "epoch": 0.03120997749649519, - "grad_norm": 350.5280456542969, + "grad_norm": 98408.9609375, "learning_rate": 3.09e-06, - "loss": 45.1672, + "loss": 237030.35, "step": 15450 }, { "epoch": 0.031230178129179005, - "grad_norm": 216.7772674560547, + "grad_norm": 21650.69921875, "learning_rate": 3.092e-06, - "loss": 40.7176, + "loss": 204884.975, "step": 15460 }, { "epoch": 0.03125037876186282, - "grad_norm": 456.4850769042969, + "grad_norm": 128783.15625, "learning_rate": 3.0940000000000004e-06, - "loss": 26.3409, + "loss": 97484.45, "step": 15470 }, { "epoch": 0.03127057939454664, - "grad_norm": 308.0962829589844, + "grad_norm": 30228.623046875, "learning_rate": 3.096e-06, - "loss": 26.7155, + "loss": 104216.7125, "step": 15480 }, { "epoch": 0.03129078002723045, - "grad_norm": 257.20489501953125, + "grad_norm": 67770.0078125, "learning_rate": 3.0980000000000007e-06, - "loss": 43.7936, + "loss": 176309.15, "step": 15490 }, { "epoch": 0.03131098065991427, - "grad_norm": 204.90631103515625, + "grad_norm": 129919.921875, "learning_rate": 3.1000000000000004e-06, - "loss": 65.203, + "loss": 312790.925, "step": 15500 }, { "epoch": 0.031331181292598084, - "grad_norm": 328.27532958984375, + "grad_norm": 36839.9453125, "learning_rate": 3.102e-06, - "loss": 55.6755, + "loss": 362397.925, "step": 15510 }, { "epoch": 0.0313513819252819, - "grad_norm": 717.0737915039062, + "grad_norm": 260980.234375, "learning_rate": 3.1040000000000003e-06, - "loss": 41.0839, + "loss": 184106.55, "step": 15520 }, { "epoch": 0.03137158255796572, - "grad_norm": 202.22677612304688, + "grad_norm": 40780.0859375, "learning_rate": 3.106e-06, - "loss": 38.1692, + "loss": 114512.025, "step": 15530 }, { "epoch": 0.03139178319064953, - "grad_norm": 233.7246856689453, + "grad_norm": 47530.94140625, "learning_rate": 3.1080000000000006e-06, - "loss": 32.9056, + "loss": 164274.8875, "step": 15540 }, { "epoch": 0.03141198382333334, - "grad_norm": 381.95672607421875, + "grad_norm": 238504.703125, "learning_rate": 3.1100000000000003e-06, - "loss": 45.9516, + "loss": 265469.7, "step": 15550 }, { "epoch": 0.03143218445601716, - "grad_norm": 187.48553466796875, + "grad_norm": 44439.9296875, "learning_rate": 3.112e-06, - "loss": 33.0008, + "loss": 138924.375, "step": 15560 }, { "epoch": 0.031452385088700976, - "grad_norm": 107.05130004882812, + "grad_norm": 105758.75, "learning_rate": 3.114e-06, - "loss": 20.2437, + "loss": 144622.225, "step": 15570 }, { "epoch": 0.031472585721384796, - "grad_norm": 278.2366638183594, + "grad_norm": 192396.84375, "learning_rate": 3.1160000000000003e-06, - "loss": 58.5014, + "loss": 171125.4875, "step": 15580 }, { "epoch": 0.03149278635406861, - "grad_norm": 335.544921875, + "grad_norm": 17721.328125, "learning_rate": 3.1180000000000005e-06, - "loss": 41.6651, + "loss": 220817.975, "step": 15590 }, { "epoch": 0.03151298698675242, - "grad_norm": 382.7352294921875, + "grad_norm": 20394.228515625, "learning_rate": 3.12e-06, - "loss": 39.9211, + "loss": 252738.4, "step": 15600 }, { "epoch": 0.03153318761943624, - "grad_norm": 273.2965393066406, + "grad_norm": 92268.828125, "learning_rate": 3.122e-06, - "loss": 24.5785, + "loss": 101360.7125, "step": 15610 }, { "epoch": 0.031553388252120056, - "grad_norm": 296.5772705078125, + "grad_norm": 147409.75, "learning_rate": 3.1240000000000005e-06, - "loss": 24.2458, + "loss": 71510.0125, "step": 15620 }, { "epoch": 0.03157358888480387, - "grad_norm": 1194.7259521484375, + "grad_norm": 1161304.875, "learning_rate": 3.1260000000000002e-06, - "loss": 43.3945, + "loss": 272020.45, "step": 15630 }, { "epoch": 0.03159378951748769, - "grad_norm": 527.0797119140625, + "grad_norm": 369108.34375, "learning_rate": 3.1280000000000004e-06, - "loss": 54.0321, + "loss": 207443.925, "step": 15640 }, { "epoch": 0.0316139901501715, - "grad_norm": 351.5915222167969, + "grad_norm": 275714.875, "learning_rate": 3.13e-06, - "loss": 33.3611, + "loss": 158445.75, "step": 15650 }, { "epoch": 0.03163419078285532, - "grad_norm": 237.34178161621094, + "grad_norm": 17373.583984375, "learning_rate": 3.132e-06, - "loss": 34.3106, + "loss": 147508.175, "step": 15660 }, { "epoch": 0.031654391415539135, - "grad_norm": 426.2868347167969, + "grad_norm": 187948.359375, "learning_rate": 3.1340000000000004e-06, - "loss": 35.8532, + "loss": 187817.325, "step": 15670 }, { "epoch": 0.03167459204822295, - "grad_norm": 716.2186889648438, + "grad_norm": 438933.78125, "learning_rate": 3.136e-06, - "loss": 46.1582, + "loss": 213266.1, "step": 15680 }, { "epoch": 0.03169479268090677, - "grad_norm": 682.427978515625, + "grad_norm": 497474.09375, "learning_rate": 3.1380000000000003e-06, - "loss": 53.3624, + "loss": 215634.65, "step": 15690 }, { "epoch": 0.03171499331359058, "grad_norm": 0.0, "learning_rate": 3.1400000000000004e-06, - "loss": 21.88, + "loss": 149464.1875, "step": 15700 }, { "epoch": 0.031735193946274394, - "grad_norm": 415.7381896972656, + "grad_norm": 127752.7890625, "learning_rate": 3.142e-06, - "loss": 19.6266, + "loss": 253660.575, "step": 15710 }, { "epoch": 0.031755394578958214, - "grad_norm": 445.79693603515625, + "grad_norm": 393963.90625, "learning_rate": 3.1440000000000003e-06, - "loss": 60.3774, + "loss": 410789.85, "step": 15720 }, { "epoch": 0.03177559521164203, - "grad_norm": 455.68914794921875, + "grad_norm": 64724.703125, "learning_rate": 3.146e-06, - "loss": 37.4059, + "loss": 151123.4, "step": 15730 }, { "epoch": 0.03179579584432585, - "grad_norm": 461.43548583984375, + "grad_norm": 265660.8125, "learning_rate": 3.1480000000000006e-06, - "loss": 33.2523, + "loss": 65454.0625, "step": 15740 }, { "epoch": 0.03181599647700966, - "grad_norm": 175.7362060546875, + "grad_norm": 52908.82421875, "learning_rate": 3.1500000000000003e-06, - "loss": 64.4959, + "loss": 435854.55, "step": 15750 }, { "epoch": 0.03183619710969347, - "grad_norm": 972.4329833984375, + "grad_norm": 959134.75, "learning_rate": 3.152e-06, - "loss": 83.9691, + "loss": 476708.05, "step": 15760 }, { "epoch": 0.03185639774237729, - "grad_norm": 227.48056030273438, + "grad_norm": 119333.515625, "learning_rate": 3.154e-06, - "loss": 24.3787, + "loss": 103059.15, "step": 15770 }, { "epoch": 0.031876598375061106, - "grad_norm": 348.4019775390625, + "grad_norm": 236422.359375, "learning_rate": 3.1560000000000004e-06, - "loss": 34.8591, + "loss": 150866.325, "step": 15780 }, { "epoch": 0.03189679900774492, - "grad_norm": 339.1818542480469, + "grad_norm": 106956.7421875, "learning_rate": 3.1580000000000005e-06, - "loss": 31.1008, + "loss": 115835.8625, "step": 15790 }, { "epoch": 0.03191699964042874, - "grad_norm": 173.91188049316406, + "grad_norm": 97389.734375, "learning_rate": 3.1600000000000002e-06, - "loss": 20.8794, + "loss": 128995.7125, "step": 15800 }, { "epoch": 0.03193720027311255, - "grad_norm": 227.97618103027344, + "grad_norm": 30681.72265625, "learning_rate": 3.162e-06, - "loss": 48.3481, + "loss": 274501.125, "step": 15810 }, { "epoch": 0.03195740090579637, - "grad_norm": 176.4508514404297, + "grad_norm": 73750.7109375, "learning_rate": 3.1640000000000005e-06, - "loss": 50.2529, + "loss": 172363.6375, "step": 15820 }, { "epoch": 0.031977601538480185, - "grad_norm": 282.0390930175781, + "grad_norm": 78256.7890625, "learning_rate": 3.1660000000000003e-06, - "loss": 30.7582, + "loss": 109426.375, "step": 15830 }, { "epoch": 0.031997802171164, - "grad_norm": 250.7970428466797, + "grad_norm": 8058.2734375, "learning_rate": 3.1680000000000004e-06, - "loss": 65.5735, + "loss": 303548.5, "step": 15840 }, { "epoch": 0.03201800280384782, - "grad_norm": 463.2630310058594, + "grad_norm": 443669.375, "learning_rate": 3.17e-06, - "loss": 41.1718, + "loss": 274234.2, "step": 15850 }, { "epoch": 0.03203820343653163, - "grad_norm": 230.82351684570312, + "grad_norm": 64187.80859375, "learning_rate": 3.172e-06, - "loss": 44.2215, + "loss": 179930.9, "step": 15860 }, { "epoch": 0.032058404069215445, - "grad_norm": 150.6900177001953, + "grad_norm": 5314.0380859375, "learning_rate": 3.1740000000000004e-06, - "loss": 41.6319, + "loss": 187561.15, "step": 15870 }, { "epoch": 0.032078604701899265, "grad_norm": 0.0, "learning_rate": 3.176e-06, - "loss": 27.2683, + "loss": 134795.325, "step": 15880 }, { "epoch": 0.03209880533458308, - "grad_norm": 1897.1505126953125, + "grad_norm": 1004571.4375, "learning_rate": 3.1780000000000003e-06, - "loss": 56.4001, + "loss": 317008.9, "step": 15890 }, { "epoch": 0.0321190059672669, - "grad_norm": 243.29824829101562, + "grad_norm": 22102.693359375, "learning_rate": 3.1800000000000005e-06, - "loss": 58.1014, + "loss": 380662.575, "step": 15900 }, { "epoch": 0.03213920659995071, - "grad_norm": 410.69708251953125, + "grad_norm": 130084.71875, "learning_rate": 3.182e-06, - "loss": 78.1212, + "loss": 300324.35, "step": 15910 }, { "epoch": 0.032159407232634524, - "grad_norm": 332.1102294921875, + "grad_norm": 186967.3125, "learning_rate": 3.1840000000000003e-06, - "loss": 55.9642, + "loss": 280885.35, "step": 15920 }, { "epoch": 0.032179607865318344, - "grad_norm": 249.78611755371094, + "grad_norm": 59414.8515625, "learning_rate": 3.186e-06, - "loss": 45.9191, + "loss": 145658.6875, "step": 15930 }, { "epoch": 0.03219980849800216, - "grad_norm": 627.4598999023438, + "grad_norm": 538410.25, "learning_rate": 3.188e-06, - "loss": 57.8621, + "loss": 415175.475, "step": 15940 }, { "epoch": 0.03222000913068597, - "grad_norm": 615.73095703125, + "grad_norm": 95745.90625, "learning_rate": 3.1900000000000004e-06, - "loss": 36.453, + "loss": 118163.2125, "step": 15950 }, { "epoch": 0.03224020976336979, - "grad_norm": 258.0998229980469, + "grad_norm": 94059.09375, "learning_rate": 3.192e-06, - "loss": 60.1629, + "loss": 176170.2875, "step": 15960 }, { "epoch": 0.0322604103960536, - "grad_norm": 264.0076599121094, + "grad_norm": 246701.421875, "learning_rate": 3.1940000000000003e-06, - "loss": 39.9636, + "loss": 239862.05, "step": 15970 }, { "epoch": 0.03228061102873742, - "grad_norm": 210.51654052734375, + "grad_norm": 290643.84375, "learning_rate": 3.1960000000000004e-06, - "loss": 37.5337, + "loss": 175221.425, "step": 15980 }, { "epoch": 0.032300811661421236, - "grad_norm": 260.99676513671875, + "grad_norm": 32107.185546875, "learning_rate": 3.198e-06, - "loss": 59.655, + "loss": 376559.0, "step": 15990 }, { "epoch": 0.03232101229410505, - "grad_norm": 175.38279724121094, + "grad_norm": 37459.6953125, "learning_rate": 3.2000000000000003e-06, - "loss": 42.2928, + "loss": 177136.4375, "step": 16000 }, { "epoch": 0.03234121292678887, - "grad_norm": 89.09351348876953, + "grad_norm": 15632.1806640625, "learning_rate": 3.202e-06, - "loss": 38.7274, + "loss": 198683.0625, "step": 16010 }, { "epoch": 0.03236141355947268, - "grad_norm": 310.6824035644531, + "grad_norm": 208434.421875, "learning_rate": 3.2040000000000006e-06, - "loss": 56.912, + "loss": 262985.275, "step": 16020 }, { "epoch": 0.032381614192156495, - "grad_norm": 0.0, + "grad_norm": 2559868.0, "learning_rate": 3.2060000000000003e-06, - "loss": 48.4325, + "loss": 564008.2, "step": 16030 }, { "epoch": 0.032401814824840315, - "grad_norm": 648.9747924804688, + "grad_norm": 712099.9375, "learning_rate": 3.208e-06, - "loss": 74.208, + "loss": 350147.6, "step": 16040 }, { "epoch": 0.03242201545752413, - "grad_norm": 137.7239532470703, + "grad_norm": 353808.0625, "learning_rate": 3.21e-06, - "loss": 56.8868, + "loss": 284649.2, "step": 16050 }, { "epoch": 0.03244221609020795, - "grad_norm": 91.45613098144531, + "grad_norm": 259167.4375, "learning_rate": 3.212e-06, - "loss": 42.6986, + "loss": 286055.55, "step": 16060 }, { "epoch": 0.03246241672289176, - "grad_norm": 833.3985595703125, + "grad_norm": 516096.46875, "learning_rate": 3.2140000000000005e-06, - "loss": 53.7786, + "loss": 261945.85, "step": 16070 }, { "epoch": 0.032482617355575574, - "grad_norm": 86.16954803466797, + "grad_norm": 10423.8759765625, "learning_rate": 3.216e-06, - "loss": 31.6678, + "loss": 159433.5875, "step": 16080 }, { "epoch": 0.032502817988259394, - "grad_norm": 334.82904052734375, + "grad_norm": 48457.6796875, "learning_rate": 3.218e-06, - "loss": 44.6531, + "loss": 263181.125, "step": 16090 }, { "epoch": 0.03252301862094321, - "grad_norm": 778.0987548828125, + "grad_norm": 13101.0224609375, "learning_rate": 3.2200000000000005e-06, - "loss": 25.9405, + "loss": 121936.7875, "step": 16100 }, { "epoch": 0.03254321925362702, - "grad_norm": 216.55364990234375, + "grad_norm": 80016.8984375, "learning_rate": 3.2220000000000002e-06, - "loss": 51.9026, + "loss": 420936.05, "step": 16110 }, { "epoch": 0.03256341988631084, - "grad_norm": 377.78668212890625, + "grad_norm": 276122.5625, "learning_rate": 3.2240000000000004e-06, - "loss": 71.3147, + "loss": 297331.4, "step": 16120 }, { "epoch": 0.03258362051899465, - "grad_norm": 212.40968322753906, + "grad_norm": 36348.08984375, "learning_rate": 3.226e-06, - "loss": 13.3245, + "loss": 44609.7594, "step": 16130 }, { "epoch": 0.03260382115167847, - "grad_norm": 768.0552978515625, + "grad_norm": 1487770.75, "learning_rate": 3.228e-06, - "loss": 38.3474, + "loss": 272056.975, "step": 16140 }, { "epoch": 0.032624021784362287, - "grad_norm": 285.4230651855469, + "grad_norm": 63965.08203125, "learning_rate": 3.2300000000000004e-06, - "loss": 49.9638, + "loss": 141300.7875, "step": 16150 }, { "epoch": 0.0326442224170461, - "grad_norm": 239.81356811523438, + "grad_norm": 426072.0, "learning_rate": 3.232e-06, - "loss": 35.6777, + "loss": 194282.575, "step": 16160 }, { "epoch": 0.03266442304972992, "grad_norm": 0.0, "learning_rate": 3.2340000000000003e-06, - "loss": 30.5213, + "loss": 148997.4, "step": 16170 }, { "epoch": 0.03268462368241373, - "grad_norm": 123.41769409179688, + "grad_norm": 22263.03515625, "learning_rate": 3.2360000000000004e-06, - "loss": 24.7122, + "loss": 130542.35, "step": 16180 }, { "epoch": 0.032704824315097546, - "grad_norm": 381.268798828125, + "grad_norm": 53827.296875, "learning_rate": 3.238e-06, - "loss": 20.959, + "loss": 77802.1375, "step": 16190 }, { "epoch": 0.032725024947781366, - "grad_norm": 698.3445434570312, + "grad_norm": 771077.625, "learning_rate": 3.2400000000000003e-06, - "loss": 40.5753, + "loss": 268066.35, "step": 16200 }, { "epoch": 0.03274522558046518, - "grad_norm": 353.4059753417969, + "grad_norm": 117244.0078125, "learning_rate": 3.242e-06, - "loss": 49.6511, + "loss": 250700.775, "step": 16210 }, { "epoch": 0.032765426213149, - "grad_norm": 504.5810546875, + "grad_norm": 475754.15625, "learning_rate": 3.2440000000000006e-06, - "loss": 42.3416, + "loss": 268729.1, "step": 16220 }, { "epoch": 0.03278562684583281, - "grad_norm": 357.67529296875, + "grad_norm": 100340.09375, "learning_rate": 3.2460000000000003e-06, - "loss": 51.2753, + "loss": 370280.875, "step": 16230 }, { "epoch": 0.032805827478516625, - "grad_norm": 194.2034454345703, + "grad_norm": 31991.5703125, "learning_rate": 3.248e-06, - "loss": 32.8198, + "loss": 181648.125, "step": 16240 }, { "epoch": 0.032826028111200445, - "grad_norm": 577.6695556640625, + "grad_norm": 315697.125, "learning_rate": 3.2500000000000002e-06, - "loss": 39.3762, + "loss": 284707.675, "step": 16250 }, { "epoch": 0.03284622874388426, - "grad_norm": 155.1973876953125, + "grad_norm": 38336.07421875, "learning_rate": 3.252e-06, - "loss": 43.677, + "loss": 217316.0, "step": 16260 }, { "epoch": 0.03286642937656807, - "grad_norm": 473.38983154296875, + "grad_norm": 354054.5, "learning_rate": 3.2540000000000005e-06, - "loss": 23.7344, + "loss": 179678.7875, "step": 16270 }, { "epoch": 0.03288663000925189, - "grad_norm": 112.81297302246094, + "grad_norm": 13319.865234375, "learning_rate": 3.2560000000000003e-06, - "loss": 35.8927, + "loss": 209813.15, "step": 16280 }, { "epoch": 0.032906830641935704, - "grad_norm": 408.118408203125, + "grad_norm": 24385.4453125, "learning_rate": 3.258e-06, - "loss": 29.1927, + "loss": 78685.7875, "step": 16290 }, { "epoch": 0.032927031274619524, - "grad_norm": 229.55616760253906, + "grad_norm": 34535.16796875, "learning_rate": 3.2600000000000006e-06, - "loss": 25.7656, + "loss": 156036.625, "step": 16300 }, { "epoch": 0.03294723190730334, - "grad_norm": 387.787109375, + "grad_norm": 4440.43798828125, "learning_rate": 3.2620000000000003e-06, - "loss": 43.1744, + "loss": 343472.425, "step": 16310 }, { "epoch": 0.03296743253998715, - "grad_norm": 583.57421875, + "grad_norm": 417266.28125, "learning_rate": 3.2640000000000004e-06, - "loss": 35.2856, + "loss": 142619.8, "step": 16320 }, { "epoch": 0.03298763317267097, - "grad_norm": 414.252197265625, + "grad_norm": 385702.9375, "learning_rate": 3.266e-06, - "loss": 34.5608, + "loss": 194620.775, "step": 16330 }, { "epoch": 0.03300783380535478, - "grad_norm": 311.0862731933594, + "grad_norm": 38346.03515625, "learning_rate": 3.268e-06, - "loss": 24.4663, + "loss": 52077.3812, "step": 16340 }, { "epoch": 0.033028034438038596, - "grad_norm": 437.1070251464844, + "grad_norm": 367681.125, "learning_rate": 3.2700000000000005e-06, - "loss": 54.5875, + "loss": 322857.45, "step": 16350 }, { "epoch": 0.033048235070722416, - "grad_norm": 202.70462036132812, + "grad_norm": 18469.31640625, "learning_rate": 3.272e-06, - "loss": 33.8347, + "loss": 161753.9375, "step": 16360 }, { "epoch": 0.03306843570340623, - "grad_norm": 350.0523986816406, + "grad_norm": 10151.4833984375, "learning_rate": 3.2740000000000003e-06, - "loss": 42.476, + "loss": 273824.55, "step": 16370 }, { "epoch": 0.03308863633609005, - "grad_norm": 269.7728576660156, + "grad_norm": 133100.328125, "learning_rate": 3.2760000000000005e-06, - "loss": 32.9094, + "loss": 188306.775, "step": 16380 }, { "epoch": 0.03310883696877386, - "grad_norm": 129.224365234375, + "grad_norm": 14241.181640625, "learning_rate": 3.278e-06, - "loss": 36.1762, + "loss": 117327.7625, "step": 16390 }, { "epoch": 0.033129037601457675, - "grad_norm": 638.2075805664062, + "grad_norm": 455727.6875, "learning_rate": 3.2800000000000004e-06, - "loss": 76.1486, + "loss": 358813.225, "step": 16400 }, { "epoch": 0.033149238234141495, - "grad_norm": 419.1002502441406, + "grad_norm": 539220.3125, "learning_rate": 3.282e-06, - "loss": 45.8922, + "loss": 228620.3, "step": 16410 }, { "epoch": 0.03316943886682531, - "grad_norm": 201.15699768066406, + "grad_norm": 23126.47265625, "learning_rate": 3.2840000000000007e-06, - "loss": 32.3097, + "loss": 124437.325, "step": 16420 }, { "epoch": 0.03318963949950912, - "grad_norm": 63.118438720703125, + "grad_norm": 16358.5400390625, "learning_rate": 3.2860000000000004e-06, - "loss": 26.3156, + "loss": 82766.175, "step": 16430 }, { "epoch": 0.03320984013219294, - "grad_norm": 225.0209197998047, + "grad_norm": 4548.08251953125, "learning_rate": 3.288e-06, - "loss": 30.3701, + "loss": 90043.4875, "step": 16440 }, { "epoch": 0.033230040764876755, - "grad_norm": 275.6959228515625, + "grad_norm": 129745.65625, "learning_rate": 3.2900000000000003e-06, - "loss": 41.4858, + "loss": 293578.225, "step": 16450 }, { "epoch": 0.033250241397560575, - "grad_norm": 345.9579162597656, + "grad_norm": 49361.3515625, "learning_rate": 3.292e-06, - "loss": 34.586, + "loss": 166564.2375, "step": 16460 }, { "epoch": 0.03327044203024439, - "grad_norm": 121.23335266113281, + "grad_norm": 25951.005859375, "learning_rate": 3.2940000000000006e-06, - "loss": 25.4494, + "loss": 99148.0875, "step": 16470 }, { "epoch": 0.0332906426629282, - "grad_norm": 160.68568420410156, + "grad_norm": 6100.12646484375, "learning_rate": 3.2960000000000003e-06, - "loss": 42.3377, + "loss": 275439.0, "step": 16480 }, { "epoch": 0.03331084329561202, - "grad_norm": 163.48208618164062, + "grad_norm": 30184.255859375, "learning_rate": 3.298e-06, - "loss": 24.216, + "loss": 162096.8125, "step": 16490 }, { "epoch": 0.033331043928295834, - "grad_norm": 267.0227966308594, + "grad_norm": 42879.78515625, "learning_rate": 3.3000000000000006e-06, - "loss": 52.5298, + "loss": 338837.575, "step": 16500 }, { "epoch": 0.03335124456097965, - "grad_norm": 200.9279022216797, + "grad_norm": 52754.50390625, "learning_rate": 3.3020000000000003e-06, - "loss": 44.8166, + "loss": 118913.2375, "step": 16510 }, { "epoch": 0.03337144519366347, - "grad_norm": 193.29397583007812, + "grad_norm": 44857.05859375, "learning_rate": 3.3040000000000005e-06, - "loss": 24.0343, + "loss": 71392.7875, "step": 16520 }, { "epoch": 0.03339164582634728, - "grad_norm": 358.73712158203125, + "grad_norm": 57272.0546875, "learning_rate": 3.306e-06, - "loss": 34.2328, + "loss": 132684.1625, "step": 16530 }, { "epoch": 0.0334118464590311, - "grad_norm": 891.9193725585938, + "grad_norm": 392734.9375, "learning_rate": 3.308e-06, - "loss": 70.6687, + "loss": 228055.6, "step": 16540 }, { "epoch": 0.03343204709171491, - "grad_norm": 439.7184753417969, + "grad_norm": 144459.109375, "learning_rate": 3.3100000000000005e-06, - "loss": 49.6626, + "loss": 284715.9, "step": 16550 }, { "epoch": 0.033452247724398726, - "grad_norm": 284.67901611328125, + "grad_norm": 133741.828125, "learning_rate": 3.3120000000000002e-06, - "loss": 65.2468, + "loss": 343388.275, "step": 16560 }, { "epoch": 0.033472448357082546, - "grad_norm": 202.26576232910156, + "grad_norm": 20880.283203125, "learning_rate": 3.314e-06, - "loss": 53.6101, + "loss": 378253.825, "step": 16570 }, { "epoch": 0.03349264898976636, - "grad_norm": 310.427490234375, + "grad_norm": 155760.5, "learning_rate": 3.3160000000000005e-06, - "loss": 44.8462, + "loss": 117619.15, "step": 16580 }, { "epoch": 0.03351284962245017, - "grad_norm": 257.5505676269531, + "grad_norm": 9805.955078125, "learning_rate": 3.3180000000000003e-06, - "loss": 37.5404, + "loss": 146300.7375, "step": 16590 }, { "epoch": 0.03353305025513399, - "grad_norm": 169.76937866210938, + "grad_norm": 11711.1806640625, "learning_rate": 3.3200000000000004e-06, - "loss": 31.974, + "loss": 129549.4125, "step": 16600 }, { "epoch": 0.033553250887817805, - "grad_norm": 228.67799377441406, + "grad_norm": 69140.171875, "learning_rate": 3.322e-06, - "loss": 23.8297, + "loss": 92528.3125, "step": 16610 }, { "epoch": 0.033573451520501625, - "grad_norm": 188.8607177734375, + "grad_norm": 54184.93359375, "learning_rate": 3.324e-06, - "loss": 43.8962, + "loss": 226613.85, "step": 16620 }, { "epoch": 0.03359365215318544, - "grad_norm": 166.12493896484375, + "grad_norm": 125306.53125, "learning_rate": 3.3260000000000004e-06, - "loss": 36.9752, + "loss": 153434.15, "step": 16630 }, { "epoch": 0.03361385278586925, - "grad_norm": 607.380126953125, + "grad_norm": 195304.65625, "learning_rate": 3.328e-06, - "loss": 49.2453, + "loss": 142778.8, "step": 16640 }, { "epoch": 0.03363405341855307, - "grad_norm": 312.77899169921875, + "grad_norm": 86365.328125, "learning_rate": 3.3300000000000003e-06, - "loss": 38.7439, + "loss": 154335.075, "step": 16650 }, { "epoch": 0.033654254051236884, - "grad_norm": 126.633544921875, + "grad_norm": 24696.828125, "learning_rate": 3.332e-06, - "loss": 35.1778, + "loss": 214549.4, "step": 16660 }, { "epoch": 0.0336744546839207, - "grad_norm": 562.5142211914062, + "grad_norm": 302580.34375, "learning_rate": 3.334e-06, - "loss": 38.7624, + "loss": 110847.525, "step": 16670 }, { "epoch": 0.03369465531660452, - "grad_norm": 81.54524993896484, + "grad_norm": 15101.453125, "learning_rate": 3.3360000000000003e-06, - "loss": 33.2061, + "loss": 168665.9375, "step": 16680 }, { "epoch": 0.03371485594928833, - "grad_norm": 341.4804382324219, + "grad_norm": 71133.9765625, "learning_rate": 3.338e-06, - "loss": 36.1934, + "loss": 177642.8625, "step": 16690 }, { "epoch": 0.03373505658197215, - "grad_norm": 896.6447143554688, + "grad_norm": 1151725.0, "learning_rate": 3.3400000000000006e-06, - "loss": 62.4341, + "loss": 413502.8, "step": 16700 }, { "epoch": 0.03375525721465596, - "grad_norm": 135.6630096435547, + "grad_norm": 12703.390625, "learning_rate": 3.3420000000000004e-06, - "loss": 45.3039, + "loss": 389874.65, "step": 16710 }, { "epoch": 0.033775457847339777, - "grad_norm": 156.99526977539062, + "grad_norm": 42628.36328125, "learning_rate": 3.344e-06, - "loss": 39.8221, + "loss": 283109.4, "step": 16720 }, { "epoch": 0.033795658480023597, - "grad_norm": 135.8438262939453, + "grad_norm": 12845.5625, "learning_rate": 3.3460000000000002e-06, - "loss": 26.9488, + "loss": 107589.3375, "step": 16730 }, { "epoch": 0.03381585911270741, - "grad_norm": 511.6795654296875, + "grad_norm": 553354.375, "learning_rate": 3.348e-06, - "loss": 40.9199, + "loss": 205656.575, "step": 16740 }, { "epoch": 0.03383605974539122, - "grad_norm": 196.5582733154297, + "grad_norm": 67760.171875, "learning_rate": 3.3500000000000005e-06, - "loss": 37.2834, + "loss": 366444.575, "step": 16750 }, { "epoch": 0.03385626037807504, - "grad_norm": 154.54776000976562, + "grad_norm": 25834.5625, "learning_rate": 3.3520000000000003e-06, - "loss": 22.4306, + "loss": 48338.8438, "step": 16760 }, { "epoch": 0.033876461010758856, - "grad_norm": 441.3468017578125, + "grad_norm": 29223.42578125, "learning_rate": 3.354e-06, - "loss": 32.6407, + "loss": 124797.2, "step": 16770 }, { "epoch": 0.033896661643442676, - "grad_norm": 1111.8704833984375, + "grad_norm": 1391466.875, "learning_rate": 3.3560000000000006e-06, - "loss": 36.1129, + "loss": 223909.35, "step": 16780 }, { "epoch": 0.03391686227612649, - "grad_norm": 342.1679382324219, + "grad_norm": 158126.828125, "learning_rate": 3.3580000000000003e-06, - "loss": 37.9355, + "loss": 233282.45, "step": 16790 }, { "epoch": 0.0339370629088103, - "grad_norm": 736.2267456054688, + "grad_norm": 562497.6875, "learning_rate": 3.3600000000000004e-06, - "loss": 22.4489, + "loss": 97490.7, "step": 16800 }, { "epoch": 0.03395726354149412, - "grad_norm": 324.1629333496094, + "grad_norm": 98271.5546875, "learning_rate": 3.362e-06, - "loss": 51.4796, + "loss": 249685.25, "step": 16810 }, { "epoch": 0.033977464174177935, - "grad_norm": 140.27993774414062, + "grad_norm": 24145.146484375, "learning_rate": 3.364e-06, - "loss": 40.0226, + "loss": 179336.7625, "step": 16820 }, { "epoch": 0.03399766480686175, - "grad_norm": 676.4741821289062, + "grad_norm": 372776.78125, "learning_rate": 3.3660000000000005e-06, - "loss": 45.9886, + "loss": 276174.225, "step": 16830 }, { "epoch": 0.03401786543954557, - "grad_norm": 186.6973419189453, + "grad_norm": 42927.859375, "learning_rate": 3.368e-06, - "loss": 22.1625, + "loss": 62777.5813, "step": 16840 }, { "epoch": 0.03403806607222938, - "grad_norm": 271.2355651855469, + "grad_norm": 12779.8642578125, "learning_rate": 3.3700000000000003e-06, - "loss": 45.0189, + "loss": 300452.275, "step": 16850 }, { "epoch": 0.0340582667049132, - "grad_norm": 142.524169921875, + "grad_norm": 14827.9794921875, "learning_rate": 3.372e-06, - "loss": 32.5946, + "loss": 186035.6, "step": 16860 }, { "epoch": 0.034078467337597014, - "grad_norm": 349.8798522949219, + "grad_norm": 163818.34375, "learning_rate": 3.3740000000000002e-06, - "loss": 45.842, + "loss": 241051.325, "step": 16870 }, { "epoch": 0.03409866797028083, - "grad_norm": 237.0596923828125, + "grad_norm": 122020.8359375, "learning_rate": 3.3760000000000004e-06, - "loss": 32.2833, + "loss": 110189.2125, "step": 16880 }, { "epoch": 0.03411886860296465, - "grad_norm": 137.74659729003906, + "grad_norm": 10168.8896484375, "learning_rate": 3.378e-06, - "loss": 15.1257, + "loss": 49938.6781, "step": 16890 }, { "epoch": 0.03413906923564846, - "grad_norm": 391.73956298828125, + "grad_norm": 207901.046875, "learning_rate": 3.3800000000000007e-06, - "loss": 34.1628, + "loss": 174017.675, "step": 16900 }, { "epoch": 0.03415926986833227, - "grad_norm": 246.07106018066406, + "grad_norm": 46044.78515625, "learning_rate": 3.3820000000000004e-06, - "loss": 30.6648, + "loss": 168726.3625, "step": 16910 }, { "epoch": 0.03417947050101609, - "grad_norm": 373.9600524902344, + "grad_norm": 179664.53125, "learning_rate": 3.384e-06, - "loss": 36.3212, + "loss": 83257.475, "step": 16920 }, { "epoch": 0.034199671133699906, - "grad_norm": 476.1013488769531, + "grad_norm": 485901.6875, "learning_rate": 3.3860000000000003e-06, - "loss": 44.096, + "loss": 345601.15, "step": 16930 }, { "epoch": 0.034219871766383726, - "grad_norm": 321.2939758300781, + "grad_norm": 16103.1513671875, "learning_rate": 3.388e-06, - "loss": 28.3317, + "loss": 176214.2, "step": 16940 }, { "epoch": 0.03424007239906754, - "grad_norm": 189.6025390625, + "grad_norm": 3196.087890625, "learning_rate": 3.3900000000000006e-06, - "loss": 32.3839, + "loss": 80612.8875, "step": 16950 }, { "epoch": 0.03426027303175135, - "grad_norm": 605.8056030273438, + "grad_norm": 567317.25, "learning_rate": 3.3920000000000003e-06, - "loss": 56.5322, + "loss": 358992.575, "step": 16960 }, { "epoch": 0.03428047366443517, - "grad_norm": 283.5815734863281, + "grad_norm": 70069.5390625, "learning_rate": 3.394e-06, - "loss": 78.796, + "loss": 398660.15, "step": 16970 }, { "epoch": 0.034300674297118985, - "grad_norm": 148.4521026611328, + "grad_norm": 27831.09765625, "learning_rate": 3.3960000000000006e-06, - "loss": 17.5358, + "loss": 115248.6125, "step": 16980 }, { "epoch": 0.0343208749298028, - "grad_norm": 240.6928253173828, + "grad_norm": 3849.91015625, "learning_rate": 3.3980000000000003e-06, - "loss": 36.2812, + "loss": 160651.65, "step": 16990 }, { "epoch": 0.03434107556248662, - "grad_norm": 97.40995788574219, + "grad_norm": 10808.2099609375, "learning_rate": 3.4000000000000005e-06, - "loss": 20.1188, + "loss": 68147.9312, "step": 17000 }, { "epoch": 0.03436127619517043, - "grad_norm": 1046.69384765625, + "grad_norm": 311962.5, "learning_rate": 3.402e-06, - "loss": 73.4175, + "loss": 384140.25, "step": 17010 }, { "epoch": 0.03438147682785425, - "grad_norm": 184.30926513671875, + "grad_norm": 161639.359375, "learning_rate": 3.404e-06, - "loss": 34.2268, + "loss": 134107.5625, "step": 17020 }, { "epoch": 0.034401677460538065, - "grad_norm": 411.9843444824219, + "grad_norm": 86474.4296875, "learning_rate": 3.4060000000000005e-06, - "loss": 25.879, + "loss": 58712.4875, "step": 17030 }, { "epoch": 0.03442187809322188, - "grad_norm": 65.47348022460938, + "grad_norm": 13070.6220703125, "learning_rate": 3.4080000000000002e-06, - "loss": 51.1356, + "loss": 184941.925, "step": 17040 }, { "epoch": 0.0344420787259057, - "grad_norm": 508.4545593261719, + "grad_norm": 465746.375, "learning_rate": 3.4100000000000004e-06, - "loss": 43.9626, + "loss": 181545.35, "step": 17050 }, { "epoch": 0.03446227935858951, - "grad_norm": 125.83358764648438, + "grad_norm": 17593.40625, "learning_rate": 3.412e-06, - "loss": 39.0611, + "loss": 189948.3875, "step": 17060 }, { "epoch": 0.034482479991273324, - "grad_norm": 250.63418579101562, + "grad_norm": 14838.3447265625, "learning_rate": 3.4140000000000003e-06, - "loss": 25.4197, + "loss": 59141.6125, "step": 17070 }, { "epoch": 0.034502680623957144, - "grad_norm": 873.7561645507812, + "grad_norm": 74617.765625, "learning_rate": 3.4160000000000004e-06, - "loss": 37.4879, + "loss": 360971.85, "step": 17080 }, { "epoch": 0.03452288125664096, - "grad_norm": 656.0178833007812, + "grad_norm": 868304.25, "learning_rate": 3.418e-06, - "loss": 55.6403, + "loss": 313817.85, "step": 17090 }, { "epoch": 0.03454308188932478, - "grad_norm": 639.9075317382812, + "grad_norm": 601502.25, "learning_rate": 3.4200000000000007e-06, - "loss": 55.0274, + "loss": 324130.475, "step": 17100 }, { "epoch": 0.03456328252200859, - "grad_norm": 238.6270751953125, + "grad_norm": 83670.3828125, "learning_rate": 3.4220000000000004e-06, - "loss": 35.4899, + "loss": 332076.15, "step": 17110 }, { "epoch": 0.0345834831546924, - "grad_norm": 408.190673828125, + "grad_norm": 91588.359375, "learning_rate": 3.424e-06, - "loss": 25.7717, + "loss": 113157.525, "step": 17120 }, { "epoch": 0.03460368378737622, - "grad_norm": 302.14007568359375, + "grad_norm": 93852.4765625, "learning_rate": 3.4260000000000003e-06, - "loss": 26.0166, + "loss": 62506.975, "step": 17130 }, { "epoch": 0.034623884420060036, - "grad_norm": 255.47164916992188, + "grad_norm": 110252.1640625, "learning_rate": 3.428e-06, - "loss": 55.1155, + "loss": 176113.075, "step": 17140 }, { "epoch": 0.03464408505274385, - "grad_norm": 342.7286682128906, + "grad_norm": 266876.09375, "learning_rate": 3.4300000000000006e-06, - "loss": 41.5852, + "loss": 167265.4125, "step": 17150 }, { "epoch": 0.03466428568542767, - "grad_norm": 793.4915771484375, + "grad_norm": 291763.625, "learning_rate": 3.4320000000000003e-06, - "loss": 39.5082, + "loss": 235541.2, "step": 17160 }, { "epoch": 0.03468448631811148, - "grad_norm": 1343.6824951171875, + "grad_norm": 0.0, "learning_rate": 3.434e-06, - "loss": 59.9334, + "loss": 266156.4, "step": 17170 }, { "epoch": 0.0347046869507953, - "grad_norm": 110.863037109375, + "grad_norm": 28556.375, "learning_rate": 3.4360000000000006e-06, - "loss": 30.01, + "loss": 125424.9875, "step": 17180 }, { "epoch": 0.034724887583479115, - "grad_norm": 594.4992065429688, + "grad_norm": 334768.15625, "learning_rate": 3.4380000000000004e-06, - "loss": 47.8779, + "loss": 151395.2625, "step": 17190 }, { "epoch": 0.03474508821616293, - "grad_norm": 195.91082763671875, + "grad_norm": 55137.00390625, "learning_rate": 3.44e-06, - "loss": 79.9307, + "loss": 525022.0, "step": 17200 }, { "epoch": 0.03476528884884675, - "grad_norm": 2141.370361328125, + "grad_norm": 773638.375, "learning_rate": 3.4420000000000002e-06, - "loss": 65.2812, + "loss": 453798.95, "step": 17210 }, { "epoch": 0.03478548948153056, - "grad_norm": 209.10577392578125, + "grad_norm": 34639.75, "learning_rate": 3.444e-06, - "loss": 38.383, + "loss": 323292.7, "step": 17220 }, { "epoch": 0.034805690114214374, - "grad_norm": 520.1636962890625, + "grad_norm": 263553.5, "learning_rate": 3.4460000000000005e-06, - "loss": 28.8499, + "loss": 135423.1, "step": 17230 }, { "epoch": 0.034825890746898194, - "grad_norm": 161.81329345703125, + "grad_norm": 13715.515625, "learning_rate": 3.4480000000000003e-06, - "loss": 27.9141, + "loss": 136921.55, "step": 17240 }, { "epoch": 0.03484609137958201, - "grad_norm": 359.35028076171875, + "grad_norm": 156106.84375, "learning_rate": 3.45e-06, - "loss": 39.2735, + "loss": 153656.7125, "step": 17250 }, { "epoch": 0.03486629201226583, "grad_norm": 0.0, "learning_rate": 3.452e-06, - "loss": 31.4644, + "loss": 142803.6625, "step": 17260 }, { "epoch": 0.03488649264494964, - "grad_norm": 223.4273681640625, + "grad_norm": 101815.859375, "learning_rate": 3.4540000000000003e-06, - "loss": 45.5818, + "loss": 190646.4625, "step": 17270 }, { "epoch": 0.034906693277633453, - "grad_norm": 461.3404846191406, + "grad_norm": 228526.90625, "learning_rate": 3.4560000000000005e-06, - "loss": 49.441, + "loss": 311266.575, "step": 17280 }, { "epoch": 0.034926893910317273, - "grad_norm": 369.4361267089844, + "grad_norm": 339408.0625, "learning_rate": 3.458e-06, - "loss": 35.239, + "loss": 146621.5875, "step": 17290 }, { "epoch": 0.03494709454300109, - "grad_norm": 209.47914123535156, + "grad_norm": 92775.09375, "learning_rate": 3.46e-06, - "loss": 30.6322, + "loss": 228517.375, "step": 17300 }, { "epoch": 0.0349672951756849, - "grad_norm": 376.3097229003906, + "grad_norm": 168584.765625, "learning_rate": 3.4620000000000005e-06, - "loss": 44.23, + "loss": 195757.55, "step": 17310 }, { "epoch": 0.03498749580836872, - "grad_norm": 350.3291015625, + "grad_norm": 225164.421875, "learning_rate": 3.464e-06, - "loss": 31.5281, + "loss": 188009.6875, "step": 17320 }, { "epoch": 0.03500769644105253, - "grad_norm": 232.93167114257812, + "grad_norm": 113717.640625, "learning_rate": 3.4660000000000004e-06, - "loss": 42.9872, + "loss": 268401.6, "step": 17330 }, { "epoch": 0.03502789707373635, - "grad_norm": 349.1705627441406, + "grad_norm": 44413.73046875, "learning_rate": 3.468e-06, - "loss": 25.706, + "loss": 157700.475, "step": 17340 }, { "epoch": 0.035048097706420166, - "grad_norm": 593.7544555664062, + "grad_norm": 286293.53125, "learning_rate": 3.4700000000000002e-06, - "loss": 24.9243, + "loss": 79297.3375, "step": 17350 }, { "epoch": 0.03506829833910398, - "grad_norm": 451.97021484375, + "grad_norm": 144848.828125, "learning_rate": 3.4720000000000004e-06, - "loss": 28.3188, + "loss": 175429.325, "step": 17360 }, { "epoch": 0.0350884989717878, - "grad_norm": 1406.9215087890625, + "grad_norm": 1267468.375, "learning_rate": 3.474e-06, - "loss": 41.1951, + "loss": 287653.4, "step": 17370 }, { "epoch": 0.03510869960447161, - "grad_norm": 82.0578384399414, + "grad_norm": 4845.20751953125, "learning_rate": 3.4760000000000007e-06, - "loss": 12.8265, + "loss": 77484.4312, "step": 17380 }, { "epoch": 0.035128900237155425, - "grad_norm": 403.41461181640625, + "grad_norm": 148259.046875, "learning_rate": 3.4780000000000004e-06, - "loss": 49.1423, + "loss": 202792.0, "step": 17390 }, { "epoch": 0.035149100869839245, - "grad_norm": 279.8601379394531, + "grad_norm": 48949.734375, "learning_rate": 3.48e-06, - "loss": 35.7291, + "loss": 206400.15, "step": 17400 }, { "epoch": 0.03516930150252306, - "grad_norm": 294.01068115234375, + "grad_norm": 89980.0, "learning_rate": 3.4820000000000003e-06, - "loss": 26.5494, + "loss": 135225.4625, "step": 17410 }, { "epoch": 0.03518950213520688, - "grad_norm": 843.3993530273438, + "grad_norm": 973398.875, "learning_rate": 3.484e-06, - "loss": 57.4442, + "loss": 328315.95, "step": 17420 }, { "epoch": 0.03520970276789069, - "grad_norm": 415.5447082519531, + "grad_norm": 166388.890625, "learning_rate": 3.4860000000000006e-06, - "loss": 112.6446, + "loss": 474967.9, "step": 17430 }, { "epoch": 0.035229903400574504, - "grad_norm": 273.3185729980469, + "grad_norm": 174318.53125, "learning_rate": 3.4880000000000003e-06, - "loss": 27.5304, + "loss": 153884.6, "step": 17440 }, { "epoch": 0.035250104033258324, - "grad_norm": 491.176513671875, + "grad_norm": 106971.8046875, "learning_rate": 3.49e-06, - "loss": 34.6601, + "loss": 188443.0875, "step": 17450 }, { "epoch": 0.03527030466594214, - "grad_norm": 128.51788330078125, + "grad_norm": 18112.39453125, "learning_rate": 3.492e-06, - "loss": 52.4764, + "loss": 279470.975, "step": 17460 }, { "epoch": 0.03529050529862595, - "grad_norm": 308.6649475097656, + "grad_norm": 65093.26171875, "learning_rate": 3.4940000000000003e-06, - "loss": 59.0693, + "loss": 336978.425, "step": 17470 }, { "epoch": 0.03531070593130977, - "grad_norm": 624.1464233398438, + "grad_norm": 312457.90625, "learning_rate": 3.4960000000000005e-06, - "loss": 53.6715, + "loss": 342747.65, "step": 17480 }, { "epoch": 0.03533090656399358, - "grad_norm": 205.8033447265625, + "grad_norm": 40473.94921875, "learning_rate": 3.4980000000000002e-06, - "loss": 67.5194, + "loss": 360095.625, "step": 17490 }, { "epoch": 0.0353511071966774, - "grad_norm": 452.337890625, + "grad_norm": 78686.3359375, "learning_rate": 3.5e-06, - "loss": 28.7642, + "loss": 84546.8687, "step": 17500 }, { "epoch": 0.035371307829361216, - "grad_norm": 343.1424865722656, + "grad_norm": 143225.34375, "learning_rate": 3.5020000000000005e-06, - "loss": 34.6037, + "loss": 105327.675, "step": 17510 }, { "epoch": 0.03539150846204503, - "grad_norm": 125.72163391113281, + "grad_norm": 18540.916015625, "learning_rate": 3.5040000000000002e-06, - "loss": 36.9692, + "loss": 212281.25, "step": 17520 }, { "epoch": 0.03541170909472885, - "grad_norm": 268.17449951171875, + "grad_norm": 79827.1875, "learning_rate": 3.5060000000000004e-06, - "loss": 26.5976, + "loss": 193528.95, "step": 17530 }, { "epoch": 0.03543190972741266, - "grad_norm": 173.89942932128906, + "grad_norm": 16985.091796875, "learning_rate": 3.508e-06, - "loss": 40.6933, + "loss": 182536.025, "step": 17540 }, { "epoch": 0.035452110360096475, - "grad_norm": 133.52862548828125, + "grad_norm": 6157.38720703125, "learning_rate": 3.5100000000000003e-06, - "loss": 45.627, + "loss": 303330.525, "step": 17550 }, { "epoch": 0.035472310992780295, - "grad_norm": 151.24571228027344, + "grad_norm": 143816.453125, "learning_rate": 3.5120000000000004e-06, - "loss": 18.7689, + "loss": 102640.0562, "step": 17560 }, { "epoch": 0.03549251162546411, - "grad_norm": 223.38760375976562, + "grad_norm": 57471.80859375, "learning_rate": 3.514e-06, - "loss": 34.6075, + "loss": 163855.8875, "step": 17570 }, { "epoch": 0.03551271225814793, - "grad_norm": 413.1390380859375, + "grad_norm": 241219.078125, "learning_rate": 3.5160000000000007e-06, - "loss": 40.4633, + "loss": 165813.925, "step": 17580 }, { "epoch": 0.03553291289083174, - "grad_norm": 353.8850402832031, + "grad_norm": 107589.8125, "learning_rate": 3.5180000000000005e-06, - "loss": 39.9123, + "loss": 150830.675, "step": 17590 }, { "epoch": 0.035553113523515555, - "grad_norm": 368.04547119140625, + "grad_norm": 238970.859375, "learning_rate": 3.52e-06, - "loss": 35.7904, + "loss": 170422.85, "step": 17600 }, { "epoch": 0.035573314156199375, - "grad_norm": 1732.456298828125, + "grad_norm": 86644.0078125, "learning_rate": 3.5220000000000003e-06, - "loss": 73.0335, + "loss": 150401.025, "step": 17610 }, { "epoch": 0.03559351478888319, - "grad_norm": 662.8062133789062, + "grad_norm": 607095.75, "learning_rate": 3.524e-06, - "loss": 45.0234, + "loss": 211050.675, "step": 17620 }, { "epoch": 0.035613715421567, - "grad_norm": 212.84527587890625, + "grad_norm": 41626.2109375, "learning_rate": 3.5260000000000006e-06, - "loss": 28.5163, + "loss": 118568.275, "step": 17630 }, { "epoch": 0.03563391605425082, - "grad_norm": 204.98377990722656, + "grad_norm": 20389.31640625, "learning_rate": 3.5280000000000004e-06, - "loss": 29.7388, + "loss": 90545.6, "step": 17640 }, { "epoch": 0.035654116686934634, - "grad_norm": 218.87576293945312, + "grad_norm": 39080.59765625, "learning_rate": 3.53e-06, - "loss": 71.4667, + "loss": 259223.6, "step": 17650 }, { "epoch": 0.035674317319618454, - "grad_norm": 275.1972961425781, + "grad_norm": 262809.5625, "learning_rate": 3.5320000000000002e-06, - "loss": 14.5579, + "loss": 69186.575, "step": 17660 }, { "epoch": 0.03569451795230227, - "grad_norm": 191.7329864501953, + "grad_norm": 40216.515625, "learning_rate": 3.5340000000000004e-06, - "loss": 23.105, + "loss": 63950.175, "step": 17670 }, { "epoch": 0.03571471858498608, - "grad_norm": 291.3621826171875, + "grad_norm": 48430.76171875, "learning_rate": 3.5360000000000005e-06, - "loss": 41.967, + "loss": 174936.7875, "step": 17680 }, { "epoch": 0.0357349192176699, - "grad_norm": 84.14144134521484, + "grad_norm": 8992.6455078125, "learning_rate": 3.5380000000000003e-06, - "loss": 32.4411, + "loss": 215070.775, "step": 17690 }, { "epoch": 0.03575511985035371, - "grad_norm": 49.37529754638672, + "grad_norm": 192603.90625, "learning_rate": 3.54e-06, - "loss": 37.8053, + "loss": 224313.85, "step": 17700 }, { "epoch": 0.035775320483037526, - "grad_norm": 131.99440002441406, + "grad_norm": 1124.17919921875, "learning_rate": 3.5420000000000006e-06, - "loss": 38.8888, + "loss": 147994.275, "step": 17710 }, { "epoch": 0.035795521115721346, - "grad_norm": 157.5868682861328, + "grad_norm": 10622.400390625, "learning_rate": 3.5440000000000003e-06, - "loss": 41.2273, + "loss": 306738.325, "step": 17720 }, { "epoch": 0.03581572174840516, - "grad_norm": 983.6537475585938, + "grad_norm": 555205.625, "learning_rate": 3.5460000000000004e-06, - "loss": 48.0623, + "loss": 168439.75, "step": 17730 }, { "epoch": 0.03583592238108898, - "grad_norm": 211.2615203857422, + "grad_norm": 342201.4375, "learning_rate": 3.548e-06, - "loss": 19.4675, + "loss": 108397.75, "step": 17740 }, { "epoch": 0.03585612301377279, - "grad_norm": 310.442626953125, + "grad_norm": 19547.076171875, "learning_rate": 3.5500000000000003e-06, - "loss": 49.0647, + "loss": 419068.5, "step": 17750 }, { "epoch": 0.035876323646456605, - "grad_norm": 118.88805389404297, + "grad_norm": 26630.47265625, "learning_rate": 3.5520000000000005e-06, - "loss": 38.1289, + "loss": 141939.325, "step": 17760 }, { "epoch": 0.035896524279140425, - "grad_norm": 271.6127624511719, + "grad_norm": 29464.3046875, "learning_rate": 3.554e-06, - "loss": 62.4351, + "loss": 490156.05, "step": 17770 }, { "epoch": 0.03591672491182424, - "grad_norm": 200.02017211914062, + "grad_norm": 74796.4609375, "learning_rate": 3.5560000000000008e-06, - "loss": 32.174, + "loss": 94542.1812, "step": 17780 }, { "epoch": 0.03593692554450805, - "grad_norm": 69.3891372680664, + "grad_norm": 2532.306884765625, "learning_rate": 3.5580000000000005e-06, - "loss": 20.9871, + "loss": 101565.4, "step": 17790 }, { "epoch": 0.03595712617719187, - "grad_norm": 257.371337890625, + "grad_norm": 202872.140625, "learning_rate": 3.5600000000000002e-06, - "loss": 57.4669, + "loss": 285453.875, "step": 17800 }, { "epoch": 0.035977326809875684, - "grad_norm": 259.77752685546875, + "grad_norm": 183610.890625, "learning_rate": 3.5620000000000004e-06, - "loss": 32.3087, + "loss": 167782.8625, "step": 17810 }, { "epoch": 0.035997527442559504, - "grad_norm": 963.5701293945312, + "grad_norm": 437505.9375, "learning_rate": 3.564e-06, - "loss": 47.1171, + "loss": 170810.45, "step": 17820 }, { "epoch": 0.03601772807524332, - "grad_norm": 334.3813171386719, + "grad_norm": 187024.296875, "learning_rate": 3.566e-06, - "loss": 37.5225, + "loss": 154614.225, "step": 17830 }, { "epoch": 0.03603792870792713, - "grad_norm": 299.8929748535156, + "grad_norm": 281112.625, "learning_rate": 3.5680000000000004e-06, - "loss": 30.5628, + "loss": 130826.0, "step": 17840 }, { "epoch": 0.03605812934061095, - "grad_norm": 1397.25439453125, + "grad_norm": 1568842.5, "learning_rate": 3.57e-06, - "loss": 67.5392, + "loss": 335015.35, "step": 17850 }, { "epoch": 0.036078329973294763, - "grad_norm": 224.2978057861328, + "grad_norm": 26498.080078125, "learning_rate": 3.5720000000000003e-06, - "loss": 26.4263, + "loss": 117290.95, "step": 17860 }, { "epoch": 0.03609853060597858, - "grad_norm": 442.4417724609375, + "grad_norm": 147373.09375, "learning_rate": 3.5740000000000004e-06, - "loss": 36.9202, + "loss": 226185.825, "step": 17870 }, { "epoch": 0.0361187312386624, - "grad_norm": 215.83665466308594, + "grad_norm": 20973.2578125, "learning_rate": 3.576e-06, - "loss": 32.0088, + "loss": 185422.425, "step": 17880 }, { "epoch": 0.03613893187134621, - "grad_norm": 408.61474609375, + "grad_norm": 27635.13671875, "learning_rate": 3.5780000000000003e-06, - "loss": 24.6973, + "loss": 103286.1875, "step": 17890 }, { "epoch": 0.03615913250403003, - "grad_norm": 439.76324462890625, + "grad_norm": 401350.03125, "learning_rate": 3.58e-06, - "loss": 46.6527, + "loss": 189835.725, "step": 17900 }, { "epoch": 0.03617933313671384, - "grad_norm": 156.62017822265625, + "grad_norm": 19275.01171875, "learning_rate": 3.5820000000000006e-06, - "loss": 30.6185, + "loss": 208777.1375, "step": 17910 }, { "epoch": 0.036199533769397656, - "grad_norm": 325.8275146484375, + "grad_norm": 178012.5625, "learning_rate": 3.5840000000000003e-06, - "loss": 69.6333, + "loss": 493500.7, "step": 17920 }, { "epoch": 0.036219734402081476, - "grad_norm": 231.5528564453125, + "grad_norm": 12401.9150390625, "learning_rate": 3.586e-06, - "loss": 36.4243, + "loss": 225133.725, "step": 17930 }, { "epoch": 0.03623993503476529, - "grad_norm": 387.59759521484375, + "grad_norm": 68602.2734375, "learning_rate": 3.588e-06, - "loss": 45.8942, + "loss": 88372.0125, "step": 17940 }, { "epoch": 0.0362601356674491, - "grad_norm": 327.5767517089844, + "grad_norm": 15237.81640625, "learning_rate": 3.5900000000000004e-06, - "loss": 21.8096, + "loss": 41896.4219, "step": 17950 }, { "epoch": 0.03628033630013292, - "grad_norm": 118.24665069580078, + "grad_norm": 33311.44140625, "learning_rate": 3.5920000000000005e-06, - "loss": 28.9579, + "loss": 240834.95, "step": 17960 }, { "epoch": 0.036300536932816735, - "grad_norm": 1558.099365234375, + "grad_norm": 551141.5, "learning_rate": 3.5940000000000002e-06, - "loss": 54.743, + "loss": 187790.1625, "step": 17970 }, { "epoch": 0.036320737565500555, - "grad_norm": 611.4698486328125, + "grad_norm": 51884.8203125, "learning_rate": 3.596e-06, - "loss": 44.5819, + "loss": 312083.85, "step": 17980 }, { "epoch": 0.03634093819818437, - "grad_norm": 270.1828918457031, + "grad_norm": 2215.132080078125, "learning_rate": 3.5980000000000005e-06, - "loss": 45.0569, + "loss": 137771.525, "step": 17990 }, { "epoch": 0.03636113883086818, - "grad_norm": 991.5865478515625, + "grad_norm": 952691.6875, "learning_rate": 3.6000000000000003e-06, - "loss": 40.8509, + "loss": 253770.4, "step": 18000 }, { "epoch": 0.036381339463552, - "grad_norm": 331.2337341308594, + "grad_norm": 289219.0625, "learning_rate": 3.6020000000000004e-06, - "loss": 34.891, + "loss": 334610.625, "step": 18010 }, { "epoch": 0.036401540096235814, - "grad_norm": 145.94773864746094, + "grad_norm": 18289.7109375, "learning_rate": 3.604e-06, - "loss": 36.5176, + "loss": 153534.45, "step": 18020 }, { "epoch": 0.03642174072891963, - "grad_norm": 154.4604034423828, + "grad_norm": 10704.8642578125, "learning_rate": 3.606e-06, - "loss": 38.0011, + "loss": 129407.5625, "step": 18030 }, { "epoch": 0.03644194136160345, - "grad_norm": 172.3933868408203, + "grad_norm": 109461.734375, "learning_rate": 3.6080000000000004e-06, - "loss": 26.1416, + "loss": 185537.3, "step": 18040 }, { "epoch": 0.03646214199428726, - "grad_norm": 332.6356506347656, + "grad_norm": 175825.375, "learning_rate": 3.61e-06, - "loss": 18.5461, + "loss": 78587.725, "step": 18050 }, { "epoch": 0.03648234262697108, - "grad_norm": 231.57098388671875, + "grad_norm": 164487.6875, "learning_rate": 3.6120000000000003e-06, - "loss": 15.0167, + "loss": 105265.7, "step": 18060 }, { "epoch": 0.03650254325965489, - "grad_norm": 308.06817626953125, + "grad_norm": 50482.30859375, "learning_rate": 3.6140000000000005e-06, - "loss": 34.6697, + "loss": 158736.0, "step": 18070 }, { "epoch": 0.036522743892338706, - "grad_norm": 394.8856201171875, + "grad_norm": 167902.0625, "learning_rate": 3.616e-06, - "loss": 39.539, + "loss": 364351.875, "step": 18080 }, { "epoch": 0.036542944525022526, - "grad_norm": 270.607666015625, + "grad_norm": 32333.90625, "learning_rate": 3.6180000000000003e-06, - "loss": 32.6353, + "loss": 104581.9, "step": 18090 }, { "epoch": 0.03656314515770634, - "grad_norm": 243.99295043945312, + "grad_norm": 65804.25, "learning_rate": 3.62e-06, - "loss": 14.8716, + "loss": 53803.6625, "step": 18100 }, { "epoch": 0.03658334579039015, - "grad_norm": 828.5248413085938, + "grad_norm": 658062.375, "learning_rate": 3.6220000000000006e-06, - "loss": 50.8561, + "loss": 430505.9, "step": 18110 }, { "epoch": 0.03660354642307397, - "grad_norm": 736.1989135742188, + "grad_norm": 685318.5, "learning_rate": 3.6240000000000004e-06, - "loss": 43.5971, + "loss": 176160.625, "step": 18120 }, { "epoch": 0.036623747055757785, - "grad_norm": 153.7969970703125, + "grad_norm": 13899.9970703125, "learning_rate": 3.626e-06, - "loss": 61.2382, + "loss": 257443.625, "step": 18130 }, { "epoch": 0.036643947688441605, - "grad_norm": 940.6776733398438, + "grad_norm": 410764.96875, "learning_rate": 3.6280000000000002e-06, - "loss": 47.3117, + "loss": 107756.0875, "step": 18140 }, { "epoch": 0.03666414832112542, - "grad_norm": 250.22854614257812, + "grad_norm": 34326.51953125, "learning_rate": 3.6300000000000004e-06, - "loss": 37.743, + "loss": 251337.825, "step": 18150 }, { "epoch": 0.03668434895380923, - "grad_norm": 309.1924133300781, + "grad_norm": 144119.65625, "learning_rate": 3.6320000000000005e-06, - "loss": 25.3199, + "loss": 86765.4625, "step": 18160 }, { "epoch": 0.03670454958649305, - "grad_norm": 501.50518798828125, + "grad_norm": 289383.6875, "learning_rate": 3.6340000000000003e-06, - "loss": 30.2039, + "loss": 89283.65, "step": 18170 }, { "epoch": 0.036724750219176865, - "grad_norm": 60.217529296875, + "grad_norm": 95329.859375, "learning_rate": 3.636e-06, - "loss": 9.346, + "loss": 49122.1312, "step": 18180 }, { "epoch": 0.03674495085186068, - "grad_norm": 106.04991149902344, + "grad_norm": 32345.302734375, "learning_rate": 3.6380000000000006e-06, - "loss": 46.2859, + "loss": 202446.9875, "step": 18190 }, { "epoch": 0.0367651514845445, - "grad_norm": 685.65625, + "grad_norm": 830782.625, "learning_rate": 3.6400000000000003e-06, - "loss": 47.04, + "loss": 265690.55, "step": 18200 }, { "epoch": 0.03678535211722831, - "grad_norm": 374.1886901855469, + "grad_norm": 179328.640625, "learning_rate": 3.6420000000000005e-06, - "loss": 40.1893, + "loss": 279421.4, "step": 18210 }, { "epoch": 0.03680555274991213, - "grad_norm": 205.88824462890625, + "grad_norm": 31922.85546875, "learning_rate": 3.644e-06, - "loss": 15.6478, + "loss": 39838.8281, "step": 18220 }, { "epoch": 0.036825753382595944, - "grad_norm": 478.465576171875, + "grad_norm": 415436.53125, "learning_rate": 3.646e-06, - "loss": 28.0025, + "loss": 112358.0, "step": 18230 }, { "epoch": 0.03684595401527976, - "grad_norm": 466.0864562988281, + "grad_norm": 354155.5, "learning_rate": 3.6480000000000005e-06, - "loss": 31.9528, + "loss": 118113.75, "step": 18240 }, { "epoch": 0.03686615464796358, - "grad_norm": 164.4373779296875, + "grad_norm": 63853.3671875, "learning_rate": 3.65e-06, - "loss": 32.278, + "loss": 149257.925, "step": 18250 }, { "epoch": 0.03688635528064739, - "grad_norm": 155.95420837402344, + "grad_norm": 4505.4091796875, "learning_rate": 3.6520000000000004e-06, - "loss": 22.3556, + "loss": 89088.8687, "step": 18260 }, { "epoch": 0.0369065559133312, - "grad_norm": 208.10311889648438, + "grad_norm": 140203.484375, "learning_rate": 3.6540000000000005e-06, - "loss": 39.1646, + "loss": 291930.95, "step": 18270 }, { "epoch": 0.03692675654601502, - "grad_norm": 372.9681091308594, + "grad_norm": 196403.484375, "learning_rate": 3.6560000000000002e-06, - "loss": 27.5049, + "loss": 87631.6125, "step": 18280 }, { "epoch": 0.036946957178698836, - "grad_norm": 378.6905212402344, + "grad_norm": 88555.21875, "learning_rate": 3.6580000000000004e-06, - "loss": 46.154, + "loss": 192134.3, "step": 18290 }, { "epoch": 0.03696715781138265, - "grad_norm": 129.78546142578125, + "grad_norm": 23701.1640625, "learning_rate": 3.66e-06, - "loss": 28.114, + "loss": 128436.4125, "step": 18300 }, { "epoch": 0.03698735844406647, - "grad_norm": 826.9152221679688, + "grad_norm": 153279.8125, "learning_rate": 3.6620000000000007e-06, - "loss": 86.4093, + "loss": 387313.575, "step": 18310 }, { "epoch": 0.03700755907675028, - "grad_norm": 418.3246765136719, + "grad_norm": 33921.671875, "learning_rate": 3.6640000000000004e-06, - "loss": 47.3192, + "loss": 158979.2375, "step": 18320 }, { "epoch": 0.0370277597094341, - "grad_norm": 442.64215087890625, + "grad_norm": 61032.84765625, "learning_rate": 3.666e-06, - "loss": 37.9958, + "loss": 247846.025, "step": 18330 }, { "epoch": 0.037047960342117915, - "grad_norm": 605.8897094726562, + "grad_norm": 270255.375, "learning_rate": 3.6680000000000003e-06, - "loss": 44.1922, + "loss": 211271.475, "step": 18340 }, { "epoch": 0.03706816097480173, - "grad_norm": 195.3629608154297, + "grad_norm": 3672.9638671875, "learning_rate": 3.6700000000000004e-06, - "loss": 25.8802, + "loss": 100840.4812, "step": 18350 }, { "epoch": 0.03708836160748555, - "grad_norm": 530.2905883789062, + "grad_norm": 154443.625, "learning_rate": 3.6720000000000006e-06, - "loss": 36.1684, + "loss": 111787.3, "step": 18360 }, { "epoch": 0.03710856224016936, - "grad_norm": 350.49359130859375, + "grad_norm": 110158.625, "learning_rate": 3.6740000000000003e-06, - "loss": 32.8999, + "loss": 140855.275, "step": 18370 }, { "epoch": 0.037128762872853174, - "grad_norm": 394.1460876464844, + "grad_norm": 196474.8125, "learning_rate": 3.676e-06, - "loss": 34.0506, + "loss": 182386.65, "step": 18380 }, { "epoch": 0.037148963505536994, - "grad_norm": 252.3183135986328, + "grad_norm": 4507.55908203125, "learning_rate": 3.6780000000000006e-06, - "loss": 35.0942, + "loss": 115828.275, "step": 18390 }, { "epoch": 0.03716916413822081, - "grad_norm": 575.96875, + "grad_norm": 274857.15625, "learning_rate": 3.6800000000000003e-06, - "loss": 28.3975, + "loss": 133943.2875, "step": 18400 }, { "epoch": 0.03718936477090463, - "grad_norm": 507.2201843261719, + "grad_norm": 193088.453125, "learning_rate": 3.6820000000000005e-06, - "loss": 26.9428, + "loss": 95634.2063, "step": 18410 }, { "epoch": 0.03720956540358844, - "grad_norm": 1575.86962890625, + "grad_norm": 502047.875, "learning_rate": 3.6840000000000002e-06, - "loss": 63.2215, + "loss": 188303.8625, "step": 18420 }, { "epoch": 0.037229766036272254, - "grad_norm": 203.66307067871094, + "grad_norm": 9371.421875, "learning_rate": 3.686e-06, - "loss": 39.4096, + "loss": 203146.7625, "step": 18430 }, { "epoch": 0.037249966668956074, - "grad_norm": 281.33795166015625, + "grad_norm": 42314.41015625, "learning_rate": 3.6880000000000005e-06, - "loss": 25.0655, + "loss": 103721.9187, "step": 18440 }, { "epoch": 0.03727016730163989, - "grad_norm": 365.227294921875, + "grad_norm": 138022.359375, "learning_rate": 3.6900000000000002e-06, - "loss": 30.2813, + "loss": 163417.475, "step": 18450 }, { "epoch": 0.0372903679343237, - "grad_norm": 180.12376403808594, + "grad_norm": 14413.5712890625, "learning_rate": 3.692e-06, - "loss": 40.4301, + "loss": 132555.5, "step": 18460 }, { "epoch": 0.03731056856700752, - "grad_norm": 231.57568359375, + "grad_norm": 37774.5078125, "learning_rate": 3.6940000000000005e-06, - "loss": 32.5852, + "loss": 189894.175, "step": 18470 }, { "epoch": 0.03733076919969133, - "grad_norm": 151.62686157226562, + "grad_norm": 24710.6328125, "learning_rate": 3.6960000000000003e-06, - "loss": 51.2365, + "loss": 376899.6, "step": 18480 }, { "epoch": 0.03735096983237515, - "grad_norm": 111.27426147460938, + "grad_norm": 38629.48046875, "learning_rate": 3.6980000000000004e-06, - "loss": 34.1496, + "loss": 135261.15, "step": 18490 }, { "epoch": 0.037371170465058966, - "grad_norm": 374.16009521484375, + "grad_norm": 98849.0390625, "learning_rate": 3.7e-06, - "loss": 46.3465, + "loss": 236901.025, "step": 18500 }, { "epoch": 0.03739137109774278, - "grad_norm": 199.30868530273438, + "grad_norm": 66186.15625, "learning_rate": 3.702e-06, - "loss": 23.4019, + "loss": 53138.8375, "step": 18510 }, { "epoch": 0.0374115717304266, - "grad_norm": 734.0659790039062, + "grad_norm": 78971.046875, "learning_rate": 3.7040000000000005e-06, - "loss": 43.1304, + "loss": 138701.9, "step": 18520 }, { "epoch": 0.03743177236311041, - "grad_norm": 490.39727783203125, + "grad_norm": 253397.828125, "learning_rate": 3.706e-06, - "loss": 55.5179, + "loss": 235100.625, "step": 18530 }, { "epoch": 0.037451972995794225, - "grad_norm": 69.4140853881836, + "grad_norm": 5610.97314453125, "learning_rate": 3.7080000000000003e-06, - "loss": 32.6496, + "loss": 162868.6875, "step": 18540 }, { "epoch": 0.037472173628478045, - "grad_norm": 128.406005859375, + "grad_norm": 11577.2607421875, "learning_rate": 3.7100000000000005e-06, - "loss": 37.9034, + "loss": 164680.3875, "step": 18550 }, { "epoch": 0.03749237426116186, - "grad_norm": 171.88510131835938, + "grad_norm": 78414.9140625, "learning_rate": 3.712e-06, - "loss": 37.4928, + "loss": 236848.35, "step": 18560 }, { "epoch": 0.03751257489384568, - "grad_norm": 320.34429931640625, + "grad_norm": 27133.814453125, "learning_rate": 3.7140000000000004e-06, - "loss": 66.9242, + "loss": 196520.7125, "step": 18570 }, { "epoch": 0.03753277552652949, - "grad_norm": 182.16502380371094, + "grad_norm": 23583.15234375, "learning_rate": 3.716e-06, - "loss": 35.0712, + "loss": 183722.875, "step": 18580 }, { "epoch": 0.037552976159213304, - "grad_norm": 104.04318237304688, + "grad_norm": 107406.8203125, "learning_rate": 3.7180000000000007e-06, - "loss": 35.1017, + "loss": 172641.6375, "step": 18590 }, { "epoch": 0.037573176791897124, - "grad_norm": 202.28363037109375, + "grad_norm": 57399.515625, "learning_rate": 3.7200000000000004e-06, - "loss": 50.3686, + "loss": 320759.45, "step": 18600 }, { "epoch": 0.03759337742458094, - "grad_norm": 307.9399108886719, + "grad_norm": 347817.21875, "learning_rate": 3.722e-06, - "loss": 21.9995, + "loss": 102098.175, "step": 18610 }, { "epoch": 0.03761357805726475, - "grad_norm": 183.11651611328125, + "grad_norm": 110261.1640625, "learning_rate": 3.7240000000000003e-06, - "loss": 30.2521, + "loss": 207996.725, "step": 18620 }, { "epoch": 0.03763377868994857, - "grad_norm": 281.4105224609375, + "grad_norm": 338747.21875, "learning_rate": 3.726e-06, - "loss": 40.7905, + "loss": 304552.55, "step": 18630 }, { "epoch": 0.03765397932263238, - "grad_norm": 99.5924072265625, + "grad_norm": 10586.4306640625, "learning_rate": 3.7280000000000006e-06, - "loss": 63.7335, + "loss": 400220.05, "step": 18640 }, { "epoch": 0.0376741799553162, - "grad_norm": 782.6028442382812, + "grad_norm": 1114078.25, "learning_rate": 3.7300000000000003e-06, - "loss": 50.5182, + "loss": 273563.2, "step": 18650 }, { "epoch": 0.037694380588000016, - "grad_norm": 302.54986572265625, + "grad_norm": 55422.25, "learning_rate": 3.732e-06, - "loss": 47.7087, + "loss": 270326.825, "step": 18660 }, { "epoch": 0.03771458122068383, - "grad_norm": 593.9015502929688, + "grad_norm": 146659.65625, "learning_rate": 3.7340000000000006e-06, - "loss": 44.0974, + "loss": 216095.25, "step": 18670 }, { "epoch": 0.03773478185336765, - "grad_norm": 272.08441162109375, + "grad_norm": 181912.0, "learning_rate": 3.7360000000000003e-06, - "loss": 32.0538, + "loss": 101014.4438, "step": 18680 }, { "epoch": 0.03775498248605146, - "grad_norm": 92.66593170166016, + "grad_norm": 46617.17578125, "learning_rate": 3.7380000000000005e-06, - "loss": 21.6147, + "loss": 108514.6625, "step": 18690 }, { "epoch": 0.037775183118735275, - "grad_norm": 264.2496337890625, + "grad_norm": 91531.3828125, "learning_rate": 3.74e-06, - "loss": 17.452, + "loss": 73279.7812, "step": 18700 }, { "epoch": 0.037795383751419095, - "grad_norm": 172.50340270996094, + "grad_norm": 2434.103759765625, "learning_rate": 3.742e-06, - "loss": 50.3505, + "loss": 132235.6125, "step": 18710 }, { "epoch": 0.03781558438410291, - "grad_norm": 380.6317138671875, + "grad_norm": 231971.359375, "learning_rate": 3.7440000000000005e-06, - "loss": 54.0348, + "loss": 377736.275, "step": 18720 }, { "epoch": 0.03783578501678673, - "grad_norm": 388.1368408203125, + "grad_norm": 450312.65625, "learning_rate": 3.7460000000000002e-06, - "loss": 27.777, + "loss": 179901.8125, "step": 18730 }, { "epoch": 0.03785598564947054, - "grad_norm": 367.97998046875, + "grad_norm": 112583.3671875, "learning_rate": 3.7480000000000004e-06, - "loss": 31.0645, + "loss": 153298.5375, "step": 18740 }, { "epoch": 0.037876186282154355, - "grad_norm": 290.3291320800781, + "grad_norm": 25019.587890625, "learning_rate": 3.7500000000000005e-06, - "loss": 45.9563, + "loss": 309602.275, "step": 18750 }, { "epoch": 0.037896386914838175, - "grad_norm": 260.09832763671875, + "grad_norm": 84728.4375, "learning_rate": 3.7520000000000002e-06, - "loss": 41.8088, + "loss": 175170.0625, "step": 18760 }, { "epoch": 0.03791658754752199, - "grad_norm": 217.5902557373047, + "grad_norm": 85300.515625, "learning_rate": 3.7540000000000004e-06, - "loss": 20.3382, + "loss": 63250.975, "step": 18770 }, { "epoch": 0.0379367881802058, - "grad_norm": 260.53369140625, + "grad_norm": 62271.4296875, "learning_rate": 3.756e-06, - "loss": 33.0869, + "loss": 201993.2125, "step": 18780 }, { "epoch": 0.03795698881288962, - "grad_norm": 604.4205932617188, + "grad_norm": 859880.375, "learning_rate": 3.7580000000000007e-06, - "loss": 30.6045, + "loss": 187892.875, "step": 18790 }, { "epoch": 0.037977189445573434, - "grad_norm": 228.3231658935547, + "grad_norm": 102939.828125, "learning_rate": 3.7600000000000004e-06, - "loss": 32.694, + "loss": 79795.175, "step": 18800 }, { "epoch": 0.037997390078257254, - "grad_norm": 378.7990417480469, + "grad_norm": 62562.31640625, "learning_rate": 3.762e-06, - "loss": 42.5527, + "loss": 180835.1875, "step": 18810 }, { "epoch": 0.03801759071094107, - "grad_norm": 115.02411651611328, + "grad_norm": 33557.7578125, "learning_rate": 3.7640000000000003e-06, - "loss": 44.9506, + "loss": 248129.075, "step": 18820 }, { "epoch": 0.03803779134362488, - "grad_norm": 80.56550598144531, + "grad_norm": 11874.439453125, "learning_rate": 3.766e-06, - "loss": 23.5099, + "loss": 105034.4625, "step": 18830 }, { "epoch": 0.0380579919763087, - "grad_norm": 436.3177185058594, + "grad_norm": 184295.359375, "learning_rate": 3.7680000000000006e-06, - "loss": 39.0631, + "loss": 259039.6, "step": 18840 }, { "epoch": 0.03807819260899251, - "grad_norm": 465.4303894042969, + "grad_norm": 637095.8125, "learning_rate": 3.7700000000000003e-06, - "loss": 32.9901, + "loss": 161637.9, "step": 18850 }, { "epoch": 0.038098393241676326, - "grad_norm": 42.8822135925293, + "grad_norm": 99550.8984375, "learning_rate": 3.772e-06, - "loss": 20.7256, + "loss": 97269.3375, "step": 18860 }, { "epoch": 0.038118593874360146, - "grad_norm": 255.4981231689453, + "grad_norm": 31999.787109375, "learning_rate": 3.7740000000000006e-06, - "loss": 40.881, + "loss": 342080.825, "step": 18870 }, { "epoch": 0.03813879450704396, - "grad_norm": 243.9823760986328, + "grad_norm": 61725.57421875, "learning_rate": 3.7760000000000004e-06, - "loss": 20.6702, + "loss": 104033.2125, "step": 18880 }, { "epoch": 0.03815899513972778, - "grad_norm": 254.540771484375, + "grad_norm": 38686.8046875, "learning_rate": 3.7780000000000005e-06, - "loss": 41.1104, + "loss": 231455.15, "step": 18890 }, { "epoch": 0.03817919577241159, - "grad_norm": 738.5131225585938, + "grad_norm": 769202.875, "learning_rate": 3.7800000000000002e-06, - "loss": 34.5111, + "loss": 206229.1375, "step": 18900 }, { "epoch": 0.038199396405095405, - "grad_norm": 392.9207763671875, + "grad_norm": 1078744.0, "learning_rate": 3.782e-06, - "loss": 46.206, + "loss": 390319.75, "step": 18910 }, { "epoch": 0.038219597037779225, - "grad_norm": 220.42340087890625, + "grad_norm": 102986.4609375, "learning_rate": 3.7840000000000005e-06, - "loss": 41.3556, + "loss": 288423.725, "step": 18920 }, { "epoch": 0.03823979767046304, - "grad_norm": 540.8204345703125, + "grad_norm": 546241.9375, "learning_rate": 3.7860000000000003e-06, - "loss": 41.6181, + "loss": 191428.0875, "step": 18930 }, { "epoch": 0.03825999830314685, - "grad_norm": 602.7286376953125, + "grad_norm": 204848.28125, "learning_rate": 3.7880000000000004e-06, - "loss": 28.7854, + "loss": 169333.95, "step": 18940 }, { "epoch": 0.03828019893583067, - "grad_norm": 204.54617309570312, + "grad_norm": 66294.3359375, "learning_rate": 3.79e-06, - "loss": 44.5053, + "loss": 136290.725, "step": 18950 }, { "epoch": 0.038300399568514484, - "grad_norm": 1068.0478515625, + "grad_norm": 1078942.25, "learning_rate": 3.7920000000000003e-06, - "loss": 48.5698, + "loss": 311180.125, "step": 18960 }, { "epoch": 0.038320600201198304, - "grad_norm": 254.20535278320312, + "grad_norm": 92192.515625, "learning_rate": 3.7940000000000004e-06, - "loss": 37.5327, + "loss": 187603.825, "step": 18970 }, { "epoch": 0.03834080083388212, - "grad_norm": 199.22528076171875, + "grad_norm": 47521.0625, "learning_rate": 3.796e-06, - "loss": 31.7081, + "loss": 213676.275, "step": 18980 }, { "epoch": 0.03836100146656593, - "grad_norm": 270.05950927734375, + "grad_norm": 132607.625, "learning_rate": 3.7980000000000007e-06, - "loss": 38.2194, + "loss": 261539.875, "step": 18990 }, { "epoch": 0.03838120209924975, - "grad_norm": 207.53485107421875, + "grad_norm": 48378.2890625, "learning_rate": 3.8000000000000005e-06, - "loss": 39.8086, + "loss": 248237.25, "step": 19000 }, { "epoch": 0.038401402731933564, - "grad_norm": 256.2655334472656, + "grad_norm": 7292.751953125, "learning_rate": 3.802e-06, - "loss": 10.8553, + "loss": 75635.425, "step": 19010 }, { "epoch": 0.03842160336461738, - "grad_norm": 258.2910461425781, + "grad_norm": 125793.2421875, "learning_rate": 3.8040000000000003e-06, - "loss": 45.2078, + "loss": 211227.25, "step": 19020 }, { "epoch": 0.0384418039973012, - "grad_norm": 1221.8621826171875, + "grad_norm": 1420750.875, "learning_rate": 3.806e-06, - "loss": 51.1951, + "loss": 328891.7, "step": 19030 }, { "epoch": 0.03846200462998501, - "grad_norm": 332.594482421875, + "grad_norm": 191265.171875, "learning_rate": 3.8080000000000006e-06, - "loss": 34.0059, + "loss": 145327.575, "step": 19040 }, { "epoch": 0.03848220526266883, - "grad_norm": 186.73208618164062, + "grad_norm": 281213.28125, "learning_rate": 3.8100000000000004e-06, - "loss": 25.5198, + "loss": 141200.05, "step": 19050 }, { "epoch": 0.03850240589535264, - "grad_norm": 1239.5872802734375, + "grad_norm": 1457341.375, "learning_rate": 3.812e-06, - "loss": 40.8371, + "loss": 289617.0, "step": 19060 }, { "epoch": 0.038522606528036456, - "grad_norm": 607.7129516601562, + "grad_norm": 605732.875, "learning_rate": 3.8140000000000007e-06, - "loss": 33.0845, + "loss": 222137.425, "step": 19070 }, { "epoch": 0.038542807160720276, - "grad_norm": 178.96774291992188, + "grad_norm": 9038.2626953125, "learning_rate": 3.816e-06, - "loss": 18.2914, + "loss": 86483.3625, "step": 19080 }, { "epoch": 0.03856300779340409, - "grad_norm": 282.399658203125, + "grad_norm": 150307.078125, "learning_rate": 3.818e-06, - "loss": 43.1347, + "loss": 268961.7, "step": 19090 }, { "epoch": 0.0385832084260879, - "grad_norm": 428.16314697265625, + "grad_norm": 18789.1015625, "learning_rate": 3.820000000000001e-06, - "loss": 41.8014, + "loss": 202387.975, "step": 19100 }, { "epoch": 0.03860340905877172, - "grad_norm": 726.791259765625, + "grad_norm": 1439396.125, "learning_rate": 3.822e-06, - "loss": 46.029, + "loss": 502295.5, "step": 19110 }, { "epoch": 0.038623609691455535, - "grad_norm": 297.4860534667969, + "grad_norm": 168544.296875, "learning_rate": 3.824e-06, - "loss": 30.1749, + "loss": 191063.675, "step": 19120 }, { "epoch": 0.038643810324139355, - "grad_norm": 485.90966796875, + "grad_norm": 236881.65625, "learning_rate": 3.826e-06, - "loss": 27.3783, + "loss": 165837.65, "step": 19130 }, { "epoch": 0.03866401095682317, - "grad_norm": 333.77703857421875, + "grad_norm": 10036.7568359375, "learning_rate": 3.8280000000000004e-06, - "loss": 79.9587, + "loss": 98306.7188, "step": 19140 }, { "epoch": 0.03868421158950698, - "grad_norm": 271.51959228515625, + "grad_norm": 72038.65625, "learning_rate": 3.830000000000001e-06, - "loss": 39.4978, + "loss": 253036.85, "step": 19150 }, { "epoch": 0.0387044122221908, - "grad_norm": 104.92560577392578, + "grad_norm": 33458.3125, "learning_rate": 3.832e-06, - "loss": 28.8335, + "loss": 219436.7, "step": 19160 }, { "epoch": 0.038724612854874614, - "grad_norm": 203.7222442626953, + "grad_norm": 222112.359375, "learning_rate": 3.834000000000001e-06, - "loss": 17.9835, + "loss": 116863.3375, "step": 19170 }, { "epoch": 0.03874481348755843, - "grad_norm": 149.12640380859375, + "grad_norm": 82014.3203125, "learning_rate": 3.836e-06, - "loss": 39.6809, + "loss": 228258.575, "step": 19180 }, { "epoch": 0.03876501412024225, - "grad_norm": 345.8726806640625, + "grad_norm": 260857.515625, "learning_rate": 3.838e-06, - "loss": 23.4372, + "loss": 109230.3875, "step": 19190 }, { "epoch": 0.03878521475292606, - "grad_norm": 335.72503662109375, + "grad_norm": 96829.1484375, "learning_rate": 3.8400000000000005e-06, - "loss": 48.739, + "loss": 216249.9, "step": 19200 }, { "epoch": 0.03880541538560988, - "grad_norm": 757.5699462890625, + "grad_norm": 692633.9375, "learning_rate": 3.842e-06, - "loss": 33.6431, + "loss": 175476.7125, "step": 19210 }, { "epoch": 0.03882561601829369, - "grad_norm": 196.40480041503906, + "grad_norm": 58387.45703125, "learning_rate": 3.844000000000001e-06, - "loss": 26.9213, + "loss": 105967.8625, "step": 19220 }, { "epoch": 0.038845816650977506, - "grad_norm": 252.12100219726562, + "grad_norm": 78041.5546875, "learning_rate": 3.846e-06, - "loss": 24.7606, + "loss": 175966.8875, "step": 19230 }, { "epoch": 0.038866017283661326, - "grad_norm": 334.61639404296875, + "grad_norm": 272301.28125, "learning_rate": 3.848e-06, - "loss": 45.8019, + "loss": 193201.6875, "step": 19240 }, { "epoch": 0.03888621791634514, - "grad_norm": 472.4421691894531, + "grad_norm": 373714.625, "learning_rate": 3.85e-06, - "loss": 55.9571, + "loss": 251211.2, "step": 19250 }, { "epoch": 0.03890641854902895, - "grad_norm": 0.0, + "grad_norm": 1686335.0, "learning_rate": 3.8520000000000006e-06, - "loss": 23.9849, + "loss": 273210.075, "step": 19260 }, { "epoch": 0.03892661918171277, - "grad_norm": 550.5303955078125, + "grad_norm": 8439.3388671875, "learning_rate": 3.854000000000001e-06, - "loss": 35.8352, + "loss": 107787.8125, "step": 19270 }, { "epoch": 0.038946819814396585, - "grad_norm": 248.30999755859375, + "grad_norm": 226764.921875, "learning_rate": 3.856e-06, - "loss": 35.0166, + "loss": 162412.4625, "step": 19280 }, { "epoch": 0.038967020447080405, - "grad_norm": 83.90570831298828, + "grad_norm": 8831.8857421875, "learning_rate": 3.858e-06, - "loss": 23.6561, + "loss": 68394.1625, "step": 19290 }, { "epoch": 0.03898722107976422, - "grad_norm": 414.4070739746094, + "grad_norm": 574353.0625, "learning_rate": 3.86e-06, - "loss": 52.0783, + "loss": 306317.325, "step": 19300 }, { "epoch": 0.03900742171244803, - "grad_norm": 292.39593505859375, + "grad_norm": 11183.0107421875, "learning_rate": 3.8620000000000005e-06, - "loss": 39.1817, + "loss": 162717.3125, "step": 19310 }, { "epoch": 0.03902762234513185, - "grad_norm": 139.72998046875, + "grad_norm": 21511.697265625, "learning_rate": 3.864000000000001e-06, - "loss": 30.7452, + "loss": 178350.0, "step": 19320 }, { "epoch": 0.039047822977815665, - "grad_norm": 310.08502197265625, + "grad_norm": 89469.734375, "learning_rate": 3.866e-06, - "loss": 37.7892, + "loss": 213802.575, "step": 19330 }, { "epoch": 0.03906802361049948, - "grad_norm": 251.57972717285156, + "grad_norm": 32198.44140625, "learning_rate": 3.868e-06, - "loss": 25.9636, + "loss": 98785.6375, "step": 19340 }, { "epoch": 0.0390882242431833, - "grad_norm": 76.23115539550781, + "grad_norm": 3419.81298828125, "learning_rate": 3.87e-06, - "loss": 35.7111, + "loss": 97282.9937, "step": 19350 }, { "epoch": 0.03910842487586711, - "grad_norm": 293.5985107421875, + "grad_norm": 79789.2578125, "learning_rate": 3.872e-06, - "loss": 36.1539, + "loss": 88111.7688, "step": 19360 }, { "epoch": 0.03912862550855093, - "grad_norm": 266.607421875, + "grad_norm": 112849.4296875, "learning_rate": 3.8740000000000005e-06, - "loss": 48.2347, + "loss": 286967.975, "step": 19370 }, { "epoch": 0.039148826141234744, - "grad_norm": 551.7935791015625, + "grad_norm": 140781.765625, "learning_rate": 3.876000000000001e-06, - "loss": 34.6203, + "loss": 95586.25, "step": 19380 }, { "epoch": 0.03916902677391856, - "grad_norm": 342.7557373046875, + "grad_norm": 104308.6484375, "learning_rate": 3.878e-06, - "loss": 36.8132, + "loss": 142873.525, "step": 19390 }, { "epoch": 0.03918922740660238, - "grad_norm": 201.66127014160156, + "grad_norm": 18383.400390625, "learning_rate": 3.88e-06, - "loss": 47.7345, + "loss": 216393.125, "step": 19400 }, { "epoch": 0.03920942803928619, - "grad_norm": 0.0, + "grad_norm": 1513990.125, "learning_rate": 3.882e-06, - "loss": 41.6927, + "loss": 324143.65, "step": 19410 }, { "epoch": 0.03922962867197, - "grad_norm": 162.31491088867188, + "grad_norm": 45034.98828125, "learning_rate": 3.884e-06, - "loss": 39.1061, + "loss": 78312.5188, "step": 19420 }, { "epoch": 0.03924982930465382, - "grad_norm": 111.3863754272461, + "grad_norm": 18670.669921875, "learning_rate": 3.8860000000000006e-06, - "loss": 27.7708, + "loss": 121271.6, "step": 19430 }, { "epoch": 0.039270029937337636, - "grad_norm": 634.6728515625, + "grad_norm": 702356.9375, "learning_rate": 3.888e-06, - "loss": 55.6152, + "loss": 281061.75, "step": 19440 }, { "epoch": 0.039290230570021456, - "grad_norm": 243.98507690429688, + "grad_norm": 48460.78125, "learning_rate": 3.89e-06, - "loss": 17.5636, + "loss": 47023.1094, "step": 19450 }, { "epoch": 0.03931043120270527, - "grad_norm": 144.79913330078125, + "grad_norm": 77540.453125, "learning_rate": 3.892e-06, - "loss": 29.8347, + "loss": 151009.1125, "step": 19460 }, { "epoch": 0.03933063183538908, - "grad_norm": 434.5455627441406, + "grad_norm": 43047.2734375, "learning_rate": 3.894e-06, - "loss": 38.3994, + "loss": 223671.575, "step": 19470 }, { "epoch": 0.0393508324680729, - "grad_norm": 176.39202880859375, + "grad_norm": 9265.78125, "learning_rate": 3.8960000000000005e-06, - "loss": 31.0559, + "loss": 149392.6625, "step": 19480 }, { "epoch": 0.039371033100756715, - "grad_norm": 381.365966796875, + "grad_norm": 167641.046875, "learning_rate": 3.898e-06, - "loss": 28.1513, + "loss": 101668.55, "step": 19490 }, { "epoch": 0.03939123373344053, - "grad_norm": 617.5662841796875, + "grad_norm": 871307.125, "learning_rate": 3.900000000000001e-06, - "loss": 56.9999, + "loss": 408131.15, "step": 19500 }, { "epoch": 0.03941143436612435, - "grad_norm": 226.3895263671875, + "grad_norm": 25376.48046875, "learning_rate": 3.902e-06, - "loss": 38.5456, + "loss": 201629.575, "step": 19510 }, { "epoch": 0.03943163499880816, "grad_norm": 0.0, "learning_rate": 3.904e-06, - "loss": 26.5102, + "loss": 109752.9625, "step": 19520 }, { "epoch": 0.03945183563149198, - "grad_norm": 605.03076171875, + "grad_norm": 720852.4375, "learning_rate": 3.906e-06, - "loss": 23.3018, + "loss": 200199.5375, "step": 19530 }, { "epoch": 0.039472036264175794, - "grad_norm": 428.1797180175781, + "grad_norm": 103227.0234375, "learning_rate": 3.9080000000000005e-06, - "loss": 38.2838, + "loss": 206215.75, "step": 19540 }, { "epoch": 0.03949223689685961, - "grad_norm": 484.2371520996094, + "grad_norm": 271479.84375, "learning_rate": 3.910000000000001e-06, - "loss": 36.6108, + "loss": 153826.0875, "step": 19550 }, { "epoch": 0.03951243752954343, - "grad_norm": 664.4325561523438, + "grad_norm": 51618.15625, "learning_rate": 3.912e-06, - "loss": 13.3125, + "loss": 56332.025, "step": 19560 }, { "epoch": 0.03953263816222724, - "grad_norm": 555.9738159179688, + "grad_norm": 545284.625, "learning_rate": 3.914000000000001e-06, - "loss": 35.6404, + "loss": 109067.2875, "step": 19570 }, { "epoch": 0.039552838794911054, - "grad_norm": 265.75592041015625, + "grad_norm": 18253.142578125, "learning_rate": 3.916e-06, - "loss": 55.2272, + "loss": 380469.6, "step": 19580 }, { "epoch": 0.039573039427594874, - "grad_norm": 237.5424041748047, + "grad_norm": 89557.4609375, "learning_rate": 3.9180000000000004e-06, - "loss": 24.2227, + "loss": 104297.8125, "step": 19590 }, { "epoch": 0.03959324006027869, - "grad_norm": 214.67730712890625, + "grad_norm": 139190.65625, "learning_rate": 3.920000000000001e-06, - "loss": 28.2488, + "loss": 147701.8, "step": 19600 }, { "epoch": 0.03961344069296251, - "grad_norm": 308.2847900390625, + "grad_norm": 160873.375, "learning_rate": 3.922e-06, - "loss": 68.5181, + "loss": 468485.1, "step": 19610 }, { "epoch": 0.03963364132564632, - "grad_norm": 445.2687072753906, + "grad_norm": 246451.5625, "learning_rate": 3.924000000000001e-06, - "loss": 31.5809, + "loss": 236759.6, "step": 19620 }, { "epoch": 0.03965384195833013, - "grad_norm": 302.645263671875, + "grad_norm": 155179.75, "learning_rate": 3.926e-06, - "loss": 28.3605, + "loss": 189072.8375, "step": 19630 }, { "epoch": 0.03967404259101395, - "grad_norm": 317.1869812011719, + "grad_norm": 4742.72119140625, "learning_rate": 3.928e-06, - "loss": 42.65, + "loss": 273608.5, "step": 19640 }, { "epoch": 0.039694243223697766, - "grad_norm": 402.7669982910156, + "grad_norm": 301253.625, "learning_rate": 3.9300000000000005e-06, - "loss": 40.776, + "loss": 215498.525, "step": 19650 }, { "epoch": 0.03971444385638158, - "grad_norm": 142.29428100585938, + "grad_norm": 5848.54052734375, "learning_rate": 3.932000000000001e-06, - "loss": 25.2051, + "loss": 93507.3, "step": 19660 }, { "epoch": 0.0397346444890654, - "grad_norm": 301.6438293457031, + "grad_norm": 317253.4375, "learning_rate": 3.934000000000001e-06, - "loss": 52.8674, + "loss": 299964.1, "step": 19670 }, { "epoch": 0.03975484512174921, - "grad_norm": 1422.6112060546875, + "grad_norm": 1862501.625, "learning_rate": 3.936e-06, - "loss": 40.2403, + "loss": 266082.75, "step": 19680 }, { "epoch": 0.03977504575443303, - "grad_norm": 905.2265014648438, + "grad_norm": 72186.8515625, "learning_rate": 3.938e-06, - "loss": 29.4386, + "loss": 166989.8875, "step": 19690 }, { "epoch": 0.039795246387116845, - "grad_norm": 179.0693359375, + "grad_norm": 74187.2109375, "learning_rate": 3.94e-06, - "loss": 31.673, + "loss": 149978.0, "step": 19700 }, { "epoch": 0.03981544701980066, - "grad_norm": 218.84153747558594, + "grad_norm": 47652.94921875, "learning_rate": 3.9420000000000005e-06, - "loss": 42.9457, + "loss": 247172.45, "step": 19710 }, { "epoch": 0.03983564765248448, - "grad_norm": 414.6861877441406, + "grad_norm": 150668.890625, "learning_rate": 3.944e-06, - "loss": 34.0905, + "loss": 154353.05, "step": 19720 }, { "epoch": 0.03985584828516829, - "grad_norm": 168.8384246826172, + "grad_norm": 141542.53125, "learning_rate": 3.946e-06, - "loss": 34.8271, + "loss": 260180.5, "step": 19730 }, { "epoch": 0.039876048917852104, - "grad_norm": 237.5606231689453, + "grad_norm": 85614.1171875, "learning_rate": 3.948e-06, - "loss": 44.4601, + "loss": 226323.975, "step": 19740 }, { "epoch": 0.039896249550535924, - "grad_norm": 650.702392578125, + "grad_norm": 621005.75, "learning_rate": 3.95e-06, - "loss": 27.0659, + "loss": 134725.6, "step": 19750 }, { "epoch": 0.03991645018321974, - "grad_norm": 201.0650177001953, + "grad_norm": 42525.16015625, "learning_rate": 3.9520000000000004e-06, - "loss": 27.1294, + "loss": 162153.1375, "step": 19760 }, { "epoch": 0.03993665081590356, - "grad_norm": 222.05848693847656, + "grad_norm": 111882.390625, "learning_rate": 3.954e-06, - "loss": 22.5971, + "loss": 82374.7375, "step": 19770 }, { "epoch": 0.03995685144858737, - "grad_norm": 172.0639190673828, + "grad_norm": 17125.775390625, "learning_rate": 3.956000000000001e-06, - "loss": 34.0177, + "loss": 258780.975, "step": 19780 }, { "epoch": 0.03997705208127118, - "grad_norm": 319.3443603515625, + "grad_norm": 89056.21875, "learning_rate": 3.958e-06, - "loss": 26.7054, + "loss": 130740.2125, "step": 19790 }, { "epoch": 0.039997252713955, - "grad_norm": 315.76092529296875, + "grad_norm": 3354.158447265625, "learning_rate": 3.96e-06, - "loss": 32.5478, + "loss": 181222.9125, "step": 19800 }, { "epoch": 0.040017453346638816, - "grad_norm": 337.47698974609375, + "grad_norm": 40820.6953125, "learning_rate": 3.962e-06, - "loss": 40.6681, + "loss": 278299.225, "step": 19810 }, { "epoch": 0.04003765397932263, - "grad_norm": 237.56832885742188, + "grad_norm": 15772.2255859375, "learning_rate": 3.964e-06, - "loss": 32.7305, + "loss": 177602.5375, "step": 19820 }, { "epoch": 0.04005785461200645, - "grad_norm": 257.64666748046875, + "grad_norm": 91924.4609375, "learning_rate": 3.966000000000001e-06, - "loss": 45.6443, + "loss": 320809.8, "step": 19830 }, { "epoch": 0.04007805524469026, - "grad_norm": 678.7630615234375, + "grad_norm": 822199.8125, "learning_rate": 3.968e-06, - "loss": 40.3314, + "loss": 253962.6, "step": 19840 }, { "epoch": 0.04009825587737408, - "grad_norm": 185.98243713378906, + "grad_norm": 23125.58984375, "learning_rate": 3.97e-06, - "loss": 39.6857, + "loss": 178664.5, "step": 19850 }, { "epoch": 0.040118456510057895, - "grad_norm": 437.4056396484375, + "grad_norm": 333476.90625, "learning_rate": 3.972e-06, - "loss": 58.7708, + "loss": 340268.025, "step": 19860 }, { "epoch": 0.04013865714274171, - "grad_norm": 17.874475479125977, + "grad_norm": 5532.1552734375, "learning_rate": 3.974e-06, - "loss": 13.7634, + "loss": 60662.525, "step": 19870 }, { "epoch": 0.04015885777542553, - "grad_norm": 218.99484252929688, + "grad_norm": 7888.1318359375, "learning_rate": 3.9760000000000006e-06, - "loss": 24.1919, + "loss": 83816.2437, "step": 19880 }, { "epoch": 0.04017905840810934, - "grad_norm": 493.0852355957031, + "grad_norm": 475145.5625, "learning_rate": 3.978e-06, - "loss": 23.345, + "loss": 103199.225, "step": 19890 }, { "epoch": 0.040199259040793155, - "grad_norm": 309.98028564453125, + "grad_norm": 179327.6875, "learning_rate": 3.980000000000001e-06, - "loss": 35.111, + "loss": 211183.475, "step": 19900 }, { "epoch": 0.040219459673476975, - "grad_norm": 321.9464416503906, + "grad_norm": 287849.78125, "learning_rate": 3.982e-06, - "loss": 54.7974, + "loss": 331515.55, "step": 19910 }, { "epoch": 0.04023966030616079, - "grad_norm": 233.89456176757812, + "grad_norm": 105664.59375, "learning_rate": 3.984e-06, - "loss": 23.0257, + "loss": 75956.15, "step": 19920 }, { "epoch": 0.04025986093884461, - "grad_norm": 183.26454162597656, + "grad_norm": 67113.953125, "learning_rate": 3.9860000000000005e-06, - "loss": 56.3536, + "loss": 348314.925, "step": 19930 }, { "epoch": 0.04028006157152842, - "grad_norm": 447.9790954589844, + "grad_norm": 32987.66796875, "learning_rate": 3.988000000000001e-06, - "loss": 26.1546, + "loss": 110760.25, "step": 19940 }, { "epoch": 0.040300262204212234, - "grad_norm": 263.4811706542969, + "grad_norm": 81114.8828125, "learning_rate": 3.990000000000001e-06, - "loss": 35.8895, + "loss": 143602.675, "step": 19950 }, { "epoch": 0.040320462836896054, - "grad_norm": 179.5793914794922, + "grad_norm": 55982.640625, "learning_rate": 3.992e-06, - "loss": 25.181, + "loss": 246443.675, "step": 19960 }, { "epoch": 0.04034066346957987, - "grad_norm": 2456.2373046875, + "grad_norm": 80549.7734375, "learning_rate": 3.994e-06, - "loss": 34.2655, + "loss": 178812.25, "step": 19970 }, { "epoch": 0.04036086410226368, - "grad_norm": 188.26966857910156, + "grad_norm": 30850.314453125, "learning_rate": 3.996e-06, - "loss": 41.0587, + "loss": 256951.35, "step": 19980 }, { "epoch": 0.0403810647349475, - "grad_norm": 215.05946350097656, + "grad_norm": 59132.3515625, "learning_rate": 3.9980000000000005e-06, - "loss": 24.28, + "loss": 90099.3813, "step": 19990 }, { "epoch": 0.04040126536763131, - "grad_norm": 189.59005737304688, + "grad_norm": 3285.249755859375, "learning_rate": 4.000000000000001e-06, - "loss": 26.4226, + "loss": 122881.525, "step": 20000 }, { "epoch": 0.04042146600031513, - "grad_norm": 307.7025146484375, + "grad_norm": 117129.4453125, "learning_rate": 4.002e-06, - "loss": 32.5957, + "loss": 206926.425, "step": 20010 }, { "epoch": 0.040441666632998946, - "grad_norm": 141.05995178222656, + "grad_norm": 14356.806640625, "learning_rate": 4.004e-06, - "loss": 23.7471, + "loss": 65280.4688, "step": 20020 }, { "epoch": 0.04046186726568276, - "grad_norm": 291.870849609375, + "grad_norm": 137708.703125, "learning_rate": 4.006e-06, - "loss": 31.9492, + "loss": 222991.525, "step": 20030 }, { "epoch": 0.04048206789836658, - "grad_norm": 417.44512939453125, + "grad_norm": 276571.125, "learning_rate": 4.008e-06, - "loss": 41.5067, + "loss": 272834.35, "step": 20040 }, { "epoch": 0.04050226853105039, - "grad_norm": 232.98028564453125, + "grad_norm": 80574.84375, "learning_rate": 4.0100000000000006e-06, - "loss": 38.7123, + "loss": 180759.5, "step": 20050 }, { "epoch": 0.040522469163734205, - "grad_norm": 147.53305053710938, + "grad_norm": 10358.234375, "learning_rate": 4.012000000000001e-06, - "loss": 54.9667, + "loss": 308925.35, "step": 20060 }, { "epoch": 0.040542669796418025, - "grad_norm": 233.66880798339844, + "grad_norm": 32575.38671875, "learning_rate": 4.014e-06, - "loss": 44.2462, + "loss": 243003.775, "step": 20070 }, { "epoch": 0.04056287042910184, - "grad_norm": 373.2441101074219, + "grad_norm": 154310.109375, "learning_rate": 4.016e-06, - "loss": 29.1594, + "loss": 152770.65, "step": 20080 }, { "epoch": 0.04058307106178566, - "grad_norm": 569.9129638671875, + "grad_norm": 384075.59375, "learning_rate": 4.018e-06, - "loss": 39.5939, + "loss": 144259.125, "step": 20090 }, { "epoch": 0.04060327169446947, - "grad_norm": 19.329669952392578, + "grad_norm": 1143.2174072265625, "learning_rate": 4.0200000000000005e-06, - "loss": 20.1258, + "loss": 79743.2437, "step": 20100 }, { "epoch": 0.040623472327153284, - "grad_norm": 180.48561096191406, + "grad_norm": 76022.2734375, "learning_rate": 4.022000000000001e-06, - "loss": 29.2828, + "loss": 180812.3875, "step": 20110 }, { "epoch": 0.040643672959837104, - "grad_norm": 1575.479736328125, + "grad_norm": 2579083.0, "learning_rate": 4.024e-06, - "loss": 37.484, + "loss": 467383.3, "step": 20120 }, { "epoch": 0.04066387359252092, - "grad_norm": 228.2759552001953, + "grad_norm": 8431.720703125, "learning_rate": 4.026e-06, - "loss": 37.5349, + "loss": 165509.0, "step": 20130 }, { "epoch": 0.04068407422520473, - "grad_norm": 428.0335998535156, + "grad_norm": 557933.625, "learning_rate": 4.028e-06, - "loss": 40.8519, + "loss": 202245.9625, "step": 20140 }, { "epoch": 0.04070427485788855, - "grad_norm": 82.67201232910156, + "grad_norm": 150924.234375, "learning_rate": 4.03e-06, - "loss": 34.8081, + "loss": 207329.525, "step": 20150 }, { "epoch": 0.040724475490572364, - "grad_norm": 812.8284912109375, + "grad_norm": 983321.75, "learning_rate": 4.0320000000000005e-06, - "loss": 52.8438, + "loss": 249571.65, "step": 20160 }, { "epoch": 0.040744676123256184, - "grad_norm": 102.69956970214844, + "grad_norm": 44310.73828125, "learning_rate": 4.034e-06, - "loss": 34.6039, + "loss": 83702.1562, "step": 20170 }, { "epoch": 0.04076487675594, - "grad_norm": 264.369873046875, + "grad_norm": 7575.91845703125, "learning_rate": 4.036000000000001e-06, - "loss": 44.1913, + "loss": 199224.2875, "step": 20180 }, { "epoch": 0.04078507738862381, - "grad_norm": 375.61328125, + "grad_norm": 143514.375, "learning_rate": 4.038e-06, - "loss": 37.198, + "loss": 191411.2625, "step": 20190 }, { "epoch": 0.04080527802130763, - "grad_norm": 197.2830352783203, + "grad_norm": 7397.8076171875, "learning_rate": 4.04e-06, - "loss": 36.5702, + "loss": 245189.45, "step": 20200 }, { "epoch": 0.04082547865399144, - "grad_norm": 387.1900329589844, + "grad_norm": 423143.75, "learning_rate": 4.0420000000000004e-06, - "loss": 34.2572, + "loss": 163166.5625, "step": 20210 }, { "epoch": 0.040845679286675256, - "grad_norm": 841.2315673828125, + "grad_norm": 69764.5390625, "learning_rate": 4.044e-06, - "loss": 41.9452, + "loss": 184948.8375, "step": 20220 }, { "epoch": 0.040865879919359076, - "grad_norm": 112.06974792480469, + "grad_norm": 7327.2998046875, "learning_rate": 4.046000000000001e-06, - "loss": 16.1985, + "loss": 64712.6562, "step": 20230 }, { "epoch": 0.04088608055204289, - "grad_norm": 448.9200134277344, + "grad_norm": 8757.2607421875, "learning_rate": 4.048e-06, - "loss": 27.3424, + "loss": 127447.7375, "step": 20240 }, { "epoch": 0.04090628118472671, - "grad_norm": 741.1703491210938, + "grad_norm": 841845.6875, "learning_rate": 4.05e-06, - "loss": 38.0227, + "loss": 181338.775, "step": 20250 }, { "epoch": 0.04092648181741052, - "grad_norm": 327.67950439453125, + "grad_norm": 133831.890625, "learning_rate": 4.052e-06, - "loss": 48.0524, + "loss": 176170.95, "step": 20260 }, { "epoch": 0.040946682450094335, - "grad_norm": 608.6464233398438, + "grad_norm": 71780.4609375, "learning_rate": 4.0540000000000005e-06, - "loss": 28.6052, + "loss": 207922.8625, "step": 20270 }, { "epoch": 0.040966883082778155, - "grad_norm": 540.9326171875, + "grad_norm": 608580.0625, "learning_rate": 4.056000000000001e-06, - "loss": 46.8294, + "loss": 435174.1, "step": 20280 }, { "epoch": 0.04098708371546197, - "grad_norm": 273.62530517578125, + "grad_norm": 151008.078125, "learning_rate": 4.058e-06, - "loss": 40.8854, + "loss": 330534.0, "step": 20290 }, { "epoch": 0.04100728434814578, - "grad_norm": 326.904052734375, + "grad_norm": 88091.4609375, "learning_rate": 4.060000000000001e-06, - "loss": 29.1135, + "loss": 112368.675, "step": 20300 }, { "epoch": 0.0410274849808296, - "grad_norm": 458.88018798828125, + "grad_norm": 179622.78125, "learning_rate": 4.062e-06, - "loss": 41.2102, + "loss": 179323.575, "step": 20310 }, { "epoch": 0.041047685613513414, - "grad_norm": 667.4222412109375, + "grad_norm": 423327.4375, "learning_rate": 4.064e-06, - "loss": 35.2916, + "loss": 190677.8625, "step": 20320 }, { "epoch": 0.041067886246197234, - "grad_norm": 315.7451477050781, + "grad_norm": 96111.875, "learning_rate": 4.0660000000000005e-06, - "loss": 37.1972, + "loss": 372900.45, "step": 20330 }, { "epoch": 0.04108808687888105, - "grad_norm": 428.32080078125, + "grad_norm": 375305.875, "learning_rate": 4.068000000000001e-06, - "loss": 40.0218, + "loss": 167257.55, "step": 20340 }, { "epoch": 0.04110828751156486, - "grad_norm": 124.36223602294922, + "grad_norm": 32469.22265625, "learning_rate": 4.07e-06, - "loss": 40.3568, + "loss": 109155.45, "step": 20350 }, { "epoch": 0.04112848814424868, - "grad_norm": 251.20816040039062, + "grad_norm": 29323.6875, "learning_rate": 4.072e-06, - "loss": 29.3377, + "loss": 88895.1, "step": 20360 }, { "epoch": 0.04114868877693249, - "grad_norm": 212.65249633789062, + "grad_norm": 7510.166015625, "learning_rate": 4.074e-06, - "loss": 45.3342, + "loss": 321586.325, "step": 20370 }, { "epoch": 0.041168889409616306, - "grad_norm": 693.0851440429688, + "grad_norm": 320432.78125, "learning_rate": 4.0760000000000004e-06, - "loss": 58.4595, + "loss": 305091.85, "step": 20380 }, { "epoch": 0.041189090042300126, - "grad_norm": 163.91427612304688, + "grad_norm": 69812.046875, "learning_rate": 4.078000000000001e-06, - "loss": 23.256, + "loss": 100092.525, "step": 20390 }, { "epoch": 0.04120929067498394, - "grad_norm": 285.9828186035156, + "grad_norm": 56367.15234375, "learning_rate": 4.08e-06, - "loss": 34.4963, + "loss": 246176.85, "step": 20400 }, { "epoch": 0.04122949130766776, - "grad_norm": 500.408935546875, + "grad_norm": 373121.65625, "learning_rate": 4.082e-06, - "loss": 35.7673, + "loss": 177296.0, "step": 20410 }, { "epoch": 0.04124969194035157, - "grad_norm": 388.328125, + "grad_norm": 80121.8125, "learning_rate": 4.084e-06, - "loss": 25.7379, + "loss": 71314.0813, "step": 20420 }, { "epoch": 0.041269892573035385, - "grad_norm": 348.4654541015625, + "grad_norm": 66675.7265625, "learning_rate": 4.086e-06, - "loss": 45.9121, + "loss": 305385.65, "step": 20430 }, { "epoch": 0.041290093205719205, - "grad_norm": 545.4322509765625, + "grad_norm": 709703.125, "learning_rate": 4.0880000000000005e-06, - "loss": 42.0894, + "loss": 230637.6, "step": 20440 }, { "epoch": 0.04131029383840302, - "grad_norm": 122.1491470336914, + "grad_norm": 22322.912109375, "learning_rate": 4.09e-06, - "loss": 22.5061, + "loss": 171084.0125, "step": 20450 }, { "epoch": 0.04133049447108683, - "grad_norm": 311.8683776855469, + "grad_norm": 136229.375, "learning_rate": 4.092000000000001e-06, - "loss": 25.4118, + "loss": 101380.7, "step": 20460 }, { "epoch": 0.04135069510377065, - "grad_norm": 106.4710464477539, + "grad_norm": 6904.98291015625, "learning_rate": 4.094e-06, - "loss": 33.3134, + "loss": 155460.075, "step": 20470 }, { "epoch": 0.041370895736454465, - "grad_norm": 696.3756103515625, + "grad_norm": 453368.8125, "learning_rate": 4.096e-06, - "loss": 41.182, + "loss": 185694.5, "step": 20480 }, { "epoch": 0.041391096369138285, - "grad_norm": 259.6597900390625, + "grad_norm": 198072.140625, "learning_rate": 4.098e-06, - "loss": 50.8422, + "loss": 275025.425, "step": 20490 }, { "epoch": 0.0414112970018221, - "grad_norm": 339.96966552734375, + "grad_norm": 89308.2265625, "learning_rate": 4.1e-06, - "loss": 63.607, + "loss": 390838.05, "step": 20500 }, { "epoch": 0.04143149763450591, - "grad_norm": 174.30322265625, + "grad_norm": 5546.2890625, "learning_rate": 4.102000000000001e-06, - "loss": 17.6622, + "loss": 47977.1469, "step": 20510 }, { "epoch": 0.04145169826718973, - "grad_norm": 403.02362060546875, + "grad_norm": 132826.5625, "learning_rate": 4.104e-06, - "loss": 31.6906, + "loss": 127121.875, "step": 20520 }, { "epoch": 0.041471898899873544, - "grad_norm": 305.7235412597656, + "grad_norm": 89892.1796875, "learning_rate": 4.106e-06, - "loss": 33.6001, + "loss": 155624.775, "step": 20530 }, { "epoch": 0.04149209953255736, - "grad_norm": 162.15066528320312, + "grad_norm": 47810.0, "learning_rate": 4.108e-06, - "loss": 28.2985, + "loss": 149075.3375, "step": 20540 }, { "epoch": 0.04151230016524118, - "grad_norm": 868.5485229492188, + "grad_norm": 847439.375, "learning_rate": 4.1100000000000005e-06, - "loss": 34.2084, + "loss": 220855.9, "step": 20550 }, { "epoch": 0.04153250079792499, - "grad_norm": 251.6429443359375, + "grad_norm": 154607.53125, "learning_rate": 4.112000000000001e-06, - "loss": 48.9287, + "loss": 157530.825, "step": 20560 }, { "epoch": 0.04155270143060881, - "grad_norm": 243.0496063232422, + "grad_norm": 172940.515625, "learning_rate": 4.114e-06, - "loss": 24.6031, + "loss": 92906.5625, "step": 20570 }, { "epoch": 0.04157290206329262, - "grad_norm": 292.49139404296875, + "grad_norm": 12026.310546875, "learning_rate": 4.116000000000001e-06, - "loss": 22.1322, + "loss": 102247.4438, "step": 20580 }, { "epoch": 0.041593102695976436, - "grad_norm": 453.16912841796875, + "grad_norm": 524584.5, "learning_rate": 4.118e-06, - "loss": 39.9018, + "loss": 216553.175, "step": 20590 }, { "epoch": 0.041613303328660256, - "grad_norm": 231.3798065185547, + "grad_norm": 231378.46875, "learning_rate": 4.12e-06, - "loss": 46.0094, + "loss": 318254.0, "step": 20600 }, { "epoch": 0.04163350396134407, - "grad_norm": 300.7149963378906, + "grad_norm": 88775.296875, "learning_rate": 4.1220000000000005e-06, - "loss": 26.9751, + "loss": 120581.35, "step": 20610 }, { "epoch": 0.04165370459402788, - "grad_norm": 321.439453125, + "grad_norm": 451333.0, "learning_rate": 4.124e-06, - "loss": 33.3333, + "loss": 243791.2, "step": 20620 }, { "epoch": 0.0416739052267117, - "grad_norm": 167.4238739013672, + "grad_norm": 87063.9375, "learning_rate": 4.126000000000001e-06, - "loss": 30.8674, + "loss": 57914.9812, "step": 20630 }, { "epoch": 0.041694105859395515, - "grad_norm": 167.2349395751953, + "grad_norm": 4976.40966796875, "learning_rate": 4.128e-06, - "loss": 43.9423, + "loss": 248096.5, "step": 20640 }, { "epoch": 0.041714306492079335, - "grad_norm": 277.2059020996094, + "grad_norm": 60433.828125, "learning_rate": 4.13e-06, - "loss": 30.5669, + "loss": 129218.05, "step": 20650 }, { "epoch": 0.04173450712476315, - "grad_norm": 130.9712677001953, + "grad_norm": 58145.07421875, "learning_rate": 4.132e-06, - "loss": 19.1888, + "loss": 89261.1187, "step": 20660 }, { "epoch": 0.04175470775744696, - "grad_norm": 289.81243896484375, + "grad_norm": 325393.90625, "learning_rate": 4.1340000000000006e-06, - "loss": 22.6288, + "loss": 125159.325, "step": 20670 }, { "epoch": 0.04177490839013078, - "grad_norm": 627.8983764648438, + "grad_norm": 112861.0, "learning_rate": 4.136000000000001e-06, - "loss": 30.9941, + "loss": 192226.1875, "step": 20680 }, { "epoch": 0.041795109022814594, - "grad_norm": 271.6875305175781, + "grad_norm": 137502.984375, "learning_rate": 4.138e-06, - "loss": 29.0029, + "loss": 191833.9, "step": 20690 }, { "epoch": 0.04181530965549841, - "grad_norm": 438.3844909667969, + "grad_norm": 171883.375, "learning_rate": 4.14e-06, - "loss": 37.4831, + "loss": 94605.4375, "step": 20700 }, { "epoch": 0.04183551028818223, - "grad_norm": 429.591796875, + "grad_norm": 261273.84375, "learning_rate": 4.142e-06, - "loss": 26.2826, + "loss": 119576.9875, "step": 20710 }, { "epoch": 0.04185571092086604, - "grad_norm": 241.58526611328125, + "grad_norm": 73343.0625, "learning_rate": 4.1440000000000005e-06, - "loss": 29.2798, + "loss": 260947.7, "step": 20720 }, { "epoch": 0.04187591155354986, - "grad_norm": 260.7231140136719, + "grad_norm": 92255.9296875, "learning_rate": 4.146000000000001e-06, - "loss": 24.107, + "loss": 223314.1, "step": 20730 }, { "epoch": 0.041896112186233674, - "grad_norm": 878.5065307617188, + "grad_norm": 288809.0625, "learning_rate": 4.148000000000001e-06, - "loss": 60.2322, + "loss": 209185.55, "step": 20740 }, { "epoch": 0.04191631281891749, - "grad_norm": 421.5115051269531, + "grad_norm": 442399.78125, "learning_rate": 4.15e-06, - "loss": 44.0793, + "loss": 242999.7, "step": 20750 }, { "epoch": 0.04193651345160131, - "grad_norm": 294.0861511230469, + "grad_norm": 23783.009765625, "learning_rate": 4.152e-06, - "loss": 40.8335, + "loss": 177507.4875, "step": 20760 }, { "epoch": 0.04195671408428512, - "grad_norm": 168.69158935546875, + "grad_norm": 108893.4296875, "learning_rate": 4.154e-06, - "loss": 35.5561, + "loss": 290279.825, "step": 20770 }, { "epoch": 0.04197691471696893, - "grad_norm": 96.60639953613281, + "grad_norm": 11033.2626953125, "learning_rate": 4.1560000000000005e-06, - "loss": 28.9656, + "loss": 35536.375, "step": 20780 }, { "epoch": 0.04199711534965275, - "grad_norm": 311.1852722167969, + "grad_norm": 128958.71875, "learning_rate": 4.158000000000001e-06, - "loss": 35.748, + "loss": 224576.55, "step": 20790 }, { "epoch": 0.042017315982336566, - "grad_norm": 188.48680114746094, + "grad_norm": 10747.2119140625, "learning_rate": 4.16e-06, - "loss": 34.3928, + "loss": 208133.2625, "step": 20800 }, { "epoch": 0.042037516615020386, - "grad_norm": 283.5986328125, + "grad_norm": 314111.3125, "learning_rate": 4.162e-06, - "loss": 40.0365, + "loss": 279436.15, "step": 20810 }, { "epoch": 0.0420577172477042, - "grad_norm": 244.67515563964844, + "grad_norm": 22928.982421875, "learning_rate": 4.164e-06, - "loss": 21.4508, + "loss": 61757.7188, "step": 20820 }, { "epoch": 0.04207791788038801, - "grad_norm": 229.06175231933594, + "grad_norm": 8061.4248046875, "learning_rate": 4.1660000000000004e-06, - "loss": 44.8772, + "loss": 228880.425, "step": 20830 }, { "epoch": 0.04209811851307183, - "grad_norm": 272.0743713378906, + "grad_norm": 11680.5546875, "learning_rate": 4.168000000000001e-06, - "loss": 23.9264, + "loss": 155006.225, "step": 20840 }, { "epoch": 0.042118319145755645, - "grad_norm": 368.3594665527344, + "grad_norm": 456339.1875, "learning_rate": 4.17e-06, - "loss": 38.5595, + "loss": 225132.125, "step": 20850 }, { "epoch": 0.04213851977843946, - "grad_norm": 233.93699645996094, + "grad_norm": 58191.62109375, "learning_rate": 4.172000000000001e-06, - "loss": 33.6996, + "loss": 190154.775, "step": 20860 }, { "epoch": 0.04215872041112328, - "grad_norm": 296.39434814453125, + "grad_norm": 27807.228515625, "learning_rate": 4.174e-06, - "loss": 26.6863, + "loss": 127401.65, "step": 20870 }, { "epoch": 0.04217892104380709, - "grad_norm": 456.282470703125, + "grad_norm": 188062.25, "learning_rate": 4.176e-06, - "loss": 49.657, + "loss": 301297.375, "step": 20880 }, { "epoch": 0.04219912167649091, - "grad_norm": 402.9183044433594, + "grad_norm": 511455.625, "learning_rate": 4.1780000000000005e-06, - "loss": 44.8408, + "loss": 209639.6, "step": 20890 }, { "epoch": 0.042219322309174724, - "grad_norm": 324.6549072265625, + "grad_norm": 33258.09375, "learning_rate": 4.18e-06, - "loss": 27.7556, + "loss": 100192.3625, "step": 20900 }, { "epoch": 0.04223952294185854, - "grad_norm": 376.4916687011719, + "grad_norm": 47021.76171875, "learning_rate": 4.182000000000001e-06, - "loss": 32.6511, + "loss": 140592.1375, "step": 20910 }, { "epoch": 0.04225972357454236, - "grad_norm": 205.3048095703125, + "grad_norm": 117558.5546875, "learning_rate": 4.184e-06, - "loss": 30.0404, + "loss": 205921.7375, "step": 20920 }, { "epoch": 0.04227992420722617, - "grad_norm": 162.9635467529297, + "grad_norm": 12186.7666015625, "learning_rate": 4.186e-06, - "loss": 33.686, + "loss": 211891.4, "step": 20930 }, { "epoch": 0.04230012483990998, - "grad_norm": 80.64946746826172, + "grad_norm": 1115.6932373046875, "learning_rate": 4.188e-06, - "loss": 15.7464, + "loss": 46311.2094, "step": 20940 }, { "epoch": 0.0423203254725938, - "grad_norm": 138.9059600830078, + "grad_norm": 32692.306640625, "learning_rate": 4.1900000000000005e-06, - "loss": 18.966, + "loss": 192540.2625, "step": 20950 }, { "epoch": 0.042340526105277616, - "grad_norm": 290.88262939453125, + "grad_norm": 118151.671875, "learning_rate": 4.192000000000001e-06, - "loss": 34.8412, + "loss": 145606.6125, "step": 20960 }, { "epoch": 0.042360726737961436, - "grad_norm": 630.651123046875, + "grad_norm": 610280.375, "learning_rate": 4.194e-06, - "loss": 24.099, + "loss": 112734.925, "step": 20970 }, { "epoch": 0.04238092737064525, - "grad_norm": 153.69415283203125, + "grad_norm": 30258.138671875, "learning_rate": 4.196e-06, - "loss": 50.662, + "loss": 206370.1125, "step": 20980 }, { "epoch": 0.04240112800332906, - "grad_norm": 249.28179931640625, + "grad_norm": 121608.2578125, "learning_rate": 4.198e-06, - "loss": 37.3011, + "loss": 237031.5, "step": 20990 }, { "epoch": 0.04242132863601288, - "grad_norm": 586.7985229492188, + "grad_norm": 546106.3125, "learning_rate": 4.2000000000000004e-06, - "loss": 36.2117, + "loss": 216086.8, "step": 21000 }, { "epoch": 0.042441529268696696, - "grad_norm": 380.9582824707031, + "grad_norm": 121044.1171875, "learning_rate": 4.202000000000001e-06, - "loss": 29.2529, + "loss": 102309.8062, "step": 21010 }, { "epoch": 0.04246172990138051, - "grad_norm": 85.7302474975586, + "grad_norm": 6741.36376953125, "learning_rate": 4.204e-06, - "loss": 40.0706, + "loss": 203461.4375, "step": 21020 }, { "epoch": 0.04248193053406433, - "grad_norm": 385.5627136230469, + "grad_norm": 165230.1875, "learning_rate": 4.206e-06, - "loss": 35.7786, + "loss": 100224.325, "step": 21030 }, { "epoch": 0.04250213116674814, - "grad_norm": 458.6239929199219, + "grad_norm": 143435.21875, "learning_rate": 4.208e-06, - "loss": 35.8708, + "loss": 167776.1375, "step": 21040 }, { "epoch": 0.04252233179943196, - "grad_norm": 304.5676574707031, + "grad_norm": 135983.03125, "learning_rate": 4.21e-06, - "loss": 36.7744, + "loss": 222309.25, "step": 21050 }, { "epoch": 0.042542532432115775, - "grad_norm": 145.11141967773438, + "grad_norm": 24099.2578125, "learning_rate": 4.2120000000000005e-06, - "loss": 28.0551, + "loss": 144219.0625, "step": 21060 }, { "epoch": 0.04256273306479959, - "grad_norm": 402.4499206542969, + "grad_norm": 158372.71875, "learning_rate": 4.214000000000001e-06, - "loss": 28.9746, + "loss": 110535.0875, "step": 21070 }, { "epoch": 0.04258293369748341, - "grad_norm": 191.5008087158203, + "grad_norm": 29983.37890625, "learning_rate": 4.216e-06, - "loss": 37.7505, + "loss": 140535.85, "step": 21080 }, { "epoch": 0.04260313433016722, - "grad_norm": 68.45462799072266, + "grad_norm": 12339.326171875, "learning_rate": 4.218e-06, - "loss": 30.1864, + "loss": 121892.5625, "step": 21090 }, { "epoch": 0.042623334962851034, - "grad_norm": 105.98591613769531, + "grad_norm": 25251.90625, "learning_rate": 4.22e-06, - "loss": 18.1124, + "loss": 135178.825, "step": 21100 }, { "epoch": 0.042643535595534854, - "grad_norm": 761.5296020507812, + "grad_norm": 1005490.125, "learning_rate": 4.222e-06, - "loss": 27.7599, + "loss": 168391.15, "step": 21110 }, { "epoch": 0.04266373622821867, - "grad_norm": 469.4954833984375, + "grad_norm": 204320.234375, "learning_rate": 4.2240000000000006e-06, - "loss": 32.8871, + "loss": 145653.275, "step": 21120 }, { "epoch": 0.04268393686090249, - "grad_norm": 398.43231201171875, + "grad_norm": 91259.59375, "learning_rate": 4.226e-06, - "loss": 21.9109, + "loss": 98689.0625, "step": 21130 }, { "epoch": 0.0427041374935863, - "grad_norm": 367.3377685546875, + "grad_norm": 174490.625, "learning_rate": 4.228000000000001e-06, - "loss": 27.4366, + "loss": 125381.95, "step": 21140 }, { "epoch": 0.04272433812627011, - "grad_norm": 390.81591796875, + "grad_norm": 132021.125, "learning_rate": 4.23e-06, - "loss": 35.5517, + "loss": 201597.5, "step": 21150 }, { "epoch": 0.04274453875895393, - "grad_norm": 303.1268310546875, + "grad_norm": 269794.84375, "learning_rate": 4.232e-06, - "loss": 32.5997, + "loss": 254045.7, "step": 21160 }, { "epoch": 0.042764739391637746, - "grad_norm": 332.6908264160156, + "grad_norm": 355913.5625, "learning_rate": 4.2340000000000005e-06, - "loss": 31.0615, + "loss": 168312.6, "step": 21170 }, { "epoch": 0.04278494002432156, - "grad_norm": 357.15362548828125, + "grad_norm": 65445.26953125, "learning_rate": 4.236e-06, - "loss": 27.4738, + "loss": 62055.0125, "step": 21180 }, { "epoch": 0.04280514065700538, - "grad_norm": 206.97450256347656, + "grad_norm": 37030.11328125, "learning_rate": 4.238000000000001e-06, - "loss": 20.7976, + "loss": 68170.7812, "step": 21190 }, { "epoch": 0.04282534128968919, - "grad_norm": 279.89178466796875, + "grad_norm": 91860.859375, "learning_rate": 4.24e-06, - "loss": 43.8803, + "loss": 311185.1, "step": 21200 }, { "epoch": 0.04284554192237301, - "grad_norm": 87.10535430908203, + "grad_norm": 333007.9375, "learning_rate": 4.242e-06, - "loss": 30.7336, + "loss": 275369.4, "step": 21210 }, { "epoch": 0.042865742555056825, - "grad_norm": 318.570556640625, + "grad_norm": 84978.1953125, "learning_rate": 4.244e-06, - "loss": 61.2978, + "loss": 317367.75, "step": 21220 }, { "epoch": 0.04288594318774064, - "grad_norm": 500.3152160644531, + "grad_norm": 461653.6875, "learning_rate": 4.2460000000000005e-06, - "loss": 50.3606, + "loss": 361267.1, "step": 21230 }, { "epoch": 0.04290614382042446, - "grad_norm": 409.52978515625, + "grad_norm": 122309.5234375, "learning_rate": 4.248000000000001e-06, - "loss": 40.0934, + "loss": 213029.25, "step": 21240 }, { "epoch": 0.04292634445310827, - "grad_norm": 196.73036193847656, + "grad_norm": 32014.68359375, "learning_rate": 4.25e-06, - "loss": 30.2571, + "loss": 173303.0875, "step": 21250 }, { "epoch": 0.042946545085792084, - "grad_norm": 550.4962158203125, + "grad_norm": 249233.421875, "learning_rate": 4.252000000000001e-06, - "loss": 30.2084, + "loss": 99330.5625, "step": 21260 }, { "epoch": 0.042966745718475904, - "grad_norm": 99.06867218017578, + "grad_norm": 5850.85205078125, "learning_rate": 4.254e-06, - "loss": 39.2887, + "loss": 237859.85, "step": 21270 }, { "epoch": 0.04298694635115972, - "grad_norm": 682.33154296875, + "grad_norm": 388238.9375, "learning_rate": 4.256e-06, - "loss": 40.1178, + "loss": 116477.8125, "step": 21280 }, { "epoch": 0.04300714698384353, - "grad_norm": 335.2230224609375, + "grad_norm": 116017.6015625, "learning_rate": 4.2580000000000006e-06, - "loss": 40.0422, + "loss": 333378.15, "step": 21290 }, { "epoch": 0.04302734761652735, - "grad_norm": 356.4090576171875, + "grad_norm": 163121.84375, "learning_rate": 4.26e-06, - "loss": 31.7668, + "loss": 181210.8125, "step": 21300 }, { "epoch": 0.043047548249211164, - "grad_norm": 387.0377502441406, + "grad_norm": 124069.3671875, "learning_rate": 4.262000000000001e-06, - "loss": 36.7466, + "loss": 245501.6, "step": 21310 }, { "epoch": 0.043067748881894984, - "grad_norm": 118.68424224853516, + "grad_norm": 6785.57275390625, "learning_rate": 4.264e-06, - "loss": 39.0243, + "loss": 176446.0, "step": 21320 }, { "epoch": 0.0430879495145788, - "grad_norm": 229.70225524902344, + "grad_norm": 171183.921875, "learning_rate": 4.266e-06, - "loss": 26.9583, + "loss": 152685.9875, "step": 21330 }, { "epoch": 0.04310815014726261, - "grad_norm": 334.6313781738281, + "grad_norm": 6319.408203125, "learning_rate": 4.2680000000000005e-06, - "loss": 37.4017, + "loss": 220136.875, "step": 21340 }, { "epoch": 0.04312835077994643, - "grad_norm": 145.9667510986328, + "grad_norm": 10843.7509765625, "learning_rate": 4.270000000000001e-06, - "loss": 24.935, + "loss": 100709.35, "step": 21350 }, { "epoch": 0.04314855141263024, - "grad_norm": 218.7997283935547, + "grad_norm": 80432.5078125, "learning_rate": 4.272000000000001e-06, - "loss": 27.2796, + "loss": 82761.1187, "step": 21360 }, { "epoch": 0.043168752045314056, - "grad_norm": 209.17771911621094, + "grad_norm": 46250.59375, "learning_rate": 4.274e-06, - "loss": 34.5368, + "loss": 167177.3875, "step": 21370 }, { "epoch": 0.043188952677997876, - "grad_norm": 258.9840393066406, + "grad_norm": 9839.619140625, "learning_rate": 4.276e-06, - "loss": 29.2008, + "loss": 124389.5125, "step": 21380 }, { "epoch": 0.04320915331068169, - "grad_norm": 283.368408203125, + "grad_norm": 457368.0625, "learning_rate": 4.278e-06, - "loss": 52.7408, + "loss": 349019.825, "step": 21390 }, { "epoch": 0.04322935394336551, - "grad_norm": 309.7923583984375, + "grad_norm": 121816.6640625, "learning_rate": 4.2800000000000005e-06, - "loss": 27.4384, + "loss": 117340.8, "step": 21400 }, { "epoch": 0.04324955457604932, - "grad_norm": 275.92950439453125, + "grad_norm": 333285.90625, "learning_rate": 4.282000000000001e-06, - "loss": 35.0403, + "loss": 157099.425, "step": 21410 }, { "epoch": 0.043269755208733135, - "grad_norm": 69.03273010253906, + "grad_norm": 6896.1220703125, "learning_rate": 4.284e-06, - "loss": 45.1349, + "loss": 226818.25, "step": 21420 }, { "epoch": 0.043289955841416955, - "grad_norm": 198.91543579101562, + "grad_norm": 12212.171875, "learning_rate": 4.286e-06, - "loss": 43.4459, + "loss": 265353.0, "step": 21430 }, { "epoch": 0.04331015647410077, - "grad_norm": 220.49310302734375, + "grad_norm": 162066.015625, "learning_rate": 4.288e-06, - "loss": 28.2382, + "loss": 164990.525, "step": 21440 }, { "epoch": 0.04333035710678458, - "grad_norm": 447.1377868652344, + "grad_norm": 61208.41015625, "learning_rate": 4.2900000000000004e-06, - "loss": 28.5718, + "loss": 97644.5437, "step": 21450 }, { "epoch": 0.0433505577394684, - "grad_norm": 493.10675048828125, + "grad_norm": 335652.78125, "learning_rate": 4.292000000000001e-06, - "loss": 43.5559, + "loss": 356103.65, "step": 21460 }, { "epoch": 0.043370758372152214, - "grad_norm": 463.8993225097656, + "grad_norm": 286237.875, "learning_rate": 4.294000000000001e-06, - "loss": 30.0089, + "loss": 134400.9, "step": 21470 }, { "epoch": 0.043390959004836034, - "grad_norm": 132.20335388183594, + "grad_norm": 65817.1796875, "learning_rate": 4.296e-06, - "loss": 36.3238, + "loss": 190906.0375, "step": 21480 }, { "epoch": 0.04341115963751985, - "grad_norm": 255.9840850830078, + "grad_norm": 81050.796875, "learning_rate": 4.298e-06, - "loss": 49.801, + "loss": 306841.875, "step": 21490 }, { "epoch": 0.04343136027020366, - "grad_norm": 36.15985870361328, + "grad_norm": 87952.6328125, "learning_rate": 4.3e-06, - "loss": 43.9862, + "loss": 312908.55, "step": 21500 }, { "epoch": 0.04345156090288748, - "grad_norm": 467.6071472167969, + "grad_norm": 128196.9921875, "learning_rate": 4.3020000000000005e-06, - "loss": 24.1195, + "loss": 101537.4688, "step": 21510 }, { "epoch": 0.04347176153557129, - "grad_norm": 1086.1910400390625, + "grad_norm": 1388941.5, "learning_rate": 4.304000000000001e-06, - "loss": 81.2888, + "loss": 622814.55, "step": 21520 }, { "epoch": 0.043491962168255106, - "grad_norm": 307.3807067871094, + "grad_norm": 72728.109375, "learning_rate": 4.306e-06, - "loss": 30.767, + "loss": 135993.8375, "step": 21530 }, { "epoch": 0.043512162800938926, - "grad_norm": 321.68975830078125, + "grad_norm": 61806.86328125, "learning_rate": 4.308000000000001e-06, - "loss": 42.3517, + "loss": 269072.875, "step": 21540 }, { "epoch": 0.04353236343362274, - "grad_norm": 282.8983459472656, + "grad_norm": 25730.072265625, "learning_rate": 4.31e-06, - "loss": 30.696, + "loss": 153953.2625, "step": 21550 }, { "epoch": 0.04355256406630656, - "grad_norm": 210.27423095703125, + "grad_norm": 37087.26953125, "learning_rate": 4.312e-06, - "loss": 18.0205, + "loss": 77559.6187, "step": 21560 }, { "epoch": 0.04357276469899037, - "grad_norm": 36.78266906738281, + "grad_norm": 87978.6875, "learning_rate": 4.3140000000000005e-06, - "loss": 32.5776, + "loss": 188781.7375, "step": 21570 }, { "epoch": 0.043592965331674186, - "grad_norm": 416.0316162109375, + "grad_norm": 192064.9375, "learning_rate": 4.316e-06, - "loss": 42.8172, + "loss": 204829.1, "step": 21580 }, { "epoch": 0.043613165964358006, - "grad_norm": 762.2164916992188, + "grad_norm": 653275.0625, "learning_rate": 4.318000000000001e-06, - "loss": 33.9217, + "loss": 225568.25, "step": 21590 }, { "epoch": 0.04363336659704182, - "grad_norm": 359.6988525390625, + "grad_norm": 391062.03125, "learning_rate": 4.32e-06, - "loss": 32.7206, + "loss": 198186.1625, "step": 21600 }, { "epoch": 0.04365356722972563, - "grad_norm": 395.9289855957031, + "grad_norm": 259952.703125, "learning_rate": 4.322e-06, - "loss": 35.3105, + "loss": 285046.575, "step": 21610 }, { "epoch": 0.04367376786240945, - "grad_norm": 169.65855407714844, + "grad_norm": 42848.24609375, "learning_rate": 4.3240000000000004e-06, - "loss": 53.9002, + "loss": 279096.95, "step": 21620 }, { "epoch": 0.043693968495093265, - "grad_norm": 235.93019104003906, + "grad_norm": 83974.984375, "learning_rate": 4.326000000000001e-06, - "loss": 44.7782, + "loss": 379263.0, "step": 21630 }, { "epoch": 0.043714169127777085, - "grad_norm": 285.8537902832031, + "grad_norm": 251933.75, "learning_rate": 4.328000000000001e-06, - "loss": 34.2965, + "loss": 167646.425, "step": 21640 }, { "epoch": 0.0437343697604609, - "grad_norm": 101.22665405273438, + "grad_norm": 12439.4072265625, "learning_rate": 4.33e-06, - "loss": 40.3858, + "loss": 232307.15, "step": 21650 }, { "epoch": 0.04375457039314471, - "grad_norm": 259.66943359375, + "grad_norm": 7514.208984375, "learning_rate": 4.332e-06, - "loss": 42.1844, + "loss": 245631.85, "step": 21660 }, { "epoch": 0.04377477102582853, "grad_norm": 0.0, "learning_rate": 4.334e-06, - "loss": 22.2301, + "loss": 141283.4125, "step": 21670 }, { "epoch": 0.043794971658512344, - "grad_norm": 1392.5623779296875, + "grad_norm": 1242608.5, "learning_rate": 4.3360000000000005e-06, - "loss": 44.3904, + "loss": 207149.2, "step": 21680 }, { "epoch": 0.04381517229119616, - "grad_norm": 336.18157958984375, + "grad_norm": 94926.21875, "learning_rate": 4.338000000000001e-06, - "loss": 39.7264, + "loss": 361653.875, "step": 21690 }, { "epoch": 0.04383537292387998, - "grad_norm": 374.5557861328125, + "grad_norm": 77670.234375, "learning_rate": 4.34e-06, - "loss": 49.7355, + "loss": 314235.675, "step": 21700 }, { "epoch": 0.04385557355656379, - "grad_norm": 280.235595703125, + "grad_norm": 105866.0, "learning_rate": 4.342e-06, - "loss": 36.2126, + "loss": 189378.15, "step": 21710 }, { "epoch": 0.04387577418924761, - "grad_norm": 228.18426513671875, + "grad_norm": 89760.15625, "learning_rate": 4.344e-06, - "loss": 27.9623, + "loss": 128675.125, "step": 21720 }, { "epoch": 0.04389597482193142, - "grad_norm": 472.9103698730469, + "grad_norm": 662868.6875, "learning_rate": 4.346e-06, - "loss": 47.5958, + "loss": 316271.1, "step": 21730 }, { "epoch": 0.043916175454615236, - "grad_norm": 307.89508056640625, + "grad_norm": 79410.8671875, "learning_rate": 4.3480000000000006e-06, - "loss": 41.9201, + "loss": 278385.375, "step": 21740 }, { "epoch": 0.043936376087299056, - "grad_norm": 601.6486206054688, + "grad_norm": 580056.3125, "learning_rate": 4.350000000000001e-06, - "loss": 47.6524, + "loss": 219053.8, "step": 21750 }, { "epoch": 0.04395657671998287, - "grad_norm": 507.53900146484375, + "grad_norm": 718199.4375, "learning_rate": 4.352e-06, - "loss": 36.4503, + "loss": 192170.325, "step": 21760 }, { "epoch": 0.04397677735266668, - "grad_norm": 192.6024932861328, + "grad_norm": 33811.4375, "learning_rate": 4.354e-06, - "loss": 36.7982, + "loss": 218253.95, "step": 21770 }, { "epoch": 0.0439969779853505, - "grad_norm": 127.53495788574219, + "grad_norm": 7985.701171875, "learning_rate": 4.356e-06, - "loss": 36.497, + "loss": 125563.55, "step": 21780 }, { "epoch": 0.044017178618034315, - "grad_norm": 300.4747009277344, + "grad_norm": 93240.8203125, "learning_rate": 4.3580000000000005e-06, - "loss": 32.1423, + "loss": 178930.675, "step": 21790 }, { "epoch": 0.044037379250718135, - "grad_norm": 238.71231079101562, + "grad_norm": 90367.3828125, "learning_rate": 4.360000000000001e-06, - "loss": 38.0991, + "loss": 210335.4, "step": 21800 }, { "epoch": 0.04405757988340195, - "grad_norm": 84.91683959960938, + "grad_norm": 2482.411865234375, "learning_rate": 4.362e-06, - "loss": 22.3992, + "loss": 64180.9375, "step": 21810 }, { "epoch": 0.04407778051608576, - "grad_norm": 438.54864501953125, + "grad_norm": 229102.765625, "learning_rate": 4.364e-06, - "loss": 29.2043, + "loss": 124417.05, "step": 21820 }, { "epoch": 0.04409798114876958, - "grad_norm": 780.3211059570312, + "grad_norm": 1100651.25, "learning_rate": 4.366e-06, - "loss": 36.352, + "loss": 302131.0, "step": 21830 }, { "epoch": 0.044118181781453394, - "grad_norm": 217.75856018066406, + "grad_norm": 73687.734375, "learning_rate": 4.368e-06, - "loss": 24.0482, + "loss": 96149.2688, "step": 21840 }, { "epoch": 0.04413838241413721, - "grad_norm": 439.7067565917969, + "grad_norm": 382268.1875, "learning_rate": 4.3700000000000005e-06, - "loss": 37.1481, + "loss": 220028.025, "step": 21850 }, { "epoch": 0.04415858304682103, - "grad_norm": 309.642578125, + "grad_norm": 88663.015625, "learning_rate": 4.372e-06, - "loss": 26.6976, + "loss": 90815.3687, "step": 21860 }, { "epoch": 0.04417878367950484, - "grad_norm": 169.34609985351562, + "grad_norm": 40016.44140625, "learning_rate": 4.374000000000001e-06, - "loss": 25.6863, + "loss": 174733.375, "step": 21870 }, { "epoch": 0.04419898431218866, - "grad_norm": 357.8691101074219, + "grad_norm": 233768.203125, "learning_rate": 4.376e-06, - "loss": 50.9932, + "loss": 417042.95, "step": 21880 }, { "epoch": 0.044219184944872474, - "grad_norm": 397.551513671875, + "grad_norm": 490588.625, "learning_rate": 4.378e-06, - "loss": 23.5169, + "loss": 140733.4125, "step": 21890 }, { "epoch": 0.04423938557755629, - "grad_norm": 453.8232727050781, + "grad_norm": 163535.96875, "learning_rate": 4.38e-06, - "loss": 24.9619, + "loss": 67760.6875, "step": 21900 }, { "epoch": 0.04425958621024011, - "grad_norm": 287.2008972167969, + "grad_norm": 138783.046875, "learning_rate": 4.382e-06, - "loss": 18.2778, + "loss": 56831.075, "step": 21910 }, { "epoch": 0.04427978684292392, - "grad_norm": 167.90672302246094, + "grad_norm": 32106.146484375, "learning_rate": 4.384000000000001e-06, - "loss": 27.0911, + "loss": 144475.0625, "step": 21920 }, { "epoch": 0.04429998747560773, - "grad_norm": 222.32225036621094, + "grad_norm": 78901.71875, "learning_rate": 4.386e-06, - "loss": 23.165, + "loss": 57274.4375, "step": 21930 }, { "epoch": 0.04432018810829155, - "grad_norm": 32.362789154052734, + "grad_norm": 10151.48828125, "learning_rate": 4.388e-06, - "loss": 20.8344, + "loss": 103449.1812, "step": 21940 }, { "epoch": 0.044340388740975366, - "grad_norm": 1063.19140625, + "grad_norm": 870837.875, "learning_rate": 4.39e-06, - "loss": 51.5889, + "loss": 218999.9, "step": 21950 }, { "epoch": 0.044360589373659186, - "grad_norm": 151.3734130859375, + "grad_norm": 5903.890625, "learning_rate": 4.3920000000000005e-06, - "loss": 16.7795, + "loss": 71105.375, "step": 21960 }, { "epoch": 0.044380790006343, - "grad_norm": 587.815673828125, + "grad_norm": 557564.3125, "learning_rate": 4.394000000000001e-06, - "loss": 67.0913, + "loss": 337785.075, "step": 21970 }, { "epoch": 0.04440099063902681, - "grad_norm": 45.843868255615234, + "grad_norm": 9936.697265625, "learning_rate": 4.396e-06, - "loss": 60.2924, + "loss": 308378.2, "step": 21980 }, { "epoch": 0.04442119127171063, - "grad_norm": 256.3442687988281, + "grad_norm": 155534.296875, "learning_rate": 4.398000000000001e-06, - "loss": 43.1731, + "loss": 390012.775, "step": 21990 }, { "epoch": 0.044441391904394445, - "grad_norm": 69.2979965209961, + "grad_norm": 9794.0341796875, "learning_rate": 4.4e-06, - "loss": 22.1585, + "loss": 66919.8938, "step": 22000 }, { "epoch": 0.04446159253707826, - "grad_norm": 413.2718200683594, + "grad_norm": 536924.875, "learning_rate": 4.402e-06, - "loss": 38.8711, + "loss": 258347.0, "step": 22010 }, { "epoch": 0.04448179316976208, - "grad_norm": 1238.522216796875, + "grad_norm": 1437984.625, "learning_rate": 4.4040000000000005e-06, - "loss": 42.8863, + "loss": 410829.725, "step": 22020 }, { "epoch": 0.04450199380244589, - "grad_norm": 415.684326171875, + "grad_norm": 11451.146484375, "learning_rate": 4.406000000000001e-06, - "loss": 41.3031, + "loss": 247048.4, "step": 22030 }, { "epoch": 0.04452219443512971, - "grad_norm": 616.5980834960938, + "grad_norm": 348007.0625, "learning_rate": 4.408000000000001e-06, - "loss": 28.795, + "loss": 127371.8875, "step": 22040 }, { "epoch": 0.044542395067813524, - "grad_norm": 567.5768432617188, + "grad_norm": 1345988.375, "learning_rate": 4.41e-06, - "loss": 43.7818, + "loss": 355132.8, "step": 22050 }, { "epoch": 0.04456259570049734, - "grad_norm": 226.39715576171875, + "grad_norm": 62232.125, "learning_rate": 4.412e-06, - "loss": 31.4243, + "loss": 229707.675, "step": 22060 }, { "epoch": 0.04458279633318116, - "grad_norm": 277.405517578125, + "grad_norm": 139452.953125, "learning_rate": 4.4140000000000004e-06, - "loss": 43.7672, + "loss": 222070.325, "step": 22070 }, { "epoch": 0.04460299696586497, - "grad_norm": 228.32418823242188, + "grad_norm": 218985.390625, "learning_rate": 4.416000000000001e-06, - "loss": 32.853, + "loss": 155525.0, "step": 22080 }, { "epoch": 0.04462319759854878, - "grad_norm": 294.9041442871094, + "grad_norm": 38900.65625, "learning_rate": 4.418000000000001e-06, - "loss": 34.7952, + "loss": 209101.125, "step": 22090 }, { "epoch": 0.0446433982312326, - "grad_norm": 178.13169860839844, + "grad_norm": 341184.8125, "learning_rate": 4.42e-06, - "loss": 23.8155, + "loss": 165061.85, "step": 22100 }, { "epoch": 0.044663598863916416, - "grad_norm": 281.69317626953125, + "grad_norm": 88758.078125, "learning_rate": 4.422e-06, - "loss": 29.139, + "loss": 135616.25, "step": 22110 }, { "epoch": 0.044683799496600236, - "grad_norm": 252.3081817626953, + "grad_norm": 11743.875, "learning_rate": 4.424e-06, - "loss": 18.0647, + "loss": 78324.4312, "step": 22120 }, { "epoch": 0.04470400012928405, - "grad_norm": 67.35968017578125, + "grad_norm": 90495.765625, "learning_rate": 4.4260000000000005e-06, - "loss": 19.8382, + "loss": 109121.6375, "step": 22130 }, { "epoch": 0.04472420076196786, - "grad_norm": 135.77525329589844, + "grad_norm": 2062.639892578125, "learning_rate": 4.428000000000001e-06, - "loss": 32.8386, + "loss": 146206.3875, "step": 22140 }, { "epoch": 0.04474440139465168, - "grad_norm": 611.4405517578125, + "grad_norm": 500358.8125, "learning_rate": 4.430000000000001e-06, - "loss": 34.8868, + "loss": 194516.5, "step": 22150 }, { "epoch": 0.044764602027335496, - "grad_norm": 271.38336181640625, + "grad_norm": 117024.9296875, "learning_rate": 4.432e-06, - "loss": 36.5795, + "loss": 155849.7125, "step": 22160 }, { "epoch": 0.04478480266001931, - "grad_norm": 321.98797607421875, + "grad_norm": 29690.154296875, "learning_rate": 4.434e-06, - "loss": 28.9508, + "loss": 161167.75, "step": 22170 }, { "epoch": 0.04480500329270313, - "grad_norm": 532.3291625976562, + "grad_norm": 677993.9375, "learning_rate": 4.436e-06, - "loss": 25.2873, + "loss": 174761.9375, "step": 22180 }, { "epoch": 0.04482520392538694, - "grad_norm": 192.2565460205078, + "grad_norm": 65728.578125, "learning_rate": 4.438e-06, - "loss": 22.3823, + "loss": 117086.3375, "step": 22190 }, { "epoch": 0.04484540455807076, - "grad_norm": 393.37213134765625, + "grad_norm": 345945.53125, "learning_rate": 4.440000000000001e-06, - "loss": 67.2099, + "loss": 351028.1, "step": 22200 }, { "epoch": 0.044865605190754575, - "grad_norm": 0.0, + "grad_norm": 1328482.875, "learning_rate": 4.442e-06, - "loss": 41.9024, + "loss": 404236.3, "step": 22210 }, { "epoch": 0.04488580582343839, - "grad_norm": 355.0119934082031, + "grad_norm": 447462.25, "learning_rate": 4.444e-06, - "loss": 24.5312, + "loss": 206722.0125, "step": 22220 }, { "epoch": 0.04490600645612221, - "grad_norm": 1475.906982421875, + "grad_norm": 1596166.5, "learning_rate": 4.446e-06, - "loss": 41.4057, + "loss": 287232.275, "step": 22230 }, { "epoch": 0.04492620708880602, - "grad_norm": 277.4278869628906, + "grad_norm": 99188.90625, "learning_rate": 4.4480000000000004e-06, - "loss": 25.8214, + "loss": 129704.05, "step": 22240 }, { "epoch": 0.044946407721489834, - "grad_norm": 106.08101654052734, + "grad_norm": 10164.697265625, "learning_rate": 4.450000000000001e-06, - "loss": 15.1555, + "loss": 40478.4906, "step": 22250 }, { "epoch": 0.044966608354173654, - "grad_norm": 300.8702697753906, + "grad_norm": 99212.6796875, "learning_rate": 4.452e-06, - "loss": 17.3687, + "loss": 171169.9625, "step": 22260 }, { "epoch": 0.04498680898685747, - "grad_norm": 495.77703857421875, + "grad_norm": 137844.734375, "learning_rate": 4.454000000000001e-06, - "loss": 36.3726, + "loss": 132813.65, "step": 22270 }, { "epoch": 0.04500700961954129, - "grad_norm": 225.75399780273438, + "grad_norm": 41528.8203125, "learning_rate": 4.456e-06, - "loss": 49.7217, + "loss": 277462.325, "step": 22280 }, { "epoch": 0.0450272102522251, - "grad_norm": 921.94677734375, + "grad_norm": 1836535.75, "learning_rate": 4.458e-06, - "loss": 58.6378, + "loss": 574689.35, "step": 22290 }, { "epoch": 0.04504741088490891, - "grad_norm": 230.3359375, + "grad_norm": 35331.19140625, "learning_rate": 4.4600000000000005e-06, - "loss": 28.7533, + "loss": 172445.1125, "step": 22300 }, { "epoch": 0.04506761151759273, - "grad_norm": 188.73988342285156, + "grad_norm": 10455.73828125, "learning_rate": 4.462e-06, - "loss": 24.0754, + "loss": 123354.3625, "step": 22310 }, { "epoch": 0.045087812150276546, - "grad_norm": 361.24432373046875, + "grad_norm": 230753.34375, "learning_rate": 4.464000000000001e-06, - "loss": 47.7305, + "loss": 283932.3, "step": 22320 }, { "epoch": 0.04510801278296036, - "grad_norm": 221.80642700195312, + "grad_norm": 178843.203125, "learning_rate": 4.466e-06, - "loss": 33.0072, + "loss": 227817.0, "step": 22330 }, { "epoch": 0.04512821341564418, - "grad_norm": 256.0701599121094, + "grad_norm": 109040.8828125, "learning_rate": 4.468e-06, - "loss": 28.1957, + "loss": 144554.875, "step": 22340 }, { "epoch": 0.04514841404832799, - "grad_norm": 254.6149444580078, + "grad_norm": 96351.8828125, "learning_rate": 4.47e-06, - "loss": 27.2295, + "loss": 234532.0, "step": 22350 }, { "epoch": 0.04516861468101181, - "grad_norm": 353.35107421875, + "grad_norm": 306953.53125, "learning_rate": 4.4720000000000006e-06, - "loss": 26.6498, + "loss": 125460.9, "step": 22360 }, { "epoch": 0.045188815313695625, - "grad_norm": 288.8414611816406, + "grad_norm": 38948.4453125, "learning_rate": 4.474000000000001e-06, - "loss": 26.8834, + "loss": 135323.1375, "step": 22370 }, { "epoch": 0.04520901594637944, - "grad_norm": 479.2511291503906, + "grad_norm": 113675.2734375, "learning_rate": 4.476e-06, - "loss": 27.7844, + "loss": 289233.45, "step": 22380 }, { "epoch": 0.04522921657906326, - "grad_norm": 160.38461303710938, + "grad_norm": 1605.0184326171875, "learning_rate": 4.478e-06, - "loss": 50.0185, + "loss": 325297.225, "step": 22390 }, { "epoch": 0.04524941721174707, - "grad_norm": 309.38104248046875, + "grad_norm": 53380.13671875, "learning_rate": 4.48e-06, - "loss": 59.0626, + "loss": 367135.85, "step": 22400 }, { "epoch": 0.045269617844430884, - "grad_norm": 464.31689453125, + "grad_norm": 159282.125, "learning_rate": 4.4820000000000005e-06, - "loss": 34.7741, + "loss": 256580.95, "step": 22410 }, { "epoch": 0.045289818477114704, - "grad_norm": 312.4227294921875, + "grad_norm": 217002.84375, "learning_rate": 4.484000000000001e-06, - "loss": 35.0636, + "loss": 135524.7375, "step": 22420 }, { "epoch": 0.04531001910979852, - "grad_norm": 598.5370483398438, + "grad_norm": 13347.0673828125, "learning_rate": 4.486000000000001e-06, - "loss": 42.6343, + "loss": 217820.9, "step": 22430 }, { "epoch": 0.04533021974248234, - "grad_norm": 370.20147705078125, + "grad_norm": 113705.875, "learning_rate": 4.488e-06, - "loss": 44.871, + "loss": 209579.825, "step": 22440 }, { "epoch": 0.04535042037516615, - "grad_norm": 195.80914306640625, + "grad_norm": 60030.08203125, "learning_rate": 4.49e-06, - "loss": 31.4365, + "loss": 201331.125, "step": 22450 }, { "epoch": 0.045370621007849964, - "grad_norm": 362.7513427734375, + "grad_norm": 106788.75, "learning_rate": 4.492e-06, - "loss": 28.8031, + "loss": 177811.5875, "step": 22460 }, { "epoch": 0.045390821640533784, - "grad_norm": 921.4364624023438, + "grad_norm": 1237642.25, "learning_rate": 4.4940000000000005e-06, - "loss": 39.8159, + "loss": 239500.95, "step": 22470 }, { "epoch": 0.0454110222732176, - "grad_norm": 613.3515014648438, + "grad_norm": 748160.8125, "learning_rate": 4.496000000000001e-06, - "loss": 56.5131, + "loss": 472685.65, "step": 22480 }, { "epoch": 0.04543122290590141, - "grad_norm": 354.8691101074219, + "grad_norm": 121813.2734375, "learning_rate": 4.498e-06, - "loss": 32.4083, + "loss": 139285.85, "step": 22490 }, { "epoch": 0.04545142353858523, - "grad_norm": 226.96910095214844, + "grad_norm": 44167.72265625, "learning_rate": 4.5e-06, - "loss": 28.8302, + "loss": 179424.4125, "step": 22500 }, { "epoch": 0.04547162417126904, - "grad_norm": 466.5164489746094, + "grad_norm": 646647.0, "learning_rate": 4.502e-06, - "loss": 32.9258, + "loss": 168671.725, "step": 22510 }, { "epoch": 0.04549182480395286, - "grad_norm": 357.9403381347656, + "grad_norm": 188229.578125, "learning_rate": 4.504e-06, - "loss": 44.7259, + "loss": 300065.95, "step": 22520 }, { "epoch": 0.045512025436636676, - "grad_norm": 212.605712890625, + "grad_norm": 6820.8798828125, "learning_rate": 4.5060000000000006e-06, - "loss": 40.4262, + "loss": 246785.375, "step": 22530 }, { "epoch": 0.04553222606932049, - "grad_norm": 564.6653442382812, + "grad_norm": 831516.875, "learning_rate": 4.508e-06, - "loss": 40.581, + "loss": 220031.05, "step": 22540 }, { "epoch": 0.04555242670200431, - "grad_norm": 306.528564453125, + "grad_norm": 18118.431640625, "learning_rate": 4.510000000000001e-06, - "loss": 25.7713, + "loss": 124386.95, "step": 22550 }, { "epoch": 0.04557262733468812, - "grad_norm": 234.44297790527344, + "grad_norm": 5847.3603515625, "learning_rate": 4.512e-06, - "loss": 29.1724, + "loss": 302869.75, "step": 22560 }, { "epoch": 0.045592827967371935, - "grad_norm": 160.4866485595703, + "grad_norm": 13499.7412109375, "learning_rate": 4.514e-06, - "loss": 62.8937, + "loss": 281759.8, "step": 22570 }, { "epoch": 0.045613028600055755, - "grad_norm": 1656.3267822265625, + "grad_norm": 164023.765625, "learning_rate": 4.5160000000000005e-06, - "loss": 41.0898, + "loss": 117237.075, "step": 22580 }, { "epoch": 0.04563322923273957, - "grad_norm": 350.4644775390625, + "grad_norm": 258751.71875, "learning_rate": 4.518e-06, - "loss": 33.7665, + "loss": 176513.475, "step": 22590 }, { "epoch": 0.04565342986542339, - "grad_norm": 100.78395080566406, + "grad_norm": 18916.5078125, "learning_rate": 4.520000000000001e-06, - "loss": 17.6624, + "loss": 52117.5312, "step": 22600 }, { "epoch": 0.0456736304981072, - "grad_norm": 213.51217651367188, + "grad_norm": 22671.84765625, "learning_rate": 4.522e-06, - "loss": 29.5539, + "loss": 153797.575, "step": 22610 }, { "epoch": 0.045693831130791014, - "grad_norm": 164.36734008789062, + "grad_norm": 33070.8046875, "learning_rate": 4.524e-06, - "loss": 39.8798, + "loss": 166721.6, "step": 22620 }, { "epoch": 0.045714031763474834, - "grad_norm": 450.3773193359375, + "grad_norm": 187165.15625, "learning_rate": 4.526e-06, - "loss": 39.0242, + "loss": 163069.325, "step": 22630 }, { "epoch": 0.04573423239615865, - "grad_norm": 311.81683349609375, + "grad_norm": 48526.640625, "learning_rate": 4.5280000000000005e-06, - "loss": 35.9892, + "loss": 131340.8125, "step": 22640 }, { "epoch": 0.04575443302884246, - "grad_norm": 315.1264343261719, + "grad_norm": 106301.84375, "learning_rate": 4.530000000000001e-06, - "loss": 26.1883, + "loss": 82293.4875, "step": 22650 }, { "epoch": 0.04577463366152628, - "grad_norm": 1015.3447265625, + "grad_norm": 1349613.25, "learning_rate": 4.532e-06, - "loss": 19.782, + "loss": 178017.475, "step": 22660 }, { "epoch": 0.04579483429421009, - "grad_norm": 146.39332580566406, + "grad_norm": 6500.0419921875, "learning_rate": 4.534000000000001e-06, - "loss": 26.6612, + "loss": 158913.575, "step": 22670 }, { "epoch": 0.04581503492689391, - "grad_norm": 575.60693359375, + "grad_norm": 785806.8125, "learning_rate": 4.536e-06, - "loss": 39.4156, + "loss": 230519.1, "step": 22680 }, { "epoch": 0.045835235559577726, - "grad_norm": 135.11111450195312, + "grad_norm": 11484.5166015625, "learning_rate": 4.5380000000000004e-06, - "loss": 42.7714, + "loss": 229247.9, "step": 22690 }, { "epoch": 0.04585543619226154, - "grad_norm": 62.020042419433594, + "grad_norm": 4767.66650390625, "learning_rate": 4.540000000000001e-06, - "loss": 41.5205, + "loss": 437236.15, "step": 22700 }, { "epoch": 0.04587563682494536, - "grad_norm": 418.8486633300781, + "grad_norm": 404515.0, "learning_rate": 4.542e-06, - "loss": 42.9501, + "loss": 268137.375, "step": 22710 }, { "epoch": 0.04589583745762917, - "grad_norm": 317.9814147949219, + "grad_norm": 49699.6953125, "learning_rate": 4.544000000000001e-06, - "loss": 50.0577, + "loss": 265155.15, "step": 22720 }, { "epoch": 0.045916038090312986, - "grad_norm": 311.05535888671875, + "grad_norm": 443849.34375, "learning_rate": 4.546e-06, - "loss": 36.0328, + "loss": 281695.55, "step": 22730 }, { "epoch": 0.045936238722996806, - "grad_norm": 309.5693664550781, + "grad_norm": 1833.2943115234375, "learning_rate": 4.548e-06, - "loss": 17.6359, + "loss": 72122.6125, "step": 22740 }, { "epoch": 0.04595643935568062, - "grad_norm": 269.7965393066406, + "grad_norm": 21096.556640625, "learning_rate": 4.5500000000000005e-06, - "loss": 29.5682, + "loss": 135700.9125, "step": 22750 }, { "epoch": 0.04597663998836444, - "grad_norm": 201.2880859375, + "grad_norm": 9609.837890625, "learning_rate": 4.552000000000001e-06, - "loss": 37.9817, + "loss": 245123.425, "step": 22760 }, { "epoch": 0.04599684062104825, - "grad_norm": 97.00022888183594, + "grad_norm": 16329.822265625, "learning_rate": 4.554000000000001e-06, - "loss": 24.4323, + "loss": 127496.5375, "step": 22770 }, { "epoch": 0.046017041253732065, - "grad_norm": 216.03265380859375, + "grad_norm": 68789.6875, "learning_rate": 4.556e-06, - "loss": 31.051, + "loss": 172427.3375, "step": 22780 }, { "epoch": 0.046037241886415885, - "grad_norm": 357.0312194824219, + "grad_norm": 128914.46875, "learning_rate": 4.558e-06, - "loss": 24.9925, + "loss": 121579.275, "step": 22790 }, { "epoch": 0.0460574425190997, - "grad_norm": 289.9405517578125, + "grad_norm": 367681.53125, "learning_rate": 4.56e-06, - "loss": 40.7071, + "loss": 324490.075, "step": 22800 }, { "epoch": 0.04607764315178351, - "grad_norm": 278.6811828613281, + "grad_norm": 165349.578125, "learning_rate": 4.5620000000000005e-06, - "loss": 27.6926, + "loss": 130620.075, "step": 22810 }, { "epoch": 0.04609784378446733, - "grad_norm": 350.1117248535156, + "grad_norm": 74005.5546875, "learning_rate": 4.564e-06, - "loss": 28.5968, + "loss": 121624.575, "step": 22820 }, { "epoch": 0.046118044417151144, - "grad_norm": 224.86300659179688, + "grad_norm": 18430.09375, "learning_rate": 4.566000000000001e-06, - "loss": 41.4569, + "loss": 239052.45, "step": 22830 }, { "epoch": 0.046138245049834964, - "grad_norm": 513.5762939453125, + "grad_norm": 145710.5, "learning_rate": 4.568e-06, - "loss": 43.4404, + "loss": 312222.675, "step": 22840 }, { "epoch": 0.04615844568251878, - "grad_norm": 153.2064666748047, + "grad_norm": 263679.96875, "learning_rate": 4.57e-06, - "loss": 28.0236, + "loss": 173585.0875, "step": 22850 }, { "epoch": 0.04617864631520259, - "grad_norm": 478.1938781738281, + "grad_norm": 242873.1875, "learning_rate": 4.5720000000000004e-06, - "loss": 38.9502, + "loss": 211317.65, "step": 22860 }, { "epoch": 0.04619884694788641, - "grad_norm": 835.1389770507812, + "grad_norm": 1387912.25, "learning_rate": 4.574e-06, - "loss": 30.5866, + "loss": 236449.05, "step": 22870 }, { "epoch": 0.04621904758057022, - "grad_norm": 449.0034484863281, + "grad_norm": 135328.921875, "learning_rate": 4.576000000000001e-06, - "loss": 38.8385, + "loss": 175957.3125, "step": 22880 }, { "epoch": 0.046239248213254036, - "grad_norm": 122.23052215576172, + "grad_norm": 13085.3173828125, "learning_rate": 4.578e-06, - "loss": 34.8877, + "loss": 233247.625, "step": 22890 }, { "epoch": 0.046259448845937856, - "grad_norm": 250.9205322265625, + "grad_norm": 305385.25, "learning_rate": 4.58e-06, - "loss": 26.8342, + "loss": 157495.3, "step": 22900 }, { "epoch": 0.04627964947862167, - "grad_norm": 373.60589599609375, + "grad_norm": 153854.875, "learning_rate": 4.582e-06, - "loss": 42.1963, + "loss": 333751.45, "step": 22910 }, { "epoch": 0.04629985011130549, - "grad_norm": 236.33836364746094, + "grad_norm": 93705.9765625, "learning_rate": 4.5840000000000005e-06, - "loss": 36.0522, + "loss": 249993.425, "step": 22920 }, { "epoch": 0.0463200507439893, - "grad_norm": 301.6274108886719, + "grad_norm": 74621.109375, "learning_rate": 4.586000000000001e-06, - "loss": 43.6349, + "loss": 364071.175, "step": 22930 }, { "epoch": 0.046340251376673115, - "grad_norm": 432.77850341796875, + "grad_norm": 117431.65625, "learning_rate": 4.588e-06, - "loss": 22.031, + "loss": 67453.95, "step": 22940 }, { "epoch": 0.046360452009356935, - "grad_norm": 330.04632568359375, + "grad_norm": 102397.2578125, "learning_rate": 4.590000000000001e-06, - "loss": 30.1454, + "loss": 200425.9875, "step": 22950 }, { "epoch": 0.04638065264204075, - "grad_norm": 481.8309631347656, + "grad_norm": 19236.435546875, "learning_rate": 4.592e-06, - "loss": 49.3018, + "loss": 358391.675, "step": 22960 }, { "epoch": 0.04640085327472456, - "grad_norm": 182.36968994140625, + "grad_norm": 167783.421875, "learning_rate": 4.594e-06, - "loss": 29.0452, + "loss": 130793.925, "step": 22970 }, { "epoch": 0.04642105390740838, - "grad_norm": 183.73507690429688, + "grad_norm": 89681.96875, "learning_rate": 4.5960000000000006e-06, - "loss": 30.6057, + "loss": 164067.975, "step": 22980 }, { "epoch": 0.046441254540092194, - "grad_norm": 215.35194396972656, + "grad_norm": 164326.8125, "learning_rate": 4.598e-06, - "loss": 30.7552, + "loss": 206077.9, "step": 22990 }, { "epoch": 0.046461455172776014, - "grad_norm": 397.8327331542969, + "grad_norm": 267333.625, "learning_rate": 4.600000000000001e-06, - "loss": 38.2998, + "loss": 240074.15, "step": 23000 }, { "epoch": 0.04648165580545983, - "grad_norm": 362.9880676269531, + "grad_norm": 85064.7109375, "learning_rate": 4.602e-06, - "loss": 36.6341, + "loss": 128278.0625, "step": 23010 }, { "epoch": 0.04650185643814364, - "grad_norm": 192.90228271484375, + "grad_norm": 115740.5625, "learning_rate": 4.604e-06, - "loss": 26.8832, + "loss": 210796.1, "step": 23020 }, { "epoch": 0.04652205707082746, - "grad_norm": 115.16897583007812, + "grad_norm": 4666.1787109375, "learning_rate": 4.6060000000000005e-06, - "loss": 38.8671, + "loss": 284408.2, "step": 23030 }, { "epoch": 0.046542257703511274, - "grad_norm": 132.3080291748047, + "grad_norm": 10779.421875, "learning_rate": 4.608000000000001e-06, - "loss": 25.9005, + "loss": 139637.675, "step": 23040 }, { "epoch": 0.04656245833619509, - "grad_norm": 241.00857543945312, + "grad_norm": 200256.296875, "learning_rate": 4.610000000000001e-06, - "loss": 35.1976, + "loss": 185089.15, "step": 23050 }, { "epoch": 0.04658265896887891, - "grad_norm": 297.29949951171875, + "grad_norm": 38200.9765625, "learning_rate": 4.612e-06, - "loss": 27.1766, + "loss": 83293.7375, "step": 23060 }, { "epoch": 0.04660285960156272, - "grad_norm": 91.31853485107422, + "grad_norm": 5390.93212890625, "learning_rate": 4.614e-06, - "loss": 48.7416, + "loss": 345207.05, "step": 23070 }, { "epoch": 0.04662306023424654, - "grad_norm": 254.75955200195312, + "grad_norm": 87985.609375, "learning_rate": 4.616e-06, - "loss": 25.2411, + "loss": 132724.1, "step": 23080 }, { "epoch": 0.04664326086693035, - "grad_norm": 454.9356384277344, + "grad_norm": 319540.40625, "learning_rate": 4.6180000000000005e-06, - "loss": 26.4849, + "loss": 160399.45, "step": 23090 }, { "epoch": 0.046663461499614166, - "grad_norm": 411.7412414550781, + "grad_norm": 112307.1328125, "learning_rate": 4.620000000000001e-06, - "loss": 50.9135, + "loss": 162717.175, "step": 23100 }, { "epoch": 0.046683662132297986, - "grad_norm": 363.6336364746094, + "grad_norm": 160943.078125, "learning_rate": 4.622e-06, - "loss": 63.1865, + "loss": 334188.2, "step": 23110 }, { "epoch": 0.0467038627649818, - "grad_norm": 349.7676696777344, + "grad_norm": 321898.71875, "learning_rate": 4.624e-06, - "loss": 35.0445, + "loss": 286165.225, "step": 23120 }, { "epoch": 0.04672406339766561, - "grad_norm": 434.74432373046875, + "grad_norm": 301068.5625, "learning_rate": 4.626e-06, - "loss": 31.6022, + "loss": 220538.6, "step": 23130 }, { "epoch": 0.04674426403034943, - "grad_norm": 325.1117858886719, + "grad_norm": 2641.815673828125, "learning_rate": 4.628e-06, - "loss": 32.9686, + "loss": 271589.7, "step": 23140 }, { "epoch": 0.046764464663033245, - "grad_norm": 279.353271484375, + "grad_norm": 354367.625, "learning_rate": 4.6300000000000006e-06, - "loss": 23.7207, + "loss": 108231.375, "step": 23150 }, { "epoch": 0.046784665295717065, - "grad_norm": 203.7786102294922, + "grad_norm": 39087.16796875, "learning_rate": 4.632000000000001e-06, - "loss": 15.9233, + "loss": 99778.0, "step": 23160 }, { "epoch": 0.04680486592840088, - "grad_norm": 68.2558364868164, + "grad_norm": 4958.6435546875, "learning_rate": 4.634e-06, - "loss": 18.657, + "loss": 81221.9125, "step": 23170 }, { "epoch": 0.04682506656108469, - "grad_norm": 333.67999267578125, + "grad_norm": 41986.1015625, "learning_rate": 4.636e-06, - "loss": 37.6118, + "loss": 122107.4, "step": 23180 }, { "epoch": 0.04684526719376851, - "grad_norm": 172.0549774169922, + "grad_norm": 13754.970703125, "learning_rate": 4.638e-06, - "loss": 28.8333, + "loss": 191171.7, "step": 23190 }, { "epoch": 0.046865467826452324, - "grad_norm": 365.0789489746094, + "grad_norm": 255991.03125, "learning_rate": 4.6400000000000005e-06, - "loss": 25.9707, + "loss": 146205.375, "step": 23200 }, { "epoch": 0.04688566845913614, - "grad_norm": 452.52215576171875, + "grad_norm": 644058.9375, "learning_rate": 4.642000000000001e-06, - "loss": 22.5802, + "loss": 127119.1125, "step": 23210 }, { "epoch": 0.04690586909181996, - "grad_norm": 237.44772338867188, + "grad_norm": 5973.359375, "learning_rate": 4.644e-06, - "loss": 13.3965, + "loss": 43554.7344, "step": 23220 }, { "epoch": 0.04692606972450377, - "grad_norm": 308.40032958984375, + "grad_norm": 63427.38671875, "learning_rate": 4.646000000000001e-06, - "loss": 32.5556, + "loss": 211487.125, "step": 23230 }, { "epoch": 0.04694627035718759, - "grad_norm": 513.1425170898438, + "grad_norm": 32440.09375, "learning_rate": 4.648e-06, - "loss": 29.3123, + "loss": 314187.2, "step": 23240 }, { "epoch": 0.0469664709898714, - "grad_norm": 558.6598510742188, + "grad_norm": 122811.5546875, "learning_rate": 4.65e-06, - "loss": 47.8486, + "loss": 262894.7, "step": 23250 }, { "epoch": 0.046986671622555216, - "grad_norm": 335.6075439453125, + "grad_norm": 324420.9375, "learning_rate": 4.6520000000000005e-06, - "loss": 33.3238, + "loss": 200670.5, "step": 23260 }, { "epoch": 0.047006872255239036, - "grad_norm": 180.42686462402344, + "grad_norm": 10935.41015625, "learning_rate": 4.654e-06, - "loss": 49.5276, + "loss": 352320.9, "step": 23270 }, { "epoch": 0.04702707288792285, - "grad_norm": 223.03897094726562, + "grad_norm": 32590.275390625, "learning_rate": 4.656000000000001e-06, - "loss": 19.6996, + "loss": 68821.5688, "step": 23280 }, { "epoch": 0.04704727352060666, - "grad_norm": 353.6033630371094, + "grad_norm": 305925.96875, "learning_rate": 4.658e-06, - "loss": 43.4261, + "loss": 307707.6, "step": 23290 }, { "epoch": 0.04706747415329048, - "grad_norm": 408.29296875, + "grad_norm": 572477.3125, "learning_rate": 4.66e-06, - "loss": 31.6004, + "loss": 164973.6, "step": 23300 }, { "epoch": 0.047087674785974296, - "grad_norm": 176.74081420898438, + "grad_norm": 35492.32421875, "learning_rate": 4.6620000000000004e-06, - "loss": 16.1454, + "loss": 59520.5375, "step": 23310 }, { "epoch": 0.047107875418658116, - "grad_norm": 104.51349639892578, + "grad_norm": 9561.7763671875, "learning_rate": 4.664000000000001e-06, - "loss": 24.2505, + "loss": 97204.225, "step": 23320 }, { "epoch": 0.04712807605134193, - "grad_norm": 252.4790496826172, + "grad_norm": 13256.4453125, "learning_rate": 4.666000000000001e-06, - "loss": 20.1939, + "loss": 99554.9937, "step": 23330 }, { "epoch": 0.04714827668402574, - "grad_norm": 180.61465454101562, + "grad_norm": 17458.912109375, "learning_rate": 4.668e-06, - "loss": 22.1896, + "loss": 111562.0375, "step": 23340 }, { "epoch": 0.04716847731670956, - "grad_norm": 245.4461669921875, + "grad_norm": 19415.55078125, "learning_rate": 4.670000000000001e-06, - "loss": 50.637, + "loss": 309722.325, "step": 23350 }, { "epoch": 0.047188677949393375, - "grad_norm": 237.6515350341797, + "grad_norm": 84620.171875, "learning_rate": 4.672e-06, - "loss": 69.12, + "loss": 247789.95, "step": 23360 }, { "epoch": 0.04720887858207719, - "grad_norm": 364.08929443359375, + "grad_norm": 286329.5625, "learning_rate": 4.6740000000000005e-06, - "loss": 38.5139, + "loss": 153173.25, "step": 23370 }, { "epoch": 0.04722907921476101, - "grad_norm": 648.5537109375, + "grad_norm": 635284.5625, "learning_rate": 4.676000000000001e-06, - "loss": 80.8779, + "loss": 492805.6, "step": 23380 }, { "epoch": 0.04724927984744482, - "grad_norm": 801.7887573242188, + "grad_norm": 1298451.0, "learning_rate": 4.678e-06, - "loss": 35.1651, + "loss": 239871.875, "step": 23390 }, { "epoch": 0.04726948048012864, - "grad_norm": 180.57571411132812, + "grad_norm": 4112.34521484375, "learning_rate": 4.680000000000001e-06, - "loss": 25.4433, + "loss": 153305.8, "step": 23400 }, { "epoch": 0.047289681112812454, - "grad_norm": 1085.69384765625, + "grad_norm": 435952.78125, "learning_rate": 4.682e-06, - "loss": 29.3402, + "loss": 174479.95, "step": 23410 }, { "epoch": 0.04730988174549627, - "grad_norm": 181.84576416015625, + "grad_norm": 9638.857421875, "learning_rate": 4.684e-06, - "loss": 24.5804, + "loss": 204140.6375, "step": 23420 }, { "epoch": 0.04733008237818009, - "grad_norm": 170.5708465576172, + "grad_norm": 34642.03515625, "learning_rate": 4.6860000000000005e-06, - "loss": 31.8119, + "loss": 143031.175, "step": 23430 }, { "epoch": 0.0473502830108639, - "grad_norm": 282.826171875, + "grad_norm": 29280.5625, "learning_rate": 4.688000000000001e-06, - "loss": 32.2964, + "loss": 189756.65, "step": 23440 }, { "epoch": 0.04737048364354771, - "grad_norm": 589.2730102539062, + "grad_norm": 593745.875, "learning_rate": 4.69e-06, - "loss": 45.9049, + "loss": 334560.4, "step": 23450 }, { "epoch": 0.04739068427623153, - "grad_norm": 184.30691528320312, + "grad_norm": 7357.2919921875, "learning_rate": 4.692e-06, - "loss": 22.7185, + "loss": 93950.1375, "step": 23460 }, { "epoch": 0.047410884908915346, - "grad_norm": 356.1437072753906, + "grad_norm": 73356.578125, "learning_rate": 4.694e-06, - "loss": 27.7077, + "loss": 169315.1, "step": 23470 }, { "epoch": 0.047431085541599166, - "grad_norm": 152.33602905273438, + "grad_norm": 85247.8671875, "learning_rate": 4.6960000000000004e-06, - "loss": 27.6814, + "loss": 160621.15, "step": 23480 }, { "epoch": 0.04745128617428298, - "grad_norm": 91.97364807128906, + "grad_norm": 8409.3515625, "learning_rate": 4.698000000000001e-06, - "loss": 32.484, + "loss": 325498.75, "step": 23490 }, { "epoch": 0.04747148680696679, - "grad_norm": 266.3104248046875, + "grad_norm": 134551.890625, "learning_rate": 4.7e-06, - "loss": 47.0541, + "loss": 393881.05, "step": 23500 }, { "epoch": 0.04749168743965061, - "grad_norm": 162.3765106201172, + "grad_norm": 93960.9296875, "learning_rate": 4.702e-06, - "loss": 20.9091, + "loss": 147417.2625, "step": 23510 }, { "epoch": 0.047511888072334425, - "grad_norm": 228.0862579345703, + "grad_norm": 119484.4921875, "learning_rate": 4.704e-06, - "loss": 20.6345, + "loss": 135383.625, "step": 23520 }, { "epoch": 0.04753208870501824, - "grad_norm": 298.2235107421875, + "grad_norm": 96867.71875, "learning_rate": 4.706e-06, - "loss": 29.1333, + "loss": 133364.5625, "step": 23530 }, { "epoch": 0.04755228933770206, - "grad_norm": 866.2457885742188, + "grad_norm": 765692.8125, "learning_rate": 4.7080000000000005e-06, - "loss": 48.024, + "loss": 250770.75, "step": 23540 }, { "epoch": 0.04757248997038587, - "grad_norm": 128.22824096679688, + "grad_norm": 74834.03125, "learning_rate": 4.71e-06, - "loss": 12.996, + "loss": 39630.6031, "step": 23550 }, { "epoch": 0.04759269060306969, - "grad_norm": 345.7879943847656, + "grad_norm": 62654.109375, "learning_rate": 4.712000000000001e-06, - "loss": 48.0625, + "loss": 438108.6, "step": 23560 }, { "epoch": 0.047612891235753504, - "grad_norm": 216.14480590820312, + "grad_norm": 6381.927734375, "learning_rate": 4.714e-06, - "loss": 31.7131, + "loss": 220570.4, "step": 23570 }, { "epoch": 0.04763309186843732, - "grad_norm": 337.5866394042969, + "grad_norm": 102692.7578125, "learning_rate": 4.716e-06, - "loss": 33.3357, + "loss": 189883.4, "step": 23580 }, { "epoch": 0.04765329250112114, - "grad_norm": 238.96351623535156, + "grad_norm": 102117.15625, "learning_rate": 4.718e-06, - "loss": 35.2006, + "loss": 161269.175, "step": 23590 }, { "epoch": 0.04767349313380495, - "grad_norm": 117.896728515625, + "grad_norm": 36990.94921875, "learning_rate": 4.7200000000000005e-06, - "loss": 32.72, + "loss": 271791.075, "step": 23600 }, { "epoch": 0.047693693766488764, - "grad_norm": 472.9936828613281, + "grad_norm": 292451.90625, "learning_rate": 4.722000000000001e-06, - "loss": 37.0214, + "loss": 237240.475, "step": 23610 }, { "epoch": 0.047713894399172584, - "grad_norm": 149.30357360839844, + "grad_norm": 1795.3363037109375, "learning_rate": 4.724e-06, - "loss": 40.1063, + "loss": 156562.075, "step": 23620 }, { "epoch": 0.0477340950318564, - "grad_norm": 516.667236328125, + "grad_norm": 119545.9296875, "learning_rate": 4.726000000000001e-06, - "loss": 38.01, + "loss": 181117.6875, "step": 23630 }, { "epoch": 0.04775429566454022, - "grad_norm": 237.48007202148438, + "grad_norm": 51070.8984375, "learning_rate": 4.728e-06, - "loss": 28.6039, + "loss": 193929.75, "step": 23640 }, { "epoch": 0.04777449629722403, - "grad_norm": 252.01455688476562, + "grad_norm": 25101.572265625, "learning_rate": 4.7300000000000005e-06, - "loss": 23.7702, + "loss": 109557.9125, "step": 23650 }, { "epoch": 0.04779469692990784, - "grad_norm": 232.40296936035156, + "grad_norm": 84739.1015625, "learning_rate": 4.732000000000001e-06, - "loss": 49.4607, + "loss": 275673.3, "step": 23660 }, { "epoch": 0.04781489756259166, - "grad_norm": 474.0699157714844, + "grad_norm": 240295.828125, "learning_rate": 4.734e-06, - "loss": 45.5415, + "loss": 272404.6, "step": 23670 }, { "epoch": 0.047835098195275476, - "grad_norm": 428.4226989746094, + "grad_norm": 50790.19140625, "learning_rate": 4.736000000000001e-06, - "loss": 26.8226, + "loss": 99113.3438, "step": 23680 }, { "epoch": 0.04785529882795929, - "grad_norm": 391.0641784667969, + "grad_norm": 116096.7265625, "learning_rate": 4.738e-06, - "loss": 23.176, + "loss": 167477.9, "step": 23690 }, { "epoch": 0.04787549946064311, - "grad_norm": 254.60574340820312, + "grad_norm": 138742.421875, "learning_rate": 4.74e-06, - "loss": 35.1211, + "loss": 276017.7, "step": 23700 }, { "epoch": 0.04789570009332692, - "grad_norm": 386.9615478515625, + "grad_norm": 204350.421875, "learning_rate": 4.7420000000000005e-06, - "loss": 26.655, + "loss": 107279.6875, "step": 23710 }, { "epoch": 0.04791590072601074, - "grad_norm": 423.5086364746094, + "grad_norm": 235707.796875, "learning_rate": 4.744000000000001e-06, - "loss": 34.1011, + "loss": 227382.4, "step": 23720 }, { "epoch": 0.047936101358694555, - "grad_norm": 125.49838256835938, + "grad_norm": 22789.103515625, "learning_rate": 4.746000000000001e-06, - "loss": 35.3341, + "loss": 159993.025, "step": 23730 }, { "epoch": 0.04795630199137837, - "grad_norm": 369.2350769042969, + "grad_norm": 163733.515625, "learning_rate": 4.748e-06, - "loss": 42.2543, + "loss": 233301.45, "step": 23740 }, { "epoch": 0.04797650262406219, - "grad_norm": 265.3240051269531, + "grad_norm": 6967.84716796875, "learning_rate": 4.75e-06, - "loss": 41.9161, + "loss": 181113.5, "step": 23750 }, { "epoch": 0.047996703256746, - "grad_norm": 381.4123840332031, + "grad_norm": 25985.30078125, "learning_rate": 4.752e-06, - "loss": 51.3242, + "loss": 245819.625, "step": 23760 }, { "epoch": 0.048016903889429814, - "grad_norm": 1546.1595458984375, + "grad_norm": 116140.109375, "learning_rate": 4.7540000000000006e-06, - "loss": 31.1774, + "loss": 117574.05, "step": 23770 }, { "epoch": 0.048037104522113634, - "grad_norm": 360.0041809082031, + "grad_norm": 102788.7734375, "learning_rate": 4.756000000000001e-06, - "loss": 50.6428, + "loss": 203951.8875, "step": 23780 }, { "epoch": 0.04805730515479745, - "grad_norm": 253.70651245117188, + "grad_norm": 110924.9140625, "learning_rate": 4.758e-06, - "loss": 31.1466, + "loss": 368189.45, "step": 23790 }, { "epoch": 0.04807750578748127, - "grad_norm": 2709.918701171875, + "grad_norm": 1300944.25, "learning_rate": 4.76e-06, - "loss": 60.5765, + "loss": 316294.1, "step": 23800 }, { "epoch": 0.04809770642016508, - "grad_norm": 364.52545166015625, + "grad_norm": 44722.69921875, "learning_rate": 4.762e-06, - "loss": 34.6433, + "loss": 123228.8125, "step": 23810 }, { "epoch": 0.04811790705284889, - "grad_norm": 303.4349670410156, + "grad_norm": 96419.921875, "learning_rate": 4.7640000000000005e-06, - "loss": 25.8956, + "loss": 114051.85, "step": 23820 }, { "epoch": 0.04813810768553271, - "grad_norm": 298.9841613769531, + "grad_norm": 177745.421875, "learning_rate": 4.766000000000001e-06, - "loss": 20.4022, + "loss": 110647.4125, "step": 23830 }, { "epoch": 0.048158308318216526, - "grad_norm": 385.0189208984375, + "grad_norm": 154517.328125, "learning_rate": 4.768000000000001e-06, - "loss": 32.9447, + "loss": 226817.15, "step": 23840 }, { "epoch": 0.04817850895090034, - "grad_norm": 225.636962890625, + "grad_norm": 138840.625, "learning_rate": 4.77e-06, - "loss": 31.4838, + "loss": 196054.65, "step": 23850 }, { "epoch": 0.04819870958358416, - "grad_norm": 259.67108154296875, + "grad_norm": 127227.8203125, "learning_rate": 4.772e-06, - "loss": 32.3447, + "loss": 186653.3125, "step": 23860 }, { "epoch": 0.04821891021626797, - "grad_norm": 213.6777801513672, + "grad_norm": 187074.90625, "learning_rate": 4.774e-06, - "loss": 26.7017, + "loss": 148962.9625, "step": 23870 }, { "epoch": 0.04823911084895179, - "grad_norm": 221.456787109375, + "grad_norm": 9877.349609375, "learning_rate": 4.7760000000000005e-06, - "loss": 35.3876, + "loss": 323653.225, "step": 23880 }, { "epoch": 0.048259311481635606, - "grad_norm": 510.59423828125, + "grad_norm": 504329.59375, "learning_rate": 4.778000000000001e-06, - "loss": 58.0918, + "loss": 362589.0, "step": 23890 }, { "epoch": 0.04827951211431942, - "grad_norm": 586.248291015625, + "grad_norm": 492734.1875, "learning_rate": 4.78e-06, - "loss": 31.6313, + "loss": 221310.55, "step": 23900 }, { "epoch": 0.04829971274700324, - "grad_norm": 421.5391540527344, + "grad_norm": 257999.96875, "learning_rate": 4.782e-06, - "loss": 30.8656, + "loss": 129651.55, "step": 23910 }, { "epoch": 0.04831991337968705, - "grad_norm": 293.57696533203125, + "grad_norm": 147020.671875, "learning_rate": 4.784e-06, - "loss": 26.2061, + "loss": 122581.7125, "step": 23920 }, { "epoch": 0.048340114012370865, - "grad_norm": 145.83151245117188, + "grad_norm": 15546.0361328125, "learning_rate": 4.7860000000000004e-06, - "loss": 19.0745, + "loss": 87780.5938, "step": 23930 }, { "epoch": 0.048360314645054685, - "grad_norm": 218.84844970703125, + "grad_norm": 141224.84375, "learning_rate": 4.7880000000000006e-06, - "loss": 28.3748, + "loss": 122386.6625, "step": 23940 }, { "epoch": 0.0483805152777385, - "grad_norm": 369.0185852050781, + "grad_norm": 471926.8125, "learning_rate": 4.79e-06, - "loss": 34.9431, + "loss": 208617.4875, "step": 23950 }, { "epoch": 0.04840071591042232, - "grad_norm": 209.40855407714844, + "grad_norm": 138332.6875, "learning_rate": 4.792000000000001e-06, - "loss": 34.9354, + "loss": 175784.6625, "step": 23960 }, { "epoch": 0.04842091654310613, - "grad_norm": 650.5877075195312, + "grad_norm": 664945.75, "learning_rate": 4.794e-06, - "loss": 30.4894, + "loss": 172561.5625, "step": 23970 }, { "epoch": 0.048441117175789944, - "grad_norm": 599.2838745117188, + "grad_norm": 562670.6875, "learning_rate": 4.796e-06, - "loss": 61.641, + "loss": 286669.25, "step": 23980 }, { "epoch": 0.048461317808473764, - "grad_norm": 191.25987243652344, + "grad_norm": 88939.9609375, "learning_rate": 4.7980000000000005e-06, - "loss": 29.2233, + "loss": 95670.025, "step": 23990 }, { "epoch": 0.04848151844115758, - "grad_norm": 117.61515045166016, + "grad_norm": 162772.671875, "learning_rate": 4.800000000000001e-06, - "loss": 27.267, + "loss": 125534.15, "step": 24000 }, { "epoch": 0.04850171907384139, - "grad_norm": 355.64459228515625, + "grad_norm": 59589.6796875, "learning_rate": 4.802000000000001e-06, - "loss": 30.4391, + "loss": 127192.7125, "step": 24010 }, { "epoch": 0.04852191970652521, - "grad_norm": 333.71051025390625, + "grad_norm": 158136.90625, "learning_rate": 4.804e-06, - "loss": 23.3836, + "loss": 67013.4375, "step": 24020 }, { "epoch": 0.04854212033920902, - "grad_norm": 168.31088256835938, + "grad_norm": 46222.25, "learning_rate": 4.806000000000001e-06, - "loss": 22.9719, + "loss": 99339.425, "step": 24030 }, { "epoch": 0.04856232097189284, - "grad_norm": 372.96038818359375, + "grad_norm": 175593.453125, "learning_rate": 4.808e-06, - "loss": 19.0593, + "loss": 112190.9375, "step": 24040 }, { "epoch": 0.048582521604576656, - "grad_norm": 158.93374633789062, + "grad_norm": 8431.55078125, "learning_rate": 4.8100000000000005e-06, - "loss": 35.1784, + "loss": 293460.525, "step": 24050 }, { "epoch": 0.04860272223726047, - "grad_norm": 143.72250366210938, + "grad_norm": 8104.66259765625, "learning_rate": 4.812000000000001e-06, - "loss": 58.8182, + "loss": 342806.825, "step": 24060 }, { "epoch": 0.04862292286994429, - "grad_norm": 248.034912109375, + "grad_norm": 42160.39453125, "learning_rate": 4.814e-06, - "loss": 17.1163, + "loss": 102834.9625, "step": 24070 }, { "epoch": 0.0486431235026281, - "grad_norm": 908.7403564453125, + "grad_norm": 208192.5625, "learning_rate": 4.816e-06, - "loss": 32.6592, + "loss": 112074.1875, "step": 24080 }, { "epoch": 0.048663324135311915, - "grad_norm": 268.7630310058594, + "grad_norm": 153534.0625, "learning_rate": 4.818e-06, - "loss": 47.9006, + "loss": 320646.075, "step": 24090 }, { "epoch": 0.048683524767995735, - "grad_norm": 101.19955444335938, + "grad_norm": 16285.51171875, "learning_rate": 4.8200000000000004e-06, - "loss": 24.2114, + "loss": 93995.7625, "step": 24100 }, { "epoch": 0.04870372540067955, - "grad_norm": 272.14288330078125, + "grad_norm": 282670.5, "learning_rate": 4.822000000000001e-06, - "loss": 41.5454, + "loss": 230627.575, "step": 24110 }, { "epoch": 0.04872392603336337, - "grad_norm": 523.4356689453125, + "grad_norm": 590960.875, "learning_rate": 4.824000000000001e-06, - "loss": 26.7681, + "loss": 173150.5875, "step": 24120 }, { "epoch": 0.04874412666604718, - "grad_norm": 266.33056640625, + "grad_norm": 95977.671875, "learning_rate": 4.826e-06, - "loss": 19.0341, + "loss": 66900.1875, "step": 24130 }, { "epoch": 0.048764327298730994, - "grad_norm": 389.2867736816406, + "grad_norm": 43628.3125, "learning_rate": 4.828e-06, - "loss": 43.7141, + "loss": 269536.925, "step": 24140 }, { "epoch": 0.048784527931414814, - "grad_norm": 571.64453125, + "grad_norm": 497965.71875, "learning_rate": 4.83e-06, - "loss": 29.1342, + "loss": 126365.5, "step": 24150 }, { "epoch": 0.04880472856409863, - "grad_norm": 329.1141662597656, + "grad_norm": 116352.9453125, "learning_rate": 4.8320000000000005e-06, - "loss": 27.7108, + "loss": 113685.9625, "step": 24160 }, { "epoch": 0.04882492919678244, - "grad_norm": 293.8599548339844, + "grad_norm": 43053.734375, "learning_rate": 4.834000000000001e-06, - "loss": 48.921, + "loss": 351489.8, "step": 24170 }, { "epoch": 0.04884512982946626, - "grad_norm": 522.3822021484375, + "grad_norm": 593453.75, "learning_rate": 4.836e-06, - "loss": 57.5064, + "loss": 198645.05, "step": 24180 }, { "epoch": 0.048865330462150074, - "grad_norm": 411.9894714355469, + "grad_norm": 38693.03515625, "learning_rate": 4.838e-06, - "loss": 40.1941, + "loss": 440694.95, "step": 24190 }, { "epoch": 0.048885531094833894, - "grad_norm": 247.519775390625, + "grad_norm": 190855.21875, "learning_rate": 4.84e-06, - "loss": 42.679, + "loss": 255519.5, "step": 24200 }, { "epoch": 0.04890573172751771, - "grad_norm": 951.4860229492188, + "grad_norm": 828624.125, "learning_rate": 4.842e-06, - "loss": 45.0348, + "loss": 241170.975, "step": 24210 }, { "epoch": 0.04892593236020152, "grad_norm": 0.0, "learning_rate": 4.8440000000000005e-06, - "loss": 29.2135, + "loss": 241553.3, "step": 24220 }, { "epoch": 0.04894613299288534, "grad_norm": 0.0, "learning_rate": 4.846e-06, - "loss": 31.1792, + "loss": 220685.7, "step": 24230 }, { "epoch": 0.04896633362556915, - "grad_norm": 545.0596923828125, + "grad_norm": 295647.25, "learning_rate": 4.848000000000001e-06, - "loss": 36.6954, + "loss": 268954.0, "step": 24240 }, { "epoch": 0.048986534258252966, - "grad_norm": 357.08251953125, + "grad_norm": 898622.125, "learning_rate": 4.85e-06, - "loss": 31.0875, + "loss": 240123.9, "step": 24250 }, { "epoch": 0.049006734890936786, - "grad_norm": 473.1929016113281, + "grad_norm": 414410.5625, "learning_rate": 4.852e-06, - "loss": 37.1735, + "loss": 278669.625, "step": 24260 }, { "epoch": 0.0490269355236206, - "grad_norm": 189.36502075195312, + "grad_norm": 55587.82421875, "learning_rate": 4.8540000000000005e-06, - "loss": 36.2747, + "loss": 156973.6375, "step": 24270 }, { "epoch": 0.04904713615630442, - "grad_norm": 120.2989730834961, + "grad_norm": 4584.2119140625, "learning_rate": 4.856e-06, - "loss": 33.0543, + "loss": 198344.3125, "step": 24280 }, { "epoch": 0.04906733678898823, - "grad_norm": 72.84486389160156, + "grad_norm": 98379.6328125, "learning_rate": 4.858000000000001e-06, - "loss": 40.5298, + "loss": 165522.225, "step": 24290 }, { "epoch": 0.049087537421672045, - "grad_norm": 469.8461608886719, + "grad_norm": 70731.796875, "learning_rate": 4.86e-06, - "loss": 39.9338, + "loss": 342678.775, "step": 24300 }, { "epoch": 0.049107738054355865, - "grad_norm": 264.76458740234375, + "grad_norm": 83252.8046875, "learning_rate": 4.862e-06, - "loss": 40.7187, + "loss": 243472.4, "step": 24310 }, { "epoch": 0.04912793868703968, - "grad_norm": 2196.45556640625, + "grad_norm": 533299.3125, "learning_rate": 4.864e-06, - "loss": 54.8013, + "loss": 189744.8625, "step": 24320 }, { "epoch": 0.04914813931972349, - "grad_norm": 726.4110717773438, + "grad_norm": 498391.0, "learning_rate": 4.8660000000000005e-06, - "loss": 49.6623, + "loss": 342530.55, "step": 24330 }, { "epoch": 0.04916833995240731, - "grad_norm": 60.958251953125, + "grad_norm": 5859.25537109375, "learning_rate": 4.868000000000001e-06, - "loss": 34.9965, + "loss": 224686.85, "step": 24340 }, { "epoch": 0.049188540585091124, - "grad_norm": 238.892822265625, + "grad_norm": 87926.515625, "learning_rate": 4.87e-06, - "loss": 47.3049, + "loss": 331589.925, "step": 24350 }, { "epoch": 0.04920874121777494, - "grad_norm": 666.7787475585938, + "grad_norm": 694122.8125, "learning_rate": 4.872000000000001e-06, - "loss": 35.4274, + "loss": 381926.3, "step": 24360 }, { "epoch": 0.04922894185045876, - "grad_norm": 548.2902221679688, + "grad_norm": 382019.4375, "learning_rate": 4.874e-06, - "loss": 56.2392, + "loss": 480572.8, "step": 24370 }, { "epoch": 0.04924914248314257, - "grad_norm": 411.37261962890625, + "grad_norm": 193350.0625, "learning_rate": 4.876e-06, - "loss": 29.0048, + "loss": 179130.9375, "step": 24380 }, { "epoch": 0.04926934311582639, - "grad_norm": 255.85255432128906, + "grad_norm": 129876.90625, "learning_rate": 4.8780000000000006e-06, - "loss": 39.0574, + "loss": 241265.375, "step": 24390 }, { "epoch": 0.0492895437485102, - "grad_norm": 136.60525512695312, + "grad_norm": 6843.8359375, "learning_rate": 4.880000000000001e-06, - "loss": 23.4296, + "loss": 103202.0125, "step": 24400 }, { "epoch": 0.049309744381194016, - "grad_norm": 280.9986572265625, + "grad_norm": 3513.09228515625, "learning_rate": 4.882000000000001e-06, - "loss": 29.5125, + "loss": 113045.975, "step": 24410 }, { "epoch": 0.049329945013877836, - "grad_norm": 533.7042236328125, + "grad_norm": 514921.5, "learning_rate": 4.884e-06, - "loss": 31.8714, + "loss": 141614.975, "step": 24420 }, { "epoch": 0.04935014564656165, - "grad_norm": 179.54776000976562, + "grad_norm": 19660.357421875, "learning_rate": 4.886e-06, - "loss": 18.4666, + "loss": 84361.525, "step": 24430 }, { "epoch": 0.04937034627924546, - "grad_norm": 396.60711669921875, + "grad_norm": 589536.3125, "learning_rate": 4.8880000000000005e-06, - "loss": 25.4745, + "loss": 150513.2875, "step": 24440 }, { "epoch": 0.04939054691192928, - "grad_norm": 406.3586120605469, + "grad_norm": 245846.84375, "learning_rate": 4.890000000000001e-06, - "loss": 25.7635, + "loss": 161999.0375, "step": 24450 }, { "epoch": 0.049410747544613096, - "grad_norm": 2937.0927734375, + "grad_norm": 30732.90625, "learning_rate": 4.892000000000001e-06, - "loss": 54.7187, + "loss": 275986.325, "step": 24460 }, { "epoch": 0.049430948177296916, - "grad_norm": 722.0195922851562, + "grad_norm": 1388978.25, "learning_rate": 4.894e-06, - "loss": 45.7203, + "loss": 345189.65, "step": 24470 }, { "epoch": 0.04945114880998073, - "grad_norm": 281.9906005859375, + "grad_norm": 136924.59375, "learning_rate": 4.896e-06, - "loss": 32.035, + "loss": 220131.675, "step": 24480 }, { "epoch": 0.04947134944266454, - "grad_norm": 221.22129821777344, + "grad_norm": 51859.390625, "learning_rate": 4.898e-06, - "loss": 59.1987, + "loss": 233501.425, "step": 24490 }, { "epoch": 0.04949155007534836, - "grad_norm": 456.32635498046875, + "grad_norm": 64099.05859375, "learning_rate": 4.9000000000000005e-06, - "loss": 31.1111, + "loss": 165654.95, "step": 24500 }, { "epoch": 0.049511750708032175, - "grad_norm": 483.26934814453125, + "grad_norm": 314680.09375, "learning_rate": 4.902000000000001e-06, - "loss": 43.1372, + "loss": 335220.925, "step": 24510 }, { "epoch": 0.04953195134071599, - "grad_norm": 48.44151306152344, + "grad_norm": 118885.1796875, "learning_rate": 4.904000000000001e-06, - "loss": 26.2799, + "loss": 105522.3, "step": 24520 }, { "epoch": 0.04955215197339981, - "grad_norm": 1512.83984375, + "grad_norm": 20980.859375, "learning_rate": 4.906e-06, - "loss": 27.6163, + "loss": 148338.6125, "step": 24530 }, { "epoch": 0.04957235260608362, - "grad_norm": 285.0791931152344, + "grad_norm": 300738.96875, "learning_rate": 4.908e-06, - "loss": 28.3728, + "loss": 174350.475, "step": 24540 }, { "epoch": 0.04959255323876744, - "grad_norm": 787.2332763671875, + "grad_norm": 210993.3125, "learning_rate": 4.9100000000000004e-06, - "loss": 23.7764, + "loss": 124817.075, "step": 24550 }, { "epoch": 0.049612753871451254, - "grad_norm": 205.12551879882812, + "grad_norm": 25064.234375, "learning_rate": 4.9120000000000006e-06, - "loss": 41.47, + "loss": 285263.025, "step": 24560 }, { "epoch": 0.04963295450413507, - "grad_norm": 135.7936553955078, + "grad_norm": 18799.13671875, "learning_rate": 4.914000000000001e-06, - "loss": 23.9787, + "loss": 170290.1875, "step": 24570 }, { "epoch": 0.04965315513681889, - "grad_norm": 165.5546417236328, + "grad_norm": 68497.65625, "learning_rate": 4.916e-06, - "loss": 19.4133, + "loss": 88904.9438, "step": 24580 }, { "epoch": 0.0496733557695027, - "grad_norm": 320.9225769042969, + "grad_norm": 139329.984375, "learning_rate": 4.918e-06, - "loss": 44.5957, + "loss": 364620.675, "step": 24590 }, { "epoch": 0.04969355640218651, - "grad_norm": 201.8945770263672, + "grad_norm": 307823.78125, "learning_rate": 4.92e-06, - "loss": 26.1929, + "loss": 176874.75, "step": 24600 }, { "epoch": 0.04971375703487033, - "grad_norm": 239.5926055908203, + "grad_norm": 160917.015625, "learning_rate": 4.9220000000000005e-06, - "loss": 24.9607, + "loss": 103213.075, "step": 24610 }, { "epoch": 0.049733957667554146, - "grad_norm": 512.6834716796875, + "grad_norm": 285696.84375, "learning_rate": 4.924000000000001e-06, - "loss": 28.6257, + "loss": 173746.25, "step": 24620 }, { "epoch": 0.049754158300237966, - "grad_norm": 264.6862487792969, + "grad_norm": 124781.0703125, "learning_rate": 4.926e-06, - "loss": 18.0654, + "loss": 112600.2125, "step": 24630 }, { "epoch": 0.04977435893292178, - "grad_norm": 467.5845642089844, + "grad_norm": 555115.875, "learning_rate": 4.928000000000001e-06, - "loss": 32.0786, + "loss": 137203.975, "step": 24640 }, { "epoch": 0.04979455956560559, - "grad_norm": 315.939453125, + "grad_norm": 95052.0390625, "learning_rate": 4.93e-06, - "loss": 34.9802, + "loss": 180327.175, "step": 24650 }, { "epoch": 0.04981476019828941, - "grad_norm": 422.5688171386719, + "grad_norm": 79893.6796875, "learning_rate": 4.932e-06, - "loss": 30.9701, + "loss": 153687.6625, "step": 24660 }, { "epoch": 0.049834960830973225, - "grad_norm": 670.5335083007812, + "grad_norm": 28430.619140625, "learning_rate": 4.9340000000000005e-06, - "loss": 59.6177, + "loss": 502218.25, "step": 24670 }, { "epoch": 0.04985516146365704, - "grad_norm": 159.326904296875, + "grad_norm": 19246.798828125, "learning_rate": 4.936e-06, - "loss": 33.8358, + "loss": 225470.275, "step": 24680 }, { "epoch": 0.04987536209634086, - "grad_norm": 517.6242065429688, + "grad_norm": 356947.46875, "learning_rate": 4.938000000000001e-06, - "loss": 28.0662, + "loss": 159612.8625, "step": 24690 }, { "epoch": 0.04989556272902467, - "grad_norm": 172.5724334716797, + "grad_norm": 27122.91796875, "learning_rate": 4.94e-06, - "loss": 43.5524, + "loss": 335856.275, "step": 24700 }, { "epoch": 0.04991576336170849, - "grad_norm": 31.741701126098633, + "grad_norm": 96829.453125, "learning_rate": 4.942e-06, - "loss": 19.8362, + "loss": 106659.975, "step": 24710 }, { "epoch": 0.049935963994392304, - "grad_norm": 451.263671875, + "grad_norm": 42875.01953125, "learning_rate": 4.9440000000000004e-06, - "loss": 49.2625, + "loss": 312233.575, "step": 24720 }, { "epoch": 0.04995616462707612, - "grad_norm": 142.236083984375, + "grad_norm": 23772.19140625, "learning_rate": 4.946000000000001e-06, - "loss": 41.2436, + "loss": 335629.175, "step": 24730 }, { "epoch": 0.04997636525975994, - "grad_norm": 292.59075927734375, + "grad_norm": 227523.078125, "learning_rate": 4.948000000000001e-06, - "loss": 26.0532, + "loss": 208491.425, "step": 24740 }, { "epoch": 0.04999656589244375, - "grad_norm": 103.89948272705078, + "grad_norm": 36901.61328125, "learning_rate": 4.95e-06, - "loss": 28.4878, + "loss": 106469.1625, "step": 24750 }, { "epoch": 0.050016766525127564, - "grad_norm": 245.5137481689453, + "grad_norm": 20129.46875, "learning_rate": 4.952e-06, - "loss": 47.3051, + "loss": 206754.7125, "step": 24760 }, { "epoch": 0.050036967157811384, - "grad_norm": 1218.4417724609375, + "grad_norm": 1176278.5, "learning_rate": 4.954e-06, - "loss": 55.935, + "loss": 423942.6, "step": 24770 }, { "epoch": 0.0500571677904952, - "grad_norm": 534.2709350585938, + "grad_norm": 427123.25, "learning_rate": 4.9560000000000005e-06, - "loss": 41.0981, + "loss": 170401.1375, "step": 24780 }, { "epoch": 0.05007736842317902, - "grad_norm": 277.80224609375, + "grad_norm": 9555.595703125, "learning_rate": 4.958000000000001e-06, - "loss": 43.365, + "loss": 207559.7625, "step": 24790 }, { "epoch": 0.05009756905586283, - "grad_norm": 190.49661254882812, + "grad_norm": 8899.623046875, "learning_rate": 4.960000000000001e-06, - "loss": 23.9928, + "loss": 48307.6781, "step": 24800 }, { "epoch": 0.05011776968854664, - "grad_norm": 380.3485107421875, + "grad_norm": 38808.83203125, "learning_rate": 4.962e-06, - "loss": 30.1288, + "loss": 320238.825, "step": 24810 }, { "epoch": 0.05013797032123046, - "grad_norm": 207.34530639648438, + "grad_norm": 20134.978515625, "learning_rate": 4.964e-06, - "loss": 56.9578, + "loss": 265376.1, "step": 24820 }, { "epoch": 0.050158170953914276, - "grad_norm": 716.9381713867188, + "grad_norm": 821762.875, "learning_rate": 4.966e-06, - "loss": 36.0246, + "loss": 235816.05, "step": 24830 }, { "epoch": 0.05017837158659809, - "grad_norm": 430.68359375, + "grad_norm": 630361.625, "learning_rate": 4.9680000000000005e-06, - "loss": 36.4686, + "loss": 281904.45, "step": 24840 }, { "epoch": 0.05019857221928191, - "grad_norm": 409.4767150878906, + "grad_norm": 254491.640625, "learning_rate": 4.970000000000001e-06, - "loss": 50.6373, + "loss": 328212.2, "step": 24850 }, { "epoch": 0.05021877285196572, - "grad_norm": 383.60711669921875, + "grad_norm": 155271.953125, "learning_rate": 4.972e-06, - "loss": 34.2077, + "loss": 169921.4, "step": 24860 }, { "epoch": 0.05023897348464954, - "grad_norm": 630.3153686523438, + "grad_norm": 723850.6875, "learning_rate": 4.974e-06, - "loss": 49.5778, + "loss": 273138.05, "step": 24870 }, { "epoch": 0.050259174117333355, - "grad_norm": 836.9533081054688, + "grad_norm": 1634573.875, "learning_rate": 4.976e-06, - "loss": 44.2279, + "loss": 307730.375, "step": 24880 }, { "epoch": 0.05027937475001717, - "grad_norm": 202.4707794189453, + "grad_norm": 8210.0966796875, "learning_rate": 4.9780000000000005e-06, - "loss": 36.9321, + "loss": 209845.1, "step": 24890 }, { "epoch": 0.05029957538270099, - "grad_norm": 270.40838623046875, + "grad_norm": 178655.015625, "learning_rate": 4.980000000000001e-06, - "loss": 24.9977, + "loss": 187184.7125, "step": 24900 }, { "epoch": 0.0503197760153848, - "grad_norm": 199.77508544921875, + "grad_norm": 38827.71484375, "learning_rate": 4.982e-06, - "loss": 21.765, + "loss": 75457.2312, "step": 24910 }, { "epoch": 0.050339976648068614, - "grad_norm": 278.4015808105469, + "grad_norm": 27151.556640625, "learning_rate": 4.984000000000001e-06, - "loss": 33.6428, + "loss": 86535.8, "step": 24920 }, { "epoch": 0.050360177280752434, - "grad_norm": 190.54000854492188, + "grad_norm": 38248.34375, "learning_rate": 4.986e-06, - "loss": 14.8352, + "loss": 89260.3625, "step": 24930 }, { "epoch": 0.05038037791343625, - "grad_norm": 28.801469802856445, + "grad_norm": 7080.85595703125, "learning_rate": 4.988e-06, - "loss": 13.3349, + "loss": 100053.4438, "step": 24940 }, { "epoch": 0.05040057854612007, - "grad_norm": 86.93453216552734, + "grad_norm": 6423.9755859375, "learning_rate": 4.9900000000000005e-06, - "loss": 35.9467, + "loss": 243399.525, "step": 24950 }, { "epoch": 0.05042077917880388, - "grad_norm": 88.48124694824219, + "grad_norm": 104537.8671875, "learning_rate": 4.992e-06, - "loss": 34.9635, + "loss": 212932.875, "step": 24960 }, { "epoch": 0.05044097981148769, - "grad_norm": 308.2735595703125, + "grad_norm": 145424.703125, "learning_rate": 4.994000000000001e-06, - "loss": 28.927, + "loss": 177549.5875, "step": 24970 }, { "epoch": 0.05046118044417151, - "grad_norm": 332.31890869140625, + "grad_norm": 20151.779296875, "learning_rate": 4.996e-06, - "loss": 40.6573, + "loss": 342648.875, "step": 24980 }, { "epoch": 0.050481381076855326, - "grad_norm": 179.0585174560547, + "grad_norm": 14602.51171875, "learning_rate": 4.998e-06, - "loss": 39.3382, + "loss": 237215.825, "step": 24990 }, { "epoch": 0.05050158170953914, - "grad_norm": 401.5177307128906, + "grad_norm": 13727.271484375, "learning_rate": 5e-06, - "loss": 18.8979, + "loss": 134369.9125, "step": 25000 }, { "epoch": 0.05052178234222296, - "grad_norm": 126.90967559814453, + "grad_norm": 1588.2701416015625, "learning_rate": 5.0020000000000006e-06, - "loss": 24.5952, + "loss": 138786.45, "step": 25010 }, { "epoch": 0.05054198297490677, - "grad_norm": 203.88487243652344, + "grad_norm": 125598.4453125, "learning_rate": 5.004e-06, - "loss": 35.7379, + "loss": 194614.025, "step": 25020 }, { "epoch": 0.05056218360759059, - "grad_norm": 31.458026885986328, + "grad_norm": 105158.875, "learning_rate": 5.006000000000001e-06, - "loss": 25.2942, + "loss": 179015.5125, "step": 25030 }, { "epoch": 0.050582384240274406, - "grad_norm": 278.30572509765625, + "grad_norm": 26436.498046875, "learning_rate": 5.008000000000001e-06, - "loss": 37.5499, + "loss": 224536.875, "step": 25040 }, { "epoch": 0.05060258487295822, - "grad_norm": 207.86512756347656, + "grad_norm": 26292.10546875, "learning_rate": 5.01e-06, - "loss": 26.2798, + "loss": 99923.2375, "step": 25050 }, { "epoch": 0.05062278550564204, - "grad_norm": 132.76596069335938, + "grad_norm": 10590.5947265625, "learning_rate": 5.0120000000000005e-06, - "loss": 25.1971, + "loss": 147185.1125, "step": 25060 }, { "epoch": 0.05064298613832585, - "grad_norm": 257.3799133300781, + "grad_norm": 20305.330078125, "learning_rate": 5.014e-06, - "loss": 23.6105, + "loss": 58354.475, "step": 25070 }, { "epoch": 0.050663186771009665, - "grad_norm": 1644.5107421875, + "grad_norm": 444099.84375, "learning_rate": 5.016000000000001e-06, - "loss": 63.1909, + "loss": 232351.9, "step": 25080 }, { "epoch": 0.050683387403693485, - "grad_norm": 963.2476196289062, + "grad_norm": 1126664.375, "learning_rate": 5.018000000000001e-06, - "loss": 55.8948, + "loss": 382552.0, "step": 25090 }, { "epoch": 0.0507035880363773, - "grad_norm": 219.97930908203125, + "grad_norm": 33603.76953125, "learning_rate": 5.02e-06, - "loss": 16.093, + "loss": 100591.3687, "step": 25100 }, { "epoch": 0.05072378866906112, - "grad_norm": 198.54092407226562, + "grad_norm": 114549.3828125, "learning_rate": 5.022e-06, - "loss": 28.0116, + "loss": 204203.1125, "step": 25110 }, { "epoch": 0.05074398930174493, - "grad_norm": 9.563661575317383, + "grad_norm": 3068.76220703125, "learning_rate": 5.024e-06, - "loss": 36.7111, + "loss": 253111.4, "step": 25120 }, { "epoch": 0.050764189934428744, - "grad_norm": 410.5440979003906, + "grad_norm": 176515.96875, "learning_rate": 5.026000000000001e-06, - "loss": 68.4753, + "loss": 271487.825, "step": 25130 }, { "epoch": 0.050784390567112564, - "grad_norm": 264.5072326660156, + "grad_norm": 270048.625, "learning_rate": 5.028000000000001e-06, - "loss": 30.7421, + "loss": 163224.5875, "step": 25140 }, { "epoch": 0.05080459119979638, "grad_norm": 0.0, "learning_rate": 5.03e-06, - "loss": 45.6329, + "loss": 266125.075, "step": 25150 }, { "epoch": 0.05082479183248019, - "grad_norm": 197.56336975097656, + "grad_norm": 26598.6953125, "learning_rate": 5.032e-06, - "loss": 26.0097, + "loss": 98978.6875, "step": 25160 }, { "epoch": 0.05084499246516401, - "grad_norm": 95.28469848632812, + "grad_norm": 162520.484375, "learning_rate": 5.0339999999999996e-06, - "loss": 71.7765, + "loss": 280335.975, "step": 25170 }, { "epoch": 0.05086519309784782, - "grad_norm": 226.5924072265625, + "grad_norm": 26455.357421875, "learning_rate": 5.0360000000000006e-06, - "loss": 31.2543, + "loss": 258680.425, "step": 25180 }, { "epoch": 0.05088539373053164, - "grad_norm": 196.97518920898438, + "grad_norm": 41511.4453125, "learning_rate": 5.038000000000001e-06, - "loss": 22.2428, + "loss": 154017.0, "step": 25190 }, { "epoch": 0.050905594363215456, - "grad_norm": 370.3966369628906, + "grad_norm": 174240.796875, "learning_rate": 5.04e-06, - "loss": 23.8425, + "loss": 104668.975, "step": 25200 }, { "epoch": 0.05092579499589927, - "grad_norm": 319.02447509765625, + "grad_norm": 26312.576171875, "learning_rate": 5.042e-06, - "loss": 21.7254, + "loss": 78976.5875, "step": 25210 }, { "epoch": 0.05094599562858309, - "grad_norm": 281.3335876464844, + "grad_norm": 144433.671875, "learning_rate": 5.044e-06, - "loss": 41.2796, + "loss": 171961.6, "step": 25220 }, { "epoch": 0.0509661962612669, - "grad_norm": 1281.8736572265625, + "grad_norm": 1787067.25, "learning_rate": 5.0460000000000005e-06, - "loss": 52.3635, + "loss": 379097.45, "step": 25230 }, { "epoch": 0.050986396893950715, - "grad_norm": 298.2367858886719, + "grad_norm": 36843.25, "learning_rate": 5.048000000000001e-06, - "loss": 41.1969, + "loss": 312265.4, "step": 25240 }, { "epoch": 0.051006597526634535, - "grad_norm": 348.9700012207031, + "grad_norm": 142702.015625, "learning_rate": 5.050000000000001e-06, - "loss": 35.1046, + "loss": 205235.925, "step": 25250 }, { "epoch": 0.05102679815931835, - "grad_norm": 377.2474365234375, + "grad_norm": 171950.453125, "learning_rate": 5.052e-06, - "loss": 24.9522, + "loss": 178793.525, "step": 25260 }, { "epoch": 0.05104699879200217, - "grad_norm": 357.2254333496094, + "grad_norm": 9029.4677734375, "learning_rate": 5.054e-06, - "loss": 17.7826, + "loss": 57952.7812, "step": 25270 }, { "epoch": 0.05106719942468598, - "grad_norm": 178.88450622558594, + "grad_norm": 61416.8359375, "learning_rate": 5.056000000000001e-06, - "loss": 32.328, + "loss": 195928.9, "step": 25280 }, { "epoch": 0.051087400057369795, - "grad_norm": 73.72966766357422, + "grad_norm": 118745.4453125, "learning_rate": 5.0580000000000005e-06, - "loss": 39.4752, + "loss": 249751.225, "step": 25290 }, { "epoch": 0.051107600690053615, "grad_norm": 0.0, "learning_rate": 5.060000000000001e-06, - "loss": 42.9825, + "loss": 235968.35, "step": 25300 }, { "epoch": 0.05112780132273743, "grad_norm": 0.0, "learning_rate": 5.062e-06, - "loss": 19.2751, + "loss": 98508.2375, "step": 25310 }, { "epoch": 0.05114800195542124, - "grad_norm": 164.6295928955078, + "grad_norm": 484406.15625, "learning_rate": 5.064e-06, - "loss": 30.3619, + "loss": 194709.3625, "step": 25320 }, { "epoch": 0.05116820258810506, - "grad_norm": 107.75360107421875, + "grad_norm": 11116.5625, "learning_rate": 5.066000000000001e-06, - "loss": 36.0318, + "loss": 181933.65, "step": 25330 }, { "epoch": 0.051188403220788874, - "grad_norm": 386.89239501953125, + "grad_norm": 148612.796875, "learning_rate": 5.0680000000000004e-06, - "loss": 33.5402, + "loss": 153070.5125, "step": 25340 }, { "epoch": 0.051208603853472694, - "grad_norm": 350.92889404296875, + "grad_norm": 73726.0703125, "learning_rate": 5.070000000000001e-06, - "loss": 34.1303, + "loss": 157852.3625, "step": 25350 }, { "epoch": 0.05122880448615651, - "grad_norm": 494.48809814453125, + "grad_norm": 954959.1875, "learning_rate": 5.072e-06, - "loss": 28.6663, + "loss": 200037.6, "step": 25360 }, { "epoch": 0.05124900511884032, - "grad_norm": 149.0237579345703, + "grad_norm": 36866.81640625, "learning_rate": 5.074e-06, - "loss": 21.9641, + "loss": 72240.2937, "step": 25370 }, { "epoch": 0.05126920575152414, - "grad_norm": 409.06689453125, + "grad_norm": 123478.8359375, "learning_rate": 5.076000000000001e-06, - "loss": 46.543, + "loss": 303564.6, "step": 25380 }, { "epoch": 0.05128940638420795, - "grad_norm": 292.32781982421875, + "grad_norm": 48282.3515625, "learning_rate": 5.078e-06, - "loss": 26.6015, + "loss": 116724.3125, "step": 25390 }, { "epoch": 0.051309607016891766, - "grad_norm": 115.6833267211914, + "grad_norm": 24874.3125, "learning_rate": 5.0800000000000005e-06, - "loss": 34.6124, + "loss": 197594.9125, "step": 25400 }, { "epoch": 0.051329807649575586, - "grad_norm": 152.78005981445312, + "grad_norm": 23997.935546875, "learning_rate": 5.082000000000001e-06, - "loss": 20.7547, + "loss": 78871.3313, "step": 25410 }, { "epoch": 0.0513500082822594, - "grad_norm": 274.0210876464844, + "grad_norm": 43314.53125, "learning_rate": 5.084e-06, - "loss": 30.4251, + "loss": 103144.775, "step": 25420 }, { "epoch": 0.05137020891494322, - "grad_norm": 287.94451904296875, + "grad_norm": 81364.5859375, "learning_rate": 5.086000000000001e-06, - "loss": 17.8743, + "loss": 122690.05, "step": 25430 }, { "epoch": 0.05139040954762703, - "grad_norm": 363.70941162109375, + "grad_norm": 184980.9375, "learning_rate": 5.088000000000001e-06, - "loss": 20.8506, + "loss": 110456.7625, "step": 25440 }, { "epoch": 0.051410610180310845, - "grad_norm": 679.3761596679688, + "grad_norm": 660342.3125, "learning_rate": 5.09e-06, - "loss": 34.454, + "loss": 199093.5, "step": 25450 }, { "epoch": 0.051430810812994665, - "grad_norm": 366.37115478515625, + "grad_norm": 97768.4296875, "learning_rate": 5.0920000000000005e-06, - "loss": 26.0402, + "loss": 127075.8625, "step": 25460 }, { "epoch": 0.05145101144567848, - "grad_norm": 491.2541198730469, + "grad_norm": 288051.125, "learning_rate": 5.094e-06, - "loss": 25.1115, + "loss": 225884.975, "step": 25470 }, { "epoch": 0.05147121207836229, - "grad_norm": 479.7918701171875, + "grad_norm": 540998.4375, "learning_rate": 5.096000000000001e-06, - "loss": 30.0957, + "loss": 183354.3375, "step": 25480 }, { "epoch": 0.05149141271104611, - "grad_norm": 84.84467315673828, + "grad_norm": 24153.423828125, "learning_rate": 5.098000000000001e-06, - "loss": 35.6397, + "loss": 438626.95, "step": 25490 }, { "epoch": 0.051511613343729924, - "grad_norm": 741.4186401367188, + "grad_norm": 7466.04248046875, "learning_rate": 5.1e-06, - "loss": 19.2858, + "loss": 75893.3813, "step": 25500 }, { "epoch": 0.051531813976413744, - "grad_norm": 380.09454345703125, + "grad_norm": 330271.78125, "learning_rate": 5.1020000000000004e-06, - "loss": 45.9541, + "loss": 376687.475, "step": 25510 }, { "epoch": 0.05155201460909756, - "grad_norm": 265.50042724609375, + "grad_norm": 25328.001953125, "learning_rate": 5.104e-06, - "loss": 32.1022, + "loss": 225915.275, "step": 25520 }, { "epoch": 0.05157221524178137, - "grad_norm": 543.1854858398438, + "grad_norm": 431035.75, "learning_rate": 5.106000000000001e-06, - "loss": 47.4233, + "loss": 299984.3, "step": 25530 }, { "epoch": 0.05159241587446519, - "grad_norm": 977.0132446289062, + "grad_norm": 613788.75, "learning_rate": 5.108000000000001e-06, - "loss": 52.7017, + "loss": 362494.525, "step": 25540 }, { "epoch": 0.051612616507149, - "grad_norm": 234.1029815673828, + "grad_norm": 51250.18359375, "learning_rate": 5.11e-06, - "loss": 39.0079, + "loss": 182429.0, "step": 25550 }, { "epoch": 0.051632817139832816, - "grad_norm": 133.4276885986328, + "grad_norm": 10250.611328125, "learning_rate": 5.112e-06, - "loss": 38.9247, + "loss": 182064.6875, "step": 25560 }, { "epoch": 0.051653017772516636, - "grad_norm": 562.444091796875, + "grad_norm": 829442.25, "learning_rate": 5.114e-06, - "loss": 47.2719, + "loss": 215986.45, "step": 25570 }, { "epoch": 0.05167321840520045, - "grad_norm": 243.87353515625, + "grad_norm": 91384.59375, "learning_rate": 5.116000000000001e-06, - "loss": 39.1346, + "loss": 119246.7625, "step": 25580 }, { "epoch": 0.05169341903788427, - "grad_norm": 354.45098876953125, + "grad_norm": 365554.9375, "learning_rate": 5.118000000000001e-06, - "loss": 57.6305, + "loss": 214266.775, "step": 25590 }, { "epoch": 0.05171361967056808, - "grad_norm": 194.29457092285156, + "grad_norm": 18747.787109375, "learning_rate": 5.12e-06, - "loss": 24.6174, + "loss": 123327.8125, "step": 25600 }, { "epoch": 0.051733820303251896, - "grad_norm": 237.46725463867188, + "grad_norm": 77922.734375, "learning_rate": 5.122e-06, - "loss": 33.0616, + "loss": 282575.825, "step": 25610 }, { "epoch": 0.051754020935935716, - "grad_norm": 120.91265869140625, + "grad_norm": 9667.7705078125, "learning_rate": 5.124e-06, - "loss": 57.4991, + "loss": 528718.35, "step": 25620 }, { "epoch": 0.05177422156861953, - "grad_norm": 809.0008544921875, + "grad_norm": 1143758.25, "learning_rate": 5.126e-06, - "loss": 41.0335, + "loss": 261946.1, "step": 25630 }, { "epoch": 0.05179442220130334, - "grad_norm": 258.4845275878906, + "grad_norm": 155869.171875, "learning_rate": 5.128000000000001e-06, - "loss": 26.7815, + "loss": 210103.8, "step": 25640 }, { "epoch": 0.05181462283398716, - "grad_norm": 251.044677734375, + "grad_norm": 62702.8515625, "learning_rate": 5.130000000000001e-06, - "loss": 50.3299, + "loss": 213912.675, "step": 25650 }, { "epoch": 0.051834823466670975, - "grad_norm": 314.73687744140625, + "grad_norm": 29887.810546875, "learning_rate": 5.132e-06, - "loss": 44.4338, + "loss": 261600.55, "step": 25660 }, { "epoch": 0.051855024099354795, - "grad_norm": 120.0523452758789, + "grad_norm": 24761.353515625, "learning_rate": 5.134e-06, - "loss": 29.2697, + "loss": 161718.3375, "step": 25670 }, { "epoch": 0.05187522473203861, - "grad_norm": 300.96966552734375, + "grad_norm": 13480.5, "learning_rate": 5.136e-06, - "loss": 34.9113, + "loss": 143136.7375, "step": 25680 }, { "epoch": 0.05189542536472242, - "grad_norm": 159.7620849609375, + "grad_norm": 20074.90234375, "learning_rate": 5.138000000000001e-06, - "loss": 22.3714, + "loss": 117870.2, "step": 25690 }, { "epoch": 0.05191562599740624, - "grad_norm": 408.8251037597656, + "grad_norm": 71217.96875, "learning_rate": 5.140000000000001e-06, - "loss": 37.7346, + "loss": 353143.525, "step": 25700 }, { "epoch": 0.051935826630090054, - "grad_norm": 1339.5537109375, + "grad_norm": 300128.0625, "learning_rate": 5.142e-06, - "loss": 30.9084, + "loss": 175699.675, "step": 25710 }, { "epoch": 0.05195602726277387, - "grad_norm": 195.84228515625, + "grad_norm": 8646.8408203125, "learning_rate": 5.144e-06, - "loss": 21.4031, + "loss": 167022.9375, "step": 25720 }, { "epoch": 0.05197622789545769, - "grad_norm": 283.7611999511719, + "grad_norm": 45141.6796875, "learning_rate": 5.1459999999999995e-06, - "loss": 31.4553, + "loss": 142192.425, "step": 25730 }, { "epoch": 0.0519964285281415, - "grad_norm": 406.22857666015625, + "grad_norm": 586271.5, "learning_rate": 5.1480000000000005e-06, - "loss": 34.3794, + "loss": 240436.2, "step": 25740 }, { "epoch": 0.05201662916082532, - "grad_norm": 1262.8756103515625, + "grad_norm": 117848.0234375, "learning_rate": 5.150000000000001e-06, - "loss": 36.6906, + "loss": 245883.55, "step": 25750 }, { "epoch": 0.05203682979350913, - "grad_norm": 40.364742279052734, + "grad_norm": 902.4866943359375, "learning_rate": 5.152e-06, - "loss": 32.7776, + "loss": 169885.0625, "step": 25760 }, { "epoch": 0.052057030426192946, - "grad_norm": 217.0058135986328, + "grad_norm": 81505.6015625, "learning_rate": 5.154e-06, - "loss": 30.9879, + "loss": 208023.7875, "step": 25770 }, { "epoch": 0.052077231058876766, - "grad_norm": 1036.8714599609375, + "grad_norm": 0.0, "learning_rate": 5.156e-06, - "loss": 34.4645, + "loss": 53045.375, "step": 25780 }, { "epoch": 0.05209743169156058, - "grad_norm": 394.1192626953125, + "grad_norm": 75648.5625, "learning_rate": 5.158e-06, - "loss": 47.2173, + "loss": 159900.825, "step": 25790 }, { "epoch": 0.05211763232424439, - "grad_norm": 172.93893432617188, + "grad_norm": 7587.94482421875, "learning_rate": 5.1600000000000006e-06, - "loss": 27.2692, + "loss": 103219.2312, "step": 25800 }, { "epoch": 0.05213783295692821, - "grad_norm": 331.1111755371094, + "grad_norm": 23899.544921875, "learning_rate": 5.162000000000001e-06, - "loss": 38.2051, + "loss": 238771.525, "step": 25810 }, { "epoch": 0.052158033589612025, - "grad_norm": 62.95143508911133, + "grad_norm": 29328.091796875, "learning_rate": 5.164e-06, - "loss": 35.2421, + "loss": 220910.575, "step": 25820 }, { "epoch": 0.052178234222295845, - "grad_norm": 206.21441650390625, + "grad_norm": 100886.6484375, "learning_rate": 5.166e-06, - "loss": 38.8744, + "loss": 297657.8, "step": 25830 }, { "epoch": 0.05219843485497966, - "grad_norm": 534.4074096679688, + "grad_norm": 957283.1875, "learning_rate": 5.168000000000001e-06, - "loss": 45.46, + "loss": 352892.95, "step": 25840 }, { "epoch": 0.05221863548766347, - "grad_norm": 190.77833557128906, + "grad_norm": 30210.68359375, "learning_rate": 5.1700000000000005e-06, - "loss": 36.677, + "loss": 175579.1375, "step": 25850 }, { "epoch": 0.05223883612034729, - "grad_norm": 115.88018798828125, + "grad_norm": 78609.9375, "learning_rate": 5.172000000000001e-06, - "loss": 37.8392, + "loss": 318625.475, "step": 25860 }, { "epoch": 0.052259036753031105, - "grad_norm": 408.080078125, + "grad_norm": 427091.625, "learning_rate": 5.174e-06, - "loss": 32.2986, + "loss": 163341.525, "step": 25870 }, { "epoch": 0.05227923738571492, - "grad_norm": 274.748779296875, + "grad_norm": 62483.2421875, "learning_rate": 5.176e-06, - "loss": 39.0948, + "loss": 320273.4, "step": 25880 }, { "epoch": 0.05229943801839874, - "grad_norm": 236.39987182617188, + "grad_norm": 380198.0625, "learning_rate": 5.178000000000001e-06, - "loss": 29.9548, + "loss": 174152.475, "step": 25890 }, { "epoch": 0.05231963865108255, - "grad_norm": 579.6231079101562, + "grad_norm": 107963.5625, "learning_rate": 5.18e-06, - "loss": 46.9257, + "loss": 326810.15, "step": 25900 }, { "epoch": 0.05233983928376637, - "grad_norm": 159.98565673828125, + "grad_norm": 12469.6435546875, "learning_rate": 5.1820000000000005e-06, - "loss": 51.2098, + "loss": 277246.375, "step": 25910 }, { "epoch": 0.052360039916450184, - "grad_norm": 481.517578125, + "grad_norm": 882569.0, "learning_rate": 5.184e-06, - "loss": 38.5569, + "loss": 266196.5, "step": 25920 }, { "epoch": 0.052380240549134, - "grad_norm": 218.76170349121094, + "grad_norm": 61912.4375, "learning_rate": 5.186e-06, - "loss": 34.3429, + "loss": 228704.1, "step": 25930 }, { "epoch": 0.05240044118181782, - "grad_norm": 143.18772888183594, + "grad_norm": 29344.42578125, "learning_rate": 5.188000000000001e-06, - "loss": 28.4076, + "loss": 355023.05, "step": 25940 }, { "epoch": 0.05242064181450163, - "grad_norm": 273.875244140625, + "grad_norm": 109166.078125, "learning_rate": 5.19e-06, - "loss": 18.0686, + "loss": 88604.9, "step": 25950 }, { "epoch": 0.05244084244718544, - "grad_norm": 219.48605346679688, + "grad_norm": 5392.80029296875, "learning_rate": 5.1920000000000004e-06, - "loss": 28.1385, + "loss": 98628.3188, "step": 25960 }, { "epoch": 0.05246104307986926, - "grad_norm": 32.729549407958984, + "grad_norm": 1724.328369140625, "learning_rate": 5.194e-06, - "loss": 14.7589, + "loss": 61481.8938, "step": 25970 }, { "epoch": 0.052481243712553076, - "grad_norm": 244.2727508544922, + "grad_norm": 61635.04296875, "learning_rate": 5.196e-06, - "loss": 28.1004, + "loss": 91608.55, "step": 25980 }, { "epoch": 0.052501444345236896, - "grad_norm": 63.02434539794922, + "grad_norm": 5503.791015625, "learning_rate": 5.198000000000001e-06, - "loss": 29.5537, + "loss": 60566.9313, "step": 25990 }, { "epoch": 0.05252164497792071, - "grad_norm": 315.0021667480469, + "grad_norm": 61934.30078125, "learning_rate": 5.2e-06, - "loss": 25.2316, + "loss": 111545.325, "step": 26000 }, { "epoch": 0.05254184561060452, - "grad_norm": 412.6581115722656, + "grad_norm": 535418.625, "learning_rate": 5.202e-06, - "loss": 29.1501, + "loss": 159517.5, "step": 26010 }, { "epoch": 0.05256204624328834, - "grad_norm": 495.1465148925781, + "grad_norm": 436808.09375, "learning_rate": 5.2040000000000005e-06, - "loss": 40.651, + "loss": 267469.0, "step": 26020 }, { "epoch": 0.052582246875972155, - "grad_norm": 251.30874633789062, + "grad_norm": 62824.35546875, "learning_rate": 5.206e-06, - "loss": 24.8458, + "loss": 127481.5375, "step": 26030 }, { "epoch": 0.05260244750865597, - "grad_norm": 340.6131896972656, + "grad_norm": 273621.84375, "learning_rate": 5.208000000000001e-06, - "loss": 27.7611, + "loss": 283101.125, "step": 26040 }, { "epoch": 0.05262264814133979, - "grad_norm": 333.89617919921875, + "grad_norm": 413390.21875, "learning_rate": 5.210000000000001e-06, - "loss": 19.1768, + "loss": 144707.3, "step": 26050 }, { "epoch": 0.0526428487740236, - "grad_norm": 104.84229278564453, + "grad_norm": 14754.4853515625, "learning_rate": 5.212e-06, - "loss": 41.2576, + "loss": 302473.525, "step": 26060 }, { "epoch": 0.05266304940670742, - "grad_norm": 365.7760925292969, + "grad_norm": 52560.31640625, "learning_rate": 5.214e-06, - "loss": 26.6519, + "loss": 117275.95, "step": 26070 }, { "epoch": 0.052683250039391234, - "grad_norm": 535.5447998046875, + "grad_norm": 429896.3125, "learning_rate": 5.216e-06, - "loss": 33.7481, + "loss": 290092.675, "step": 26080 }, { "epoch": 0.05270345067207505, - "grad_norm": 367.01727294921875, + "grad_norm": 72828.9453125, "learning_rate": 5.218000000000001e-06, - "loss": 33.4843, + "loss": 106867.7375, "step": 26090 }, { "epoch": 0.05272365130475887, - "grad_norm": 122.15428161621094, + "grad_norm": 23054.4765625, "learning_rate": 5.220000000000001e-06, - "loss": 14.3232, + "loss": 53356.8313, "step": 26100 }, { "epoch": 0.05274385193744268, - "grad_norm": 409.5092468261719, + "grad_norm": 165641.46875, "learning_rate": 5.222e-06, - "loss": 34.1983, + "loss": 86914.2688, "step": 26110 }, { "epoch": 0.05276405257012649, - "grad_norm": 290.99560546875, + "grad_norm": 76364.078125, "learning_rate": 5.224e-06, - "loss": 35.0679, + "loss": 201528.6875, "step": 26120 }, { "epoch": 0.05278425320281031, - "grad_norm": 5402.544921875, + "grad_norm": 203256.640625, "learning_rate": 5.226e-06, - "loss": 55.7997, + "loss": 221695.775, "step": 26130 }, { "epoch": 0.052804453835494126, "grad_norm": 0.0, "learning_rate": 5.228000000000001e-06, - "loss": 54.6845, + "loss": 335705.675, "step": 26140 }, { "epoch": 0.052824654468177946, - "grad_norm": 121.34117126464844, + "grad_norm": 51113.16796875, "learning_rate": 5.230000000000001e-06, - "loss": 28.876, + "loss": 173226.7625, "step": 26150 }, { "epoch": 0.05284485510086176, - "grad_norm": 357.8832092285156, + "grad_norm": 79905.0703125, "learning_rate": 5.232e-06, - "loss": 30.5092, + "loss": 114188.9125, "step": 26160 }, { "epoch": 0.05286505573354557, - "grad_norm": 311.82489013671875, + "grad_norm": 103999.25, "learning_rate": 5.234e-06, - "loss": 39.6837, + "loss": 217416.2, "step": 26170 }, { "epoch": 0.05288525636622939, - "grad_norm": 37.93082809448242, + "grad_norm": 7198.185546875, "learning_rate": 5.236e-06, - "loss": 13.6538, + "loss": 41921.9531, "step": 26180 }, { "epoch": 0.052905456998913206, - "grad_norm": 345.971435546875, + "grad_norm": 12707.7685546875, "learning_rate": 5.2380000000000005e-06, - "loss": 24.8791, + "loss": 180388.6875, "step": 26190 }, { "epoch": 0.05292565763159702, - "grad_norm": 175.8555145263672, + "grad_norm": 6060.2939453125, "learning_rate": 5.240000000000001e-06, - "loss": 25.0739, + "loss": 123967.0875, "step": 26200 }, { "epoch": 0.05294585826428084, - "grad_norm": 614.9837646484375, + "grad_norm": 592384.125, "learning_rate": 5.242000000000001e-06, - "loss": 28.5274, + "loss": 182326.0, "step": 26210 }, { "epoch": 0.05296605889696465, - "grad_norm": 262.697509765625, + "grad_norm": 69057.9609375, "learning_rate": 5.244e-06, - "loss": 21.9201, + "loss": 123603.5875, "step": 26220 }, { "epoch": 0.05298625952964847, - "grad_norm": 277.9417724609375, + "grad_norm": 601.5535888671875, "learning_rate": 5.246e-06, - "loss": 33.9503, + "loss": 376871.875, "step": 26230 }, { "epoch": 0.053006460162332285, - "grad_norm": 508.5185852050781, + "grad_norm": 484327.21875, "learning_rate": 5.248000000000001e-06, - "loss": 21.1503, + "loss": 112456.475, "step": 26240 }, { "epoch": 0.0530266607950161, - "grad_norm": 85.74031066894531, + "grad_norm": 5033.17626953125, "learning_rate": 5.2500000000000006e-06, - "loss": 54.1428, + "loss": 250514.05, "step": 26250 }, { "epoch": 0.05304686142769992, - "grad_norm": 18.77439308166504, + "grad_norm": 5658.38427734375, "learning_rate": 5.252000000000001e-06, - "loss": 34.7449, + "loss": 132914.575, "step": 26260 }, { "epoch": 0.05306706206038373, - "grad_norm": 434.8307800292969, + "grad_norm": 135209.234375, "learning_rate": 5.254e-06, - "loss": 47.4321, + "loss": 340273.775, "step": 26270 }, { "epoch": 0.053087262693067544, - "grad_norm": 94.10435485839844, + "grad_norm": 5511.95166015625, "learning_rate": 5.256e-06, - "loss": 21.691, + "loss": 177776.4125, "step": 26280 }, { "epoch": 0.053107463325751364, - "grad_norm": 249.16969299316406, + "grad_norm": 10729.9658203125, "learning_rate": 5.258000000000001e-06, - "loss": 27.1049, + "loss": 213930.725, "step": 26290 }, { "epoch": 0.05312766395843518, - "grad_norm": 259.4500732421875, + "grad_norm": 65558.234375, "learning_rate": 5.2600000000000005e-06, - "loss": 41.8547, + "loss": 286358.7, "step": 26300 }, { "epoch": 0.053147864591119, - "grad_norm": 117.62743377685547, + "grad_norm": 6762.1025390625, "learning_rate": 5.262000000000001e-06, - "loss": 39.8521, + "loss": 191046.9625, "step": 26310 }, { "epoch": 0.05316806522380281, - "grad_norm": 718.5263061523438, + "grad_norm": 1467687.375, "learning_rate": 5.264e-06, - "loss": 55.0665, + "loss": 492894.4, "step": 26320 }, { "epoch": 0.05318826585648662, - "grad_norm": 386.2872009277344, + "grad_norm": 231693.671875, "learning_rate": 5.266e-06, - "loss": 31.59, + "loss": 266921.8, "step": 26330 }, { "epoch": 0.05320846648917044, - "grad_norm": 241.26043701171875, + "grad_norm": 17128.255859375, "learning_rate": 5.268000000000001e-06, - "loss": 29.2935, + "loss": 128311.85, "step": 26340 }, { "epoch": 0.053228667121854256, - "grad_norm": 734.0238647460938, + "grad_norm": 950382.375, "learning_rate": 5.27e-06, - "loss": 49.3361, + "loss": 265746.375, "step": 26350 }, { "epoch": 0.05324886775453807, - "grad_norm": 773.4956665039062, + "grad_norm": 823391.5, "learning_rate": 5.2720000000000005e-06, - "loss": 51.6634, + "loss": 331165.55, "step": 26360 }, { "epoch": 0.05326906838722189, - "grad_norm": 118.69739532470703, + "grad_norm": 18675.03515625, "learning_rate": 5.274e-06, - "loss": 28.0177, + "loss": 166269.8625, "step": 26370 }, { "epoch": 0.0532892690199057, - "grad_norm": 13.86148452758789, + "grad_norm": 80585.8125, "learning_rate": 5.276e-06, - "loss": 17.8887, + "loss": 127365.725, "step": 26380 }, { "epoch": 0.05330946965258952, - "grad_norm": 707.5802001953125, + "grad_norm": 432938.46875, "learning_rate": 5.278000000000001e-06, - "loss": 22.4515, + "loss": 112844.1875, "step": 26390 }, { "epoch": 0.053329670285273335, - "grad_norm": 293.2173156738281, + "grad_norm": 47061.38671875, "learning_rate": 5.28e-06, - "loss": 22.0472, + "loss": 149747.725, "step": 26400 }, { "epoch": 0.05334987091795715, - "grad_norm": 107.76779174804688, + "grad_norm": 17135.58203125, "learning_rate": 5.282e-06, - "loss": 28.122, + "loss": 130563.0375, "step": 26410 }, { "epoch": 0.05337007155064097, - "grad_norm": 319.65496826171875, + "grad_norm": 97073.65625, "learning_rate": 5.2840000000000006e-06, - "loss": 54.3718, + "loss": 423604.2, "step": 26420 }, { "epoch": 0.05339027218332478, - "grad_norm": 302.2618103027344, + "grad_norm": 117083.9296875, "learning_rate": 5.286e-06, - "loss": 18.9766, + "loss": 45923.4938, "step": 26430 }, { "epoch": 0.053410472816008595, - "grad_norm": 124.77485656738281, + "grad_norm": 19711.3203125, "learning_rate": 5.288000000000001e-06, - "loss": 18.7363, + "loss": 111860.925, "step": 26440 }, { "epoch": 0.053430673448692415, - "grad_norm": 19.75719451904297, + "grad_norm": 919.495361328125, "learning_rate": 5.290000000000001e-06, - "loss": 32.6725, + "loss": 293600.2, "step": 26450 }, { "epoch": 0.05345087408137623, - "grad_norm": 160.94912719726562, + "grad_norm": 31041.552734375, "learning_rate": 5.292e-06, - "loss": 43.9855, + "loss": 251877.175, "step": 26460 }, { "epoch": 0.05347107471406005, - "grad_norm": 303.1990661621094, + "grad_norm": 102984.0, "learning_rate": 5.2940000000000005e-06, - "loss": 49.4688, + "loss": 331021.675, "step": 26470 }, { "epoch": 0.05349127534674386, - "grad_norm": 438.0093078613281, + "grad_norm": 127707.3984375, "learning_rate": 5.296e-06, - "loss": 17.595, + "loss": 82888.425, "step": 26480 }, { "epoch": 0.053511475979427674, - "grad_norm": 265.2622985839844, + "grad_norm": 26926.275390625, "learning_rate": 5.298000000000001e-06, - "loss": 34.8372, + "loss": 194264.1625, "step": 26490 }, { "epoch": 0.053531676612111494, - "grad_norm": 342.2603759765625, + "grad_norm": 36639.1171875, "learning_rate": 5.300000000000001e-06, - "loss": 64.3463, + "loss": 190151.2375, "step": 26500 }, { "epoch": 0.05355187724479531, - "grad_norm": 297.47625732421875, + "grad_norm": 44145.4296875, "learning_rate": 5.302e-06, - "loss": 24.1028, + "loss": 166751.8375, "step": 26510 }, { "epoch": 0.05357207787747912, - "grad_norm": 217.04269409179688, + "grad_norm": 132443.671875, "learning_rate": 5.304e-06, - "loss": 25.0686, + "loss": 97547.2437, "step": 26520 }, { "epoch": 0.05359227851016294, - "grad_norm": 442.0998840332031, + "grad_norm": 106548.953125, "learning_rate": 5.306e-06, - "loss": 46.3262, + "loss": 193446.95, "step": 26530 }, { "epoch": 0.05361247914284675, - "grad_norm": 326.03790283203125, + "grad_norm": 1130302.125, "learning_rate": 5.308000000000001e-06, - "loss": 11.7794, + "loss": 151510.6375, "step": 26540 }, { "epoch": 0.05363267977553057, - "grad_norm": 453.63458251953125, + "grad_norm": 26640.197265625, "learning_rate": 5.310000000000001e-06, - "loss": 39.3804, + "loss": 277612.825, "step": 26550 }, { "epoch": 0.053652880408214386, - "grad_norm": 53.374000549316406, + "grad_norm": 97146.75, "learning_rate": 5.312e-06, - "loss": 36.1299, + "loss": 281926.575, "step": 26560 }, { "epoch": 0.0536730810408982, - "grad_norm": 269.9315490722656, + "grad_norm": 11702.3515625, "learning_rate": 5.314e-06, - "loss": 26.4243, + "loss": 163853.125, "step": 26570 }, { "epoch": 0.05369328167358202, - "grad_norm": 386.93377685546875, + "grad_norm": 73309.0, "learning_rate": 5.3160000000000004e-06, - "loss": 28.9528, + "loss": 74291.325, "step": 26580 }, { "epoch": 0.05371348230626583, - "grad_norm": 253.3940887451172, + "grad_norm": 42692.0625, "learning_rate": 5.318000000000001e-06, - "loss": 30.4764, + "loss": 173483.7875, "step": 26590 }, { "epoch": 0.053733682938949645, "grad_norm": 0.0, "learning_rate": 5.320000000000001e-06, - "loss": 38.4537, + "loss": 325556.725, "step": 26600 }, { "epoch": 0.053753883571633465, - "grad_norm": 417.5376892089844, + "grad_norm": 47996.55859375, "learning_rate": 5.322000000000001e-06, - "loss": 25.8576, + "loss": 150749.325, "step": 26610 }, { "epoch": 0.05377408420431728, - "grad_norm": 199.4116668701172, + "grad_norm": 28416.990234375, "learning_rate": 5.324e-06, - "loss": 17.2241, + "loss": 81343.4, "step": 26620 }, { "epoch": 0.0537942848370011, - "grad_norm": 743.338134765625, + "grad_norm": 1013026.875, "learning_rate": 5.326e-06, - "loss": 33.0902, + "loss": 237877.35, "step": 26630 }, { "epoch": 0.05381448546968491, - "grad_norm": 156.85171508789062, + "grad_norm": 34752.0234375, "learning_rate": 5.328000000000001e-06, - "loss": 18.8293, + "loss": 152764.6625, "step": 26640 }, { "epoch": 0.053834686102368724, - "grad_norm": 406.0733947753906, + "grad_norm": 257996.171875, "learning_rate": 5.330000000000001e-06, - "loss": 47.1567, + "loss": 337893.425, "step": 26650 }, { "epoch": 0.053854886735052544, - "grad_norm": 206.8536834716797, + "grad_norm": 17745.84375, "learning_rate": 5.332000000000001e-06, - "loss": 58.3281, + "loss": 317401.675, "step": 26660 }, { "epoch": 0.05387508736773636, - "grad_norm": 291.2559814453125, + "grad_norm": 150808.796875, "learning_rate": 5.334e-06, - "loss": 35.1946, + "loss": 242034.525, "step": 26670 }, { "epoch": 0.05389528800042017, - "grad_norm": 275.2223815917969, + "grad_norm": 60681.08203125, "learning_rate": 5.336e-06, - "loss": 37.8603, + "loss": 298649.525, "step": 26680 }, { "epoch": 0.05391548863310399, - "grad_norm": 283.79132080078125, + "grad_norm": 22977.359375, "learning_rate": 5.338000000000001e-06, - "loss": 39.2534, + "loss": 260379.375, "step": 26690 }, { "epoch": 0.0539356892657878, - "grad_norm": 126.51179504394531, + "grad_norm": 38297.8828125, "learning_rate": 5.3400000000000005e-06, - "loss": 18.4498, + "loss": 148360.575, "step": 26700 }, { "epoch": 0.05395588989847162, - "grad_norm": 298.1804504394531, + "grad_norm": 73063.5703125, "learning_rate": 5.342000000000001e-06, - "loss": 44.271, + "loss": 207131.3625, "step": 26710 }, { "epoch": 0.053976090531155436, - "grad_norm": 400.8433532714844, + "grad_norm": 93101.859375, "learning_rate": 5.344e-06, - "loss": 29.7003, + "loss": 73573.4125, "step": 26720 }, { "epoch": 0.05399629116383925, - "grad_norm": 219.6957550048828, + "grad_norm": 50973.5390625, "learning_rate": 5.346e-06, - "loss": 33.9938, + "loss": 240435.925, "step": 26730 }, { "epoch": 0.05401649179652307, - "grad_norm": 347.8514099121094, + "grad_norm": 627026.1875, "learning_rate": 5.348000000000001e-06, - "loss": 42.7821, + "loss": 416361.4, "step": 26740 }, { "epoch": 0.05403669242920688, - "grad_norm": 539.4111328125, + "grad_norm": 20480.6328125, "learning_rate": 5.3500000000000004e-06, - "loss": 19.2532, + "loss": 76867.6313, "step": 26750 }, { "epoch": 0.054056893061890696, - "grad_norm": 515.8953247070312, + "grad_norm": 149700.8125, "learning_rate": 5.352000000000001e-06, - "loss": 37.248, + "loss": 316743.05, "step": 26760 }, { "epoch": 0.054077093694574516, - "grad_norm": 1480.735595703125, + "grad_norm": 313564.5, "learning_rate": 5.354e-06, - "loss": 29.1723, + "loss": 84169.45, "step": 26770 }, { "epoch": 0.05409729432725833, - "grad_norm": 350.07733154296875, + "grad_norm": 124371.3125, "learning_rate": 5.356e-06, - "loss": 24.7955, + "loss": 78897.0312, "step": 26780 }, { "epoch": 0.05411749495994215, - "grad_norm": 105.70265197753906, + "grad_norm": 48927.36328125, "learning_rate": 5.358000000000001e-06, - "loss": 21.997, + "loss": 135612.85, "step": 26790 }, { "epoch": 0.05413769559262596, - "grad_norm": 298.6070556640625, + "grad_norm": 21222.384765625, "learning_rate": 5.36e-06, - "loss": 24.973, + "loss": 85542.1438, "step": 26800 }, { "epoch": 0.054157896225309775, - "grad_norm": 826.4459838867188, + "grad_norm": 1047003.625, "learning_rate": 5.3620000000000005e-06, - "loss": 44.188, + "loss": 350341.375, "step": 26810 }, { "epoch": 0.054178096857993595, - "grad_norm": 761.2321166992188, + "grad_norm": 222856.03125, "learning_rate": 5.364000000000001e-06, - "loss": 23.6996, + "loss": 93049.5875, "step": 26820 }, { "epoch": 0.05419829749067741, - "grad_norm": 198.7512969970703, + "grad_norm": 29923.12109375, "learning_rate": 5.366e-06, - "loss": 31.6769, + "loss": 160696.775, "step": 26830 }, { "epoch": 0.05421849812336122, - "grad_norm": 244.68893432617188, + "grad_norm": 103775.1484375, "learning_rate": 5.368000000000001e-06, - "loss": 48.2041, + "loss": 229625.075, "step": 26840 }, { "epoch": 0.05423869875604504, - "grad_norm": 307.557373046875, + "grad_norm": 54544.16796875, "learning_rate": 5.370000000000001e-06, - "loss": 23.0467, + "loss": 161113.225, "step": 26850 }, { "epoch": 0.054258899388728854, - "grad_norm": 324.9233703613281, + "grad_norm": 293665.8125, "learning_rate": 5.372e-06, - "loss": 20.549, + "loss": 157361.2875, "step": 26860 }, { "epoch": 0.054279100021412674, - "grad_norm": 232.0588836669922, + "grad_norm": 113816.6640625, "learning_rate": 5.3740000000000006e-06, - "loss": 27.1651, + "loss": 178958.025, "step": 26870 }, { "epoch": 0.05429930065409649, - "grad_norm": 227.4842071533203, + "grad_norm": 102619.703125, "learning_rate": 5.376e-06, - "loss": 29.5139, + "loss": 161754.7, "step": 26880 }, { "epoch": 0.0543195012867803, - "grad_norm": 323.15338134765625, + "grad_norm": 278376.96875, "learning_rate": 5.378e-06, - "loss": 48.1717, + "loss": 385235.2, "step": 26890 }, { "epoch": 0.05433970191946412, - "grad_norm": 225.5341033935547, + "grad_norm": 8405.0947265625, "learning_rate": 5.380000000000001e-06, - "loss": 31.1154, + "loss": 132917.3125, "step": 26900 }, { "epoch": 0.05435990255214793, - "grad_norm": 504.28497314453125, + "grad_norm": 731409.6875, "learning_rate": 5.382e-06, - "loss": 26.7281, + "loss": 194016.775, "step": 26910 }, { "epoch": 0.054380103184831746, - "grad_norm": 144.5900115966797, + "grad_norm": 19908.9765625, "learning_rate": 5.3840000000000005e-06, - "loss": 23.0646, + "loss": 88184.0, "step": 26920 }, { "epoch": 0.054400303817515566, - "grad_norm": 251.57594299316406, + "grad_norm": 64301.03515625, "learning_rate": 5.386e-06, - "loss": 33.4303, + "loss": 137771.3875, "step": 26930 }, { "epoch": 0.05442050445019938, - "grad_norm": 930.7855834960938, + "grad_norm": 143947.328125, "learning_rate": 5.388e-06, - "loss": 37.4148, + "loss": 118437.925, "step": 26940 }, { "epoch": 0.0544407050828832, - "grad_norm": 309.0507507324219, + "grad_norm": 175812.203125, "learning_rate": 5.390000000000001e-06, - "loss": 29.1236, + "loss": 195024.4375, "step": 26950 }, { "epoch": 0.05446090571556701, - "grad_norm": 399.0177001953125, + "grad_norm": 60475.78125, "learning_rate": 5.392e-06, - "loss": 33.7618, + "loss": 150154.2, "step": 26960 }, { "epoch": 0.054481106348250825, - "grad_norm": 568.4434204101562, + "grad_norm": 977087.5625, "learning_rate": 5.394e-06, - "loss": 42.4966, + "loss": 249210.975, "step": 26970 }, { "epoch": 0.054501306980934645, - "grad_norm": 78.31511688232422, + "grad_norm": 29356.357421875, "learning_rate": 5.3960000000000005e-06, - "loss": 21.6994, + "loss": 124225.475, "step": 26980 }, { "epoch": 0.05452150761361846, - "grad_norm": 203.22230529785156, + "grad_norm": 70517.9453125, "learning_rate": 5.398e-06, - "loss": 57.3447, + "loss": 367245.6, "step": 26990 }, { "epoch": 0.05454170824630227, - "grad_norm": 316.94281005859375, + "grad_norm": 159334.65625, "learning_rate": 5.400000000000001e-06, - "loss": 21.6052, + "loss": 125299.6125, "step": 27000 }, { "epoch": 0.05456190887898609, - "grad_norm": 349.6338195800781, + "grad_norm": 220720.40625, "learning_rate": 5.402000000000001e-06, - "loss": 25.8364, + "loss": 126674.7875, "step": 27010 }, { "epoch": 0.054582109511669905, - "grad_norm": 269.2860412597656, + "grad_norm": 127481.78125, "learning_rate": 5.404e-06, - "loss": 36.8285, + "loss": 162041.6375, "step": 27020 }, { "epoch": 0.054602310144353725, - "grad_norm": 590.0916748046875, + "grad_norm": 712984.8125, "learning_rate": 5.406e-06, - "loss": 30.6549, + "loss": 278258.45, "step": 27030 }, { "epoch": 0.05462251077703754, - "grad_norm": 163.40185546875, + "grad_norm": 14313.9833984375, "learning_rate": 5.408e-06, - "loss": 16.0808, + "loss": 45535.5594, "step": 27040 }, { "epoch": 0.05464271140972135, - "grad_norm": 426.44573974609375, + "grad_norm": 569033.75, "learning_rate": 5.410000000000001e-06, - "loss": 22.1718, + "loss": 145753.2, "step": 27050 }, { "epoch": 0.05466291204240517, - "grad_norm": 99.6770248413086, + "grad_norm": 96814.84375, "learning_rate": 5.412000000000001e-06, - "loss": 24.1849, + "loss": 160814.0, "step": 27060 }, { "epoch": 0.054683112675088984, - "grad_norm": 481.256591796875, + "grad_norm": 82058.2109375, "learning_rate": 5.414e-06, - "loss": 38.6982, + "loss": 236671.975, "step": 27070 }, { "epoch": 0.0547033133077728, - "grad_norm": 145.97348022460938, + "grad_norm": 7158.68017578125, "learning_rate": 5.416e-06, - "loss": 38.3321, + "loss": 222862.575, "step": 27080 }, { "epoch": 0.05472351394045662, - "grad_norm": 244.0701446533203, + "grad_norm": 31144.947265625, "learning_rate": 5.418e-06, - "loss": 44.1002, + "loss": 265031.65, "step": 27090 }, { "epoch": 0.05474371457314043, - "grad_norm": 66.1285171508789, + "grad_norm": 4945.10107421875, "learning_rate": 5.420000000000001e-06, - "loss": 41.6084, + "loss": 249014.9, "step": 27100 }, { "epoch": 0.05476391520582425, - "grad_norm": 471.6724548339844, + "grad_norm": 925863.1875, "learning_rate": 5.422000000000001e-06, - "loss": 25.6201, + "loss": 142253.5875, "step": 27110 }, { "epoch": 0.05478411583850806, - "grad_norm": 116.57247161865234, + "grad_norm": 33364.62890625, "learning_rate": 5.424e-06, - "loss": 30.7712, + "loss": 309218.575, "step": 27120 }, { "epoch": 0.054804316471191876, - "grad_norm": 235.6954803466797, + "grad_norm": 218499.953125, "learning_rate": 5.426e-06, - "loss": 25.578, + "loss": 135660.5875, "step": 27130 }, { "epoch": 0.054824517103875696, - "grad_norm": 229.59060668945312, + "grad_norm": 159454.09375, "learning_rate": 5.4279999999999995e-06, - "loss": 38.1911, + "loss": 192879.45, "step": 27140 }, { "epoch": 0.05484471773655951, - "grad_norm": 369.60626220703125, + "grad_norm": 47839.51953125, "learning_rate": 5.4300000000000005e-06, - "loss": 15.9542, + "loss": 74735.0375, "step": 27150 }, { "epoch": 0.05486491836924332, - "grad_norm": 556.3214721679688, + "grad_norm": 1001702.0, "learning_rate": 5.432000000000001e-06, - "loss": 25.2471, + "loss": 191364.025, "step": 27160 }, { "epoch": 0.05488511900192714, - "grad_norm": 453.157958984375, + "grad_norm": 526862.8125, "learning_rate": 5.434e-06, - "loss": 39.5062, + "loss": 337501.975, "step": 27170 }, { "epoch": 0.054905319634610955, - "grad_norm": 239.89064025878906, + "grad_norm": 113412.3671875, "learning_rate": 5.436e-06, - "loss": 25.2629, + "loss": 115087.7375, "step": 27180 }, { "epoch": 0.054925520267294775, - "grad_norm": 190.35305786132812, + "grad_norm": 52871.29296875, "learning_rate": 5.438e-06, - "loss": 18.187, + "loss": 45310.4062, "step": 27190 }, { "epoch": 0.05494572089997859, - "grad_norm": 395.3410339355469, + "grad_norm": 19495.1953125, "learning_rate": 5.4400000000000004e-06, - "loss": 25.4592, + "loss": 141040.025, "step": 27200 }, { "epoch": 0.0549659215326624, - "grad_norm": 688.203125, + "grad_norm": 165418.03125, "learning_rate": 5.442000000000001e-06, - "loss": 63.9065, + "loss": 282270.75, "step": 27210 }, { "epoch": 0.05498612216534622, - "grad_norm": 192.1884002685547, + "grad_norm": 96893.8515625, "learning_rate": 5.444000000000001e-06, - "loss": 40.2984, + "loss": 366059.375, "step": 27220 }, { "epoch": 0.055006322798030034, - "grad_norm": 211.1009521484375, + "grad_norm": 30724.47265625, "learning_rate": 5.446e-06, - "loss": 29.2781, + "loss": 230927.225, "step": 27230 }, { "epoch": 0.05502652343071385, - "grad_norm": 109.21246337890625, + "grad_norm": 15142.0810546875, "learning_rate": 5.448e-06, - "loss": 17.4171, + "loss": 121425.1625, "step": 27240 }, { "epoch": 0.05504672406339767, - "grad_norm": 353.7481994628906, + "grad_norm": 14920.7021484375, "learning_rate": 5.450000000000001e-06, - "loss": 45.4631, + "loss": 202811.8, "step": 27250 }, { "epoch": 0.05506692469608148, - "grad_norm": 745.6088256835938, + "grad_norm": 593301.5625, "learning_rate": 5.4520000000000005e-06, - "loss": 32.752, + "loss": 139869.325, "step": 27260 }, { "epoch": 0.0550871253287653, - "grad_norm": 208.9687957763672, + "grad_norm": 58212.109375, "learning_rate": 5.454000000000001e-06, - "loss": 13.6371, + "loss": 70525.85, "step": 27270 }, { "epoch": 0.05510732596144911, - "grad_norm": 350.83941650390625, + "grad_norm": 304960.875, "learning_rate": 5.456e-06, - "loss": 43.5445, + "loss": 201968.15, "step": 27280 }, { "epoch": 0.055127526594132926, - "grad_norm": 341.1553649902344, + "grad_norm": 347531.5625, "learning_rate": 5.458e-06, - "loss": 38.8892, + "loss": 179619.75, "step": 27290 }, { "epoch": 0.055147727226816746, - "grad_norm": 359.916748046875, + "grad_norm": 196235.859375, "learning_rate": 5.460000000000001e-06, - "loss": 44.6945, + "loss": 235519.675, "step": 27300 }, { "epoch": 0.05516792785950056, - "grad_norm": 38.90704345703125, + "grad_norm": 11971.5146484375, "learning_rate": 5.462e-06, - "loss": 14.8064, + "loss": 106205.375, "step": 27310 }, { "epoch": 0.05518812849218437, - "grad_norm": 278.1080322265625, + "grad_norm": 3787.105712890625, "learning_rate": 5.4640000000000005e-06, - "loss": 63.8273, + "loss": 198383.3625, "step": 27320 }, { "epoch": 0.05520832912486819, - "grad_norm": 436.64990234375, + "grad_norm": 229375.703125, "learning_rate": 5.466e-06, - "loss": 33.218, + "loss": 163762.3625, "step": 27330 }, { "epoch": 0.055228529757552006, - "grad_norm": 314.9707946777344, + "grad_norm": 206920.5, "learning_rate": 5.468e-06, - "loss": 22.9617, + "loss": 129944.0875, "step": 27340 }, { "epoch": 0.05524873039023582, - "grad_norm": 554.5739135742188, + "grad_norm": 870287.375, "learning_rate": 5.470000000000001e-06, - "loss": 18.1486, + "loss": 143244.025, "step": 27350 }, { "epoch": 0.05526893102291964, - "grad_norm": 226.20201110839844, + "grad_norm": 70952.015625, "learning_rate": 5.472e-06, - "loss": 17.9212, + "loss": 100931.0, "step": 27360 }, { "epoch": 0.05528913165560345, - "grad_norm": 234.67518615722656, + "grad_norm": 52329.34765625, "learning_rate": 5.4740000000000004e-06, - "loss": 37.0259, + "loss": 145502.4375, "step": 27370 }, { "epoch": 0.05530933228828727, - "grad_norm": 259.5121765136719, + "grad_norm": 126543.6015625, "learning_rate": 5.476000000000001e-06, - "loss": 25.7845, + "loss": 90415.7937, "step": 27380 }, { "epoch": 0.055329532920971085, - "grad_norm": 378.53460693359375, + "grad_norm": 121819.5625, "learning_rate": 5.478e-06, - "loss": 38.2042, + "loss": 172873.675, "step": 27390 }, { "epoch": 0.0553497335536549, - "grad_norm": 247.5023956298828, + "grad_norm": 54112.3984375, "learning_rate": 5.480000000000001e-06, - "loss": 33.0884, + "loss": 168547.7875, "step": 27400 }, { "epoch": 0.05536993418633872, - "grad_norm": 491.0729675292969, + "grad_norm": 470770.6875, "learning_rate": 5.482000000000001e-06, - "loss": 26.6517, + "loss": 192508.025, "step": 27410 }, { "epoch": 0.05539013481902253, - "grad_norm": 308.5608825683594, + "grad_norm": 138991.59375, "learning_rate": 5.484e-06, - "loss": 26.3782, + "loss": 116323.075, "step": 27420 }, { "epoch": 0.055410335451706344, - "grad_norm": 361.5599365234375, + "grad_norm": 380913.40625, "learning_rate": 5.4860000000000005e-06, - "loss": 31.672, + "loss": 155256.0875, "step": 27430 }, { "epoch": 0.055430536084390164, - "grad_norm": 293.3143310546875, + "grad_norm": 44660.015625, "learning_rate": 5.488e-06, - "loss": 26.1239, + "loss": 148080.375, "step": 27440 }, { "epoch": 0.05545073671707398, - "grad_norm": 238.87124633789062, + "grad_norm": 192845.703125, "learning_rate": 5.490000000000001e-06, - "loss": 17.9067, + "loss": 137488.6375, "step": 27450 }, { "epoch": 0.0554709373497578, - "grad_norm": 300.4674072265625, + "grad_norm": 267394.28125, "learning_rate": 5.492000000000001e-06, - "loss": 22.1399, + "loss": 118511.2375, "step": 27460 }, { "epoch": 0.05549113798244161, - "grad_norm": 1151.210693359375, + "grad_norm": 629468.75, "learning_rate": 5.494e-06, - "loss": 31.8923, + "loss": 147282.55, "step": 27470 }, { "epoch": 0.05551133861512542, - "grad_norm": 398.8284912109375, + "grad_norm": 352617.1875, "learning_rate": 5.496e-06, - "loss": 34.124, + "loss": 172473.4875, "step": 27480 }, { "epoch": 0.05553153924780924, - "grad_norm": 182.739990234375, + "grad_norm": 123661.0390625, "learning_rate": 5.498e-06, - "loss": 16.2706, + "loss": 59113.1625, "step": 27490 }, { "epoch": 0.055551739880493056, - "grad_norm": 69.2137451171875, + "grad_norm": 10133.3271484375, "learning_rate": 5.500000000000001e-06, - "loss": 43.4489, + "loss": 125303.5, "step": 27500 }, { "epoch": 0.05557194051317687, - "grad_norm": 243.2827911376953, + "grad_norm": 471022.375, "learning_rate": 5.502000000000001e-06, - "loss": 26.3211, + "loss": 168410.7, "step": 27510 }, { "epoch": 0.05559214114586069, - "grad_norm": 877.0611572265625, + "grad_norm": 1126719.75, "learning_rate": 5.504e-06, - "loss": 35.1816, + "loss": 208622.5, "step": 27520 }, { "epoch": 0.0556123417785445, - "grad_norm": 125.4283447265625, + "grad_norm": 122503.8671875, "learning_rate": 5.506e-06, - "loss": 15.6442, + "loss": 66326.2875, "step": 27530 }, { "epoch": 0.05563254241122832, - "grad_norm": 243.79249572753906, + "grad_norm": 181101.65625, "learning_rate": 5.508e-06, - "loss": 11.2258, + "loss": 81501.175, "step": 27540 }, { "epoch": 0.055652743043912135, - "grad_norm": 404.4970397949219, + "grad_norm": 762328.4375, "learning_rate": 5.510000000000001e-06, - "loss": 28.2651, + "loss": 179801.4, "step": 27550 }, { "epoch": 0.05567294367659595, - "grad_norm": 287.2158508300781, + "grad_norm": 36549.64453125, "learning_rate": 5.512000000000001e-06, - "loss": 23.4717, + "loss": 181351.95, "step": 27560 }, { "epoch": 0.05569314430927977, - "grad_norm": 57.77788543701172, + "grad_norm": 1812.8441162109375, "learning_rate": 5.514e-06, - "loss": 25.1778, + "loss": 167907.8125, "step": 27570 }, { "epoch": 0.05571334494196358, - "grad_norm": 80.17808532714844, + "grad_norm": 5179.896484375, "learning_rate": 5.516e-06, - "loss": 30.3234, + "loss": 209921.725, "step": 27580 }, { "epoch": 0.055733545574647395, - "grad_norm": 370.5777893066406, + "grad_norm": 64823.109375, "learning_rate": 5.518e-06, - "loss": 44.0101, + "loss": 231050.575, "step": 27590 }, { "epoch": 0.055753746207331215, - "grad_norm": 248.14292907714844, + "grad_norm": 28121.533203125, "learning_rate": 5.5200000000000005e-06, - "loss": 34.1338, + "loss": 183427.8125, "step": 27600 }, { "epoch": 0.05577394684001503, - "grad_norm": 297.3516845703125, + "grad_norm": 71639.1640625, "learning_rate": 5.522000000000001e-06, - "loss": 22.6625, + "loss": 140016.2625, "step": 27610 }, { "epoch": 0.05579414747269885, - "grad_norm": 151.025146484375, + "grad_norm": 38782.07421875, "learning_rate": 5.524000000000001e-06, - "loss": 20.9243, + "loss": 91541.3062, "step": 27620 }, { "epoch": 0.05581434810538266, - "grad_norm": 679.7206420898438, + "grad_norm": 504488.1875, "learning_rate": 5.526e-06, - "loss": 40.1341, + "loss": 284701.025, "step": 27630 }, { "epoch": 0.055834548738066474, - "grad_norm": 2318.187744140625, + "grad_norm": 136196.109375, "learning_rate": 5.528e-06, - "loss": 28.5167, + "loss": 180227.6125, "step": 27640 }, { "epoch": 0.055854749370750294, - "grad_norm": 308.1387023925781, + "grad_norm": 111331.0703125, "learning_rate": 5.530000000000001e-06, - "loss": 47.0316, + "loss": 212424.75, "step": 27650 }, { "epoch": 0.05587495000343411, - "grad_norm": 67.56517028808594, + "grad_norm": 135027.609375, "learning_rate": 5.5320000000000006e-06, - "loss": 23.0679, + "loss": 97583.0312, "step": 27660 }, { "epoch": 0.05589515063611792, - "grad_norm": 924.32080078125, + "grad_norm": 45307.91796875, "learning_rate": 5.534000000000001e-06, - "loss": 25.1727, + "loss": 226695.45, "step": 27670 }, { "epoch": 0.05591535126880174, - "grad_norm": 282.2278137207031, + "grad_norm": 40275.87109375, "learning_rate": 5.536e-06, - "loss": 38.4195, + "loss": 239300.975, "step": 27680 }, { "epoch": 0.05593555190148555, - "grad_norm": 378.9815979003906, + "grad_norm": 274836.59375, "learning_rate": 5.538e-06, - "loss": 27.4518, + "loss": 151303.875, "step": 27690 }, { "epoch": 0.05595575253416937, - "grad_norm": 163.6479949951172, + "grad_norm": 58051.42578125, "learning_rate": 5.540000000000001e-06, - "loss": 37.4892, + "loss": 289705.425, "step": 27700 }, { "epoch": 0.055975953166853186, - "grad_norm": 88.61754608154297, + "grad_norm": 28583.712890625, "learning_rate": 5.5420000000000005e-06, - "loss": 25.9047, + "loss": 150384.2375, "step": 27710 }, { "epoch": 0.055996153799537, - "grad_norm": 241.44847106933594, + "grad_norm": 176541.140625, "learning_rate": 5.544000000000001e-06, - "loss": 36.617, + "loss": 142514.7625, "step": 27720 }, { "epoch": 0.05601635443222082, - "grad_norm": 285.30438232421875, + "grad_norm": 55098.1171875, "learning_rate": 5.546e-06, - "loss": 31.8549, + "loss": 112704.475, "step": 27730 }, { "epoch": 0.05603655506490463, - "grad_norm": 136.18719482421875, + "grad_norm": 31402.419921875, "learning_rate": 5.548e-06, - "loss": 43.947, + "loss": 274943.125, "step": 27740 }, { "epoch": 0.056056755697588445, - "grad_norm": 493.2935791015625, + "grad_norm": 682574.9375, "learning_rate": 5.550000000000001e-06, - "loss": 33.054, + "loss": 158313.4, "step": 27750 }, { "epoch": 0.056076956330272265, - "grad_norm": 381.3257751464844, + "grad_norm": 355530.53125, "learning_rate": 5.552e-06, - "loss": 35.3583, + "loss": 223965.9, "step": 27760 }, { "epoch": 0.05609715696295608, - "grad_norm": 93.75751495361328, + "grad_norm": 3907.4931640625, "learning_rate": 5.5540000000000005e-06, - "loss": 15.3817, + "loss": 114650.9125, "step": 27770 }, { "epoch": 0.0561173575956399, - "grad_norm": 337.303466796875, + "grad_norm": 26223.671875, "learning_rate": 5.556000000000001e-06, - "loss": 24.3953, + "loss": 157073.2125, "step": 27780 }, { "epoch": 0.05613755822832371, - "grad_norm": 145.49217224121094, + "grad_norm": 19261.576171875, "learning_rate": 5.558e-06, - "loss": 19.2551, + "loss": 107782.8125, "step": 27790 }, { "epoch": 0.056157758861007524, - "grad_norm": 69.3620834350586, + "grad_norm": 25485.76171875, "learning_rate": 5.560000000000001e-06, - "loss": 34.3316, + "loss": 334496.525, "step": 27800 }, { "epoch": 0.056177959493691344, - "grad_norm": 293.1623229980469, + "grad_norm": 25486.716796875, "learning_rate": 5.562000000000001e-06, - "loss": 22.2989, + "loss": 75776.9563, "step": 27810 }, { "epoch": 0.05619816012637516, - "grad_norm": 157.4688720703125, + "grad_norm": 31564.158203125, "learning_rate": 5.5640000000000004e-06, - "loss": 34.6148, + "loss": 292350.625, "step": 27820 }, { "epoch": 0.05621836075905897, - "grad_norm": 1601.130859375, + "grad_norm": 621227.3125, "learning_rate": 5.566000000000001e-06, - "loss": 43.8286, + "loss": 197632.5625, "step": 27830 }, { "epoch": 0.05623856139174279, - "grad_norm": 205.823486328125, + "grad_norm": 2389.894775390625, "learning_rate": 5.568e-06, - "loss": 45.9922, + "loss": 185495.675, "step": 27840 }, { "epoch": 0.0562587620244266, - "grad_norm": 59.5796012878418, + "grad_norm": 112642.2578125, "learning_rate": 5.570000000000001e-06, - "loss": 24.7914, + "loss": 167258.9, "step": 27850 }, { "epoch": 0.05627896265711042, - "grad_norm": 333.1473083496094, + "grad_norm": 188535.921875, "learning_rate": 5.572000000000001e-06, - "loss": 30.5427, + "loss": 213805.8, "step": 27860 }, { "epoch": 0.056299163289794237, - "grad_norm": 324.0419616699219, + "grad_norm": 355747.0625, "learning_rate": 5.574e-06, - "loss": 34.5958, + "loss": 282062.675, "step": 27870 }, { "epoch": 0.05631936392247805, - "grad_norm": 227.6458282470703, + "grad_norm": 88941.5, "learning_rate": 5.5760000000000005e-06, - "loss": 23.0325, + "loss": 190622.5875, "step": 27880 }, { "epoch": 0.05633956455516187, - "grad_norm": 352.4715576171875, + "grad_norm": 253791.65625, "learning_rate": 5.578e-06, - "loss": 29.4214, + "loss": 165097.7125, "step": 27890 }, { "epoch": 0.05635976518784568, - "grad_norm": 128.63223266601562, + "grad_norm": 39731.390625, "learning_rate": 5.580000000000001e-06, - "loss": 26.9033, + "loss": 137119.0, "step": 27900 }, { "epoch": 0.056379965820529496, - "grad_norm": 295.8462829589844, + "grad_norm": 117420.953125, "learning_rate": 5.582000000000001e-06, - "loss": 30.203, + "loss": 215298.15, "step": 27910 }, { "epoch": 0.056400166453213316, - "grad_norm": 153.4561767578125, + "grad_norm": 18163.6875, "learning_rate": 5.584e-06, - "loss": 28.914, + "loss": 157653.9875, "step": 27920 }, { "epoch": 0.05642036708589713, - "grad_norm": 420.8994445800781, + "grad_norm": 39441.484375, "learning_rate": 5.586e-06, - "loss": 28.6144, + "loss": 116108.075, "step": 27930 }, { "epoch": 0.05644056771858095, - "grad_norm": 219.6123046875, + "grad_norm": 13419.896484375, "learning_rate": 5.588e-06, - "loss": 31.8929, + "loss": 99946.95, "step": 27940 }, { "epoch": 0.05646076835126476, - "grad_norm": 134.90292358398438, + "grad_norm": 3373.751708984375, "learning_rate": 5.590000000000001e-06, - "loss": 40.4273, + "loss": 320299.0, "step": 27950 }, { "epoch": 0.056480968983948575, - "grad_norm": 523.1329956054688, + "grad_norm": 457950.46875, "learning_rate": 5.592000000000001e-06, - "loss": 18.6551, + "loss": 115545.1875, "step": 27960 }, { "epoch": 0.056501169616632395, - "grad_norm": 512.2754516601562, + "grad_norm": 200511.71875, "learning_rate": 5.594e-06, - "loss": 47.292, + "loss": 190281.8875, "step": 27970 }, { "epoch": 0.05652137024931621, - "grad_norm": 55.876888275146484, + "grad_norm": 6118.19189453125, "learning_rate": 5.596e-06, - "loss": 31.8431, + "loss": 137332.3375, "step": 27980 }, { "epoch": 0.05654157088200002, - "grad_norm": 181.684814453125, + "grad_norm": 48057.78515625, "learning_rate": 5.5980000000000004e-06, - "loss": 30.0193, + "loss": 222659.45, "step": 27990 }, { "epoch": 0.05656177151468384, - "grad_norm": 204.7184295654297, + "grad_norm": 14759.55859375, "learning_rate": 5.600000000000001e-06, - "loss": 19.8612, + "loss": 137407.1125, "step": 28000 }, { "epoch": 0.056581972147367654, "grad_norm": 0.0, "learning_rate": 5.602000000000001e-06, - "loss": 45.8665, + "loss": 203257.8625, "step": 28010 }, { "epoch": 0.056602172780051474, - "grad_norm": 463.9403381347656, + "grad_norm": 301444.375, "learning_rate": 5.604000000000001e-06, - "loss": 45.7898, + "loss": 313931.025, "step": 28020 }, { "epoch": 0.05662237341273529, - "grad_norm": 321.863037109375, + "grad_norm": 207699.53125, "learning_rate": 5.606e-06, - "loss": 28.8418, + "loss": 304211.0, "step": 28030 }, { "epoch": 0.0566425740454191, - "grad_norm": 446.02069091796875, + "grad_norm": 269430.375, "learning_rate": 5.608e-06, - "loss": 26.2253, + "loss": 157617.325, "step": 28040 }, { "epoch": 0.05666277467810292, - "grad_norm": 254.49205017089844, + "grad_norm": 147779.6875, "learning_rate": 5.610000000000001e-06, - "loss": 32.8853, + "loss": 197855.775, "step": 28050 }, { "epoch": 0.05668297531078673, - "grad_norm": 439.8501892089844, + "grad_norm": 32050.79296875, "learning_rate": 5.612000000000001e-06, - "loss": 26.3658, + "loss": 161786.4, "step": 28060 }, { "epoch": 0.056703175943470546, - "grad_norm": 306.6741638183594, + "grad_norm": 62917.71875, "learning_rate": 5.614000000000001e-06, - "loss": 37.8322, + "loss": 166943.6875, "step": 28070 }, { "epoch": 0.056723376576154366, - "grad_norm": 260.4828796386719, + "grad_norm": 144837.5625, "learning_rate": 5.616e-06, - "loss": 29.4606, + "loss": 219939.2, "step": 28080 }, { "epoch": 0.05674357720883818, - "grad_norm": 72.44647979736328, + "grad_norm": 4897.53515625, "learning_rate": 5.618e-06, - "loss": 30.5602, + "loss": 206781.0125, "step": 28090 }, { "epoch": 0.056763777841522, - "grad_norm": 290.8236999511719, + "grad_norm": 62632.6953125, "learning_rate": 5.620000000000001e-06, - "loss": 30.3998, + "loss": 151683.9125, "step": 28100 }, { "epoch": 0.05678397847420581, - "grad_norm": 102.21273803710938, + "grad_norm": 2337.861328125, "learning_rate": 5.6220000000000006e-06, - "loss": 37.064, + "loss": 177460.1875, "step": 28110 }, { "epoch": 0.056804179106889625, - "grad_norm": 379.03729248046875, + "grad_norm": 187617.84375, "learning_rate": 5.624000000000001e-06, - "loss": 35.541, + "loss": 191481.275, "step": 28120 }, { "epoch": 0.056824379739573445, - "grad_norm": 236.905517578125, + "grad_norm": 63831.234375, "learning_rate": 5.626e-06, - "loss": 42.933, + "loss": 470805.2, "step": 28130 }, { "epoch": 0.05684458037225726, - "grad_norm": 582.6843872070312, + "grad_norm": 612790.125, "learning_rate": 5.628e-06, - "loss": 37.1196, + "loss": 197093.8625, "step": 28140 }, { "epoch": 0.05686478100494107, - "grad_norm": 565.9315185546875, + "grad_norm": 78978.0625, "learning_rate": 5.63e-06, - "loss": 24.5229, + "loss": 129425.2125, "step": 28150 }, { "epoch": 0.05688498163762489, - "grad_norm": 124.73725891113281, + "grad_norm": 14401.4677734375, "learning_rate": 5.6320000000000005e-06, - "loss": 18.64, + "loss": 81706.9875, "step": 28160 }, { "epoch": 0.056905182270308705, - "grad_norm": 29.175222396850586, + "grad_norm": 5827.91650390625, "learning_rate": 5.634000000000001e-06, - "loss": 33.4167, + "loss": 210560.65, "step": 28170 }, { "epoch": 0.056925382902992525, - "grad_norm": 493.8686218261719, + "grad_norm": 817238.625, "learning_rate": 5.636000000000001e-06, - "loss": 16.7518, + "loss": 106437.525, "step": 28180 }, { "epoch": 0.05694558353567634, - "grad_norm": 473.3516845703125, + "grad_norm": 282720.71875, "learning_rate": 5.638e-06, - "loss": 38.6327, + "loss": 379517.925, "step": 28190 }, { "epoch": 0.05696578416836015, - "grad_norm": 506.81524658203125, + "grad_norm": 392032.3125, "learning_rate": 5.64e-06, - "loss": 54.423, + "loss": 290400.5, "step": 28200 }, { "epoch": 0.05698598480104397, - "grad_norm": 187.45361328125, + "grad_norm": 83163.4765625, "learning_rate": 5.642000000000001e-06, - "loss": 28.4133, + "loss": 119669.9125, "step": 28210 }, { "epoch": 0.057006185433727784, - "grad_norm": 1286.269287109375, + "grad_norm": 1454245.125, "learning_rate": 5.6440000000000005e-06, - "loss": 30.785, + "loss": 346710.25, "step": 28220 }, { "epoch": 0.0570263860664116, - "grad_norm": 155.1964111328125, + "grad_norm": 155652.53125, "learning_rate": 5.646000000000001e-06, - "loss": 32.1819, + "loss": 161165.6875, "step": 28230 }, { "epoch": 0.05704658669909542, - "grad_norm": 142.5745391845703, + "grad_norm": 42183.01171875, "learning_rate": 5.648e-06, - "loss": 26.0579, + "loss": 158900.0375, "step": 28240 }, { "epoch": 0.05706678733177923, - "grad_norm": 382.9991455078125, + "grad_norm": 68031.9921875, "learning_rate": 5.65e-06, - "loss": 36.7989, + "loss": 355963.375, "step": 28250 }, { "epoch": 0.05708698796446305, - "grad_norm": 311.9433288574219, + "grad_norm": 172316.0, "learning_rate": 5.652000000000001e-06, - "loss": 30.9128, + "loss": 176566.0875, "step": 28260 }, { "epoch": 0.05710718859714686, - "grad_norm": 821.916259765625, + "grad_norm": 377922.75, "learning_rate": 5.654e-06, - "loss": 61.4871, + "loss": 278438.675, "step": 28270 }, { "epoch": 0.057127389229830676, - "grad_norm": 742.4916381835938, + "grad_norm": 627531.875, "learning_rate": 5.6560000000000006e-06, - "loss": 32.3058, + "loss": 199467.1875, "step": 28280 }, { "epoch": 0.057147589862514496, - "grad_norm": 90.88944244384766, + "grad_norm": 13223.3388671875, "learning_rate": 5.658e-06, - "loss": 27.2073, + "loss": 115006.2875, "step": 28290 }, { "epoch": 0.05716779049519831, - "grad_norm": 63.00052261352539, + "grad_norm": 10252.390625, "learning_rate": 5.66e-06, - "loss": 27.3424, + "loss": 159091.6, "step": 28300 }, { "epoch": 0.05718799112788212, - "grad_norm": 67.73237609863281, + "grad_norm": 1811.036376953125, "learning_rate": 5.662000000000001e-06, - "loss": 35.9595, + "loss": 159691.0875, "step": 28310 }, { "epoch": 0.05720819176056594, - "grad_norm": 78.1071548461914, + "grad_norm": 12928.5888671875, "learning_rate": 5.664e-06, - "loss": 9.6641, + "loss": 42705.0062, "step": 28320 }, { "epoch": 0.057228392393249755, - "grad_norm": 901.3255004882812, + "grad_norm": 670512.75, "learning_rate": 5.6660000000000005e-06, - "loss": 29.1247, + "loss": 225449.075, "step": 28330 }, { "epoch": 0.057248593025933575, - "grad_norm": 631.9803466796875, + "grad_norm": 708595.0625, "learning_rate": 5.668e-06, - "loss": 32.6025, + "loss": 217645.2, "step": 28340 }, { "epoch": 0.05726879365861739, - "grad_norm": 269.4149169921875, + "grad_norm": 28833.54296875, "learning_rate": 5.67e-06, - "loss": 25.6203, + "loss": 184587.325, "step": 28350 }, { "epoch": 0.0572889942913012, - "grad_norm": 665.5552978515625, + "grad_norm": 60940.14453125, "learning_rate": 5.672000000000001e-06, - "loss": 27.0234, + "loss": 141242.4875, "step": 28360 }, { "epoch": 0.05730919492398502, - "grad_norm": 1237.1932373046875, + "grad_norm": 680178.9375, "learning_rate": 5.674e-06, - "loss": 41.6504, + "loss": 321805.4, "step": 28370 }, { "epoch": 0.057329395556668834, - "grad_norm": 165.74679565429688, + "grad_norm": 59162.296875, "learning_rate": 5.676e-06, - "loss": 22.0172, + "loss": 88124.6438, "step": 28380 }, { "epoch": 0.05734959618935265, - "grad_norm": 355.13836669921875, + "grad_norm": 29880.40625, "learning_rate": 5.6780000000000005e-06, - "loss": 16.2022, + "loss": 120097.7125, "step": 28390 }, { "epoch": 0.05736979682203647, - "grad_norm": 275.1878662109375, + "grad_norm": 58871.7421875, "learning_rate": 5.68e-06, - "loss": 22.4103, + "loss": 142861.1375, "step": 28400 }, { "epoch": 0.05738999745472028, - "grad_norm": 129.44248962402344, + "grad_norm": 9069.4609375, "learning_rate": 5.682000000000001e-06, - "loss": 11.4887, + "loss": 47616.3406, "step": 28410 }, { "epoch": 0.0574101980874041, - "grad_norm": 272.0674743652344, + "grad_norm": 16355.15234375, "learning_rate": 5.684000000000001e-06, - "loss": 48.8762, + "loss": 309348.175, "step": 28420 }, { "epoch": 0.05743039872008791, - "grad_norm": 138.18812561035156, + "grad_norm": 26169.728515625, "learning_rate": 5.686e-06, - "loss": 33.4573, + "loss": 252194.1, "step": 28430 }, { "epoch": 0.057450599352771727, - "grad_norm": 180.22451782226562, + "grad_norm": 124907.3046875, "learning_rate": 5.6880000000000004e-06, - "loss": 18.0441, + "loss": 87665.7937, "step": 28440 }, { "epoch": 0.057470799985455547, - "grad_norm": 147.80079650878906, + "grad_norm": 3271.8095703125, "learning_rate": 5.69e-06, - "loss": 20.8648, + "loss": 115155.65, "step": 28450 }, { "epoch": 0.05749100061813936, - "grad_norm": 251.6083221435547, + "grad_norm": 202251.90625, "learning_rate": 5.692000000000001e-06, - "loss": 31.337, + "loss": 243549.15, "step": 28460 }, { "epoch": 0.05751120125082317, - "grad_norm": 408.03497314453125, + "grad_norm": 28554.826171875, "learning_rate": 5.694000000000001e-06, - "loss": 43.0213, + "loss": 263131.95, "step": 28470 }, { "epoch": 0.05753140188350699, - "grad_norm": 310.3747863769531, + "grad_norm": 126717.421875, "learning_rate": 5.696e-06, - "loss": 20.943, + "loss": 230064.825, "step": 28480 }, { "epoch": 0.057551602516190806, "grad_norm": 0.0, "learning_rate": 5.698e-06, - "loss": 30.0415, + "loss": 262892.425, "step": 28490 }, { "epoch": 0.057571803148874626, - "grad_norm": 281.2377014160156, + "grad_norm": 371268.15625, "learning_rate": 5.7e-06, - "loss": 31.7942, + "loss": 253208.225, "step": 28500 }, { "epoch": 0.05759200378155844, - "grad_norm": 301.1387023925781, + "grad_norm": 13908.7646484375, "learning_rate": 5.702000000000001e-06, - "loss": 36.2539, + "loss": 172363.1125, "step": 28510 }, { "epoch": 0.05761220441424225, - "grad_norm": 426.962158203125, + "grad_norm": 76762.5703125, "learning_rate": 5.704000000000001e-06, - "loss": 34.8207, + "loss": 139268.75, "step": 28520 }, { "epoch": 0.05763240504692607, - "grad_norm": 122.75354766845703, + "grad_norm": 71413.4375, "learning_rate": 5.706e-06, - "loss": 25.3265, + "loss": 190448.8125, "step": 28530 }, { "epoch": 0.057652605679609885, - "grad_norm": 35.92996597290039, + "grad_norm": 12572.75, "learning_rate": 5.708e-06, - "loss": 30.0774, + "loss": 222598.95, "step": 28540 }, { "epoch": 0.0576728063122937, - "grad_norm": 271.5851745605469, + "grad_norm": 22319.69140625, "learning_rate": 5.71e-06, - "loss": 29.5664, + "loss": 133098.9625, "step": 28550 }, { "epoch": 0.05769300694497752, - "grad_norm": 82.54947662353516, + "grad_norm": 170222.234375, "learning_rate": 5.7120000000000005e-06, - "loss": 33.1574, + "loss": 221947.625, "step": 28560 }, { "epoch": 0.05771320757766133, - "grad_norm": 158.8838653564453, + "grad_norm": 13913.55078125, "learning_rate": 5.714000000000001e-06, - "loss": 31.1948, + "loss": 118263.65, "step": 28570 }, { "epoch": 0.05773340821034515, - "grad_norm": 90.04515075683594, + "grad_norm": 27977.666015625, "learning_rate": 5.716000000000001e-06, - "loss": 58.9537, + "loss": 378954.65, "step": 28580 }, { "epoch": 0.057753608843028964, - "grad_norm": 157.55052185058594, + "grad_norm": 51492.7890625, "learning_rate": 5.718e-06, - "loss": 35.8344, + "loss": 325558.375, "step": 28590 }, { "epoch": 0.05777380947571278, - "grad_norm": 187.2686767578125, + "grad_norm": 4800.30908203125, "learning_rate": 5.72e-06, - "loss": 33.3788, + "loss": 252237.775, "step": 28600 }, { "epoch": 0.0577940101083966, - "grad_norm": 705.1644897460938, + "grad_norm": 191705.03125, "learning_rate": 5.722000000000001e-06, - "loss": 26.3549, + "loss": 147998.5125, "step": 28610 }, { "epoch": 0.05781421074108041, - "grad_norm": 1000.5523071289062, + "grad_norm": 447729.71875, "learning_rate": 5.724000000000001e-06, - "loss": 35.8475, + "loss": 219545.85, "step": 28620 }, { "epoch": 0.05783441137376422, - "grad_norm": 204.24722290039062, + "grad_norm": 6549.81396484375, "learning_rate": 5.726000000000001e-06, - "loss": 29.3729, + "loss": 173465.875, "step": 28630 }, { "epoch": 0.05785461200644804, - "grad_norm": 374.9010009765625, + "grad_norm": 48529.30859375, "learning_rate": 5.728e-06, - "loss": 30.2926, + "loss": 112574.2375, "step": 28640 }, { "epoch": 0.057874812639131856, - "grad_norm": 182.42819213867188, + "grad_norm": 111951.953125, "learning_rate": 5.73e-06, - "loss": 22.1948, + "loss": 70113.0063, "step": 28650 }, { "epoch": 0.057895013271815676, - "grad_norm": 599.50146484375, + "grad_norm": 204858.71875, "learning_rate": 5.732000000000001e-06, - "loss": 21.8648, + "loss": 139519.15, "step": 28660 }, { "epoch": 0.05791521390449949, - "grad_norm": 717.0021362304688, + "grad_norm": 283485.15625, "learning_rate": 5.7340000000000005e-06, - "loss": 44.6701, + "loss": 192944.625, "step": 28670 }, { "epoch": 0.0579354145371833, - "grad_norm": 186.2231903076172, + "grad_norm": 216796.359375, "learning_rate": 5.736000000000001e-06, - "loss": 16.5107, + "loss": 108958.575, "step": 28680 }, { "epoch": 0.05795561516986712, - "grad_norm": 554.5145263671875, + "grad_norm": 383577.71875, "learning_rate": 5.738e-06, - "loss": 29.3926, + "loss": 165626.5875, "step": 28690 }, { "epoch": 0.057975815802550935, - "grad_norm": 155.5208740234375, + "grad_norm": 27847.23046875, "learning_rate": 5.74e-06, - "loss": 56.2564, + "loss": 490408.8, "step": 28700 }, { "epoch": 0.05799601643523475, - "grad_norm": 294.67913818359375, + "grad_norm": 145942.34375, "learning_rate": 5.742000000000001e-06, - "loss": 36.4084, + "loss": 204894.775, "step": 28710 }, { "epoch": 0.05801621706791857, - "grad_norm": 433.7984619140625, + "grad_norm": 69996.6484375, "learning_rate": 5.744e-06, - "loss": 34.2891, + "loss": 148765.525, "step": 28720 }, { "epoch": 0.05803641770060238, - "grad_norm": 609.890625, + "grad_norm": 10723.892578125, "learning_rate": 5.7460000000000006e-06, - "loss": 23.8575, + "loss": 87537.5875, "step": 28730 }, { "epoch": 0.0580566183332862, - "grad_norm": 654.7755126953125, + "grad_norm": 455177.875, "learning_rate": 5.748e-06, - "loss": 31.2064, + "loss": 170134.45, "step": 28740 }, { "epoch": 0.058076818965970015, - "grad_norm": 200.70631408691406, + "grad_norm": 82224.4375, "learning_rate": 5.75e-06, - "loss": 21.5167, + "loss": 74746.9187, "step": 28750 }, { "epoch": 0.05809701959865383, - "grad_norm": 600.5113525390625, + "grad_norm": 91165.7265625, "learning_rate": 5.752000000000001e-06, - "loss": 43.6205, + "loss": 337186.9, "step": 28760 }, { "epoch": 0.05811722023133765, - "grad_norm": 307.6865539550781, + "grad_norm": 200219.09375, "learning_rate": 5.754e-06, - "loss": 38.44, + "loss": 211413.65, "step": 28770 }, { "epoch": 0.05813742086402146, - "grad_norm": 643.4122924804688, + "grad_norm": 724633.0, "learning_rate": 5.7560000000000005e-06, - "loss": 33.5479, + "loss": 315921.75, "step": 28780 }, { "epoch": 0.058157621496705274, - "grad_norm": 401.5971984863281, + "grad_norm": 98929.390625, "learning_rate": 5.758000000000001e-06, - "loss": 27.5044, + "loss": 174714.4, "step": 28790 }, { "epoch": 0.058177822129389094, - "grad_norm": 169.2650146484375, + "grad_norm": 54533.1796875, "learning_rate": 5.76e-06, - "loss": 24.2614, + "loss": 146205.85, "step": 28800 }, { "epoch": 0.05819802276207291, - "grad_norm": 248.36412048339844, + "grad_norm": 61390.4296875, "learning_rate": 5.762000000000001e-06, - "loss": 42.2762, + "loss": 201132.0625, "step": 28810 }, { "epoch": 0.05821822339475673, - "grad_norm": 686.0226440429688, + "grad_norm": 829695.5, "learning_rate": 5.764000000000001e-06, - "loss": 42.2404, + "loss": 183809.6625, "step": 28820 }, { "epoch": 0.05823842402744054, - "grad_norm": 59.56367111206055, + "grad_norm": 2151.815185546875, "learning_rate": 5.766e-06, - "loss": 34.5825, + "loss": 262309.325, "step": 28830 }, { "epoch": 0.05825862466012435, - "grad_norm": 303.09417724609375, + "grad_norm": 197914.3125, "learning_rate": 5.7680000000000005e-06, - "loss": 28.0605, + "loss": 161171.375, "step": 28840 }, { "epoch": 0.05827882529280817, - "grad_norm": 253.83326721191406, + "grad_norm": 64684.9296875, "learning_rate": 5.77e-06, - "loss": 25.3181, + "loss": 176464.0625, "step": 28850 }, { "epoch": 0.058299025925491986, - "grad_norm": 206.4472198486328, + "grad_norm": 21910.404296875, "learning_rate": 5.772000000000001e-06, - "loss": 20.0452, + "loss": 80587.7125, "step": 28860 }, { "epoch": 0.0583192265581758, - "grad_norm": 387.855224609375, + "grad_norm": 691620.0, "learning_rate": 5.774000000000001e-06, - "loss": 34.5938, + "loss": 247202.75, "step": 28870 }, { "epoch": 0.05833942719085962, - "grad_norm": 243.99778747558594, + "grad_norm": 24297.91796875, "learning_rate": 5.776e-06, - "loss": 20.1394, + "loss": 104260.6875, "step": 28880 }, { "epoch": 0.05835962782354343, - "grad_norm": 798.8829956054688, + "grad_norm": 66190.859375, "learning_rate": 5.778e-06, - "loss": 28.9229, + "loss": 180267.5625, "step": 28890 }, { "epoch": 0.05837982845622725, - "grad_norm": 336.54730224609375, + "grad_norm": 134770.765625, "learning_rate": 5.78e-06, - "loss": 29.0429, + "loss": 170610.9375, "step": 28900 }, { "epoch": 0.058400029088911065, - "grad_norm": 357.3149108886719, + "grad_norm": 38167.41796875, "learning_rate": 5.782000000000001e-06, - "loss": 40.7843, + "loss": 188207.425, "step": 28910 }, { "epoch": 0.05842022972159488, - "grad_norm": 407.9903259277344, + "grad_norm": 211124.25, "learning_rate": 5.784000000000001e-06, - "loss": 23.2894, + "loss": 120412.225, "step": 28920 }, { "epoch": 0.0584404303542787, - "grad_norm": 243.77044677734375, + "grad_norm": 19981.037109375, "learning_rate": 5.786e-06, - "loss": 22.9751, + "loss": 192494.35, "step": 28930 }, { "epoch": 0.05846063098696251, - "grad_norm": 109.91024017333984, + "grad_norm": 21771.29296875, "learning_rate": 5.788e-06, - "loss": 24.473, + "loss": 120120.85, "step": 28940 }, { "epoch": 0.058480831619646324, - "grad_norm": 378.3873596191406, + "grad_norm": 23561.947265625, "learning_rate": 5.7900000000000005e-06, - "loss": 52.4523, + "loss": 428000.5, "step": 28950 }, { "epoch": 0.058501032252330144, - "grad_norm": 290.75213623046875, + "grad_norm": 68006.2890625, "learning_rate": 5.792000000000001e-06, - "loss": 56.7534, + "loss": 69036.025, "step": 28960 }, { "epoch": 0.05852123288501396, - "grad_norm": 239.42730712890625, + "grad_norm": 34454.42578125, "learning_rate": 5.794000000000001e-06, - "loss": 23.7062, + "loss": 96553.0312, "step": 28970 }, { "epoch": 0.05854143351769778, - "grad_norm": 149.8693389892578, + "grad_norm": 8986.6884765625, "learning_rate": 5.796000000000001e-06, - "loss": 38.8801, + "loss": 118653.65, "step": 28980 }, { "epoch": 0.05856163415038159, - "grad_norm": 311.30963134765625, + "grad_norm": 324573.6875, "learning_rate": 5.798e-06, - "loss": 34.3625, + "loss": 169417.525, "step": 28990 }, { "epoch": 0.058581834783065403, - "grad_norm": 298.5364685058594, + "grad_norm": 6451.81884765625, "learning_rate": 5.8e-06, - "loss": 17.1973, + "loss": 116586.875, "step": 29000 }, { "epoch": 0.058602035415749223, - "grad_norm": 242.7294921875, + "grad_norm": 133066.65625, "learning_rate": 5.802000000000001e-06, - "loss": 18.3321, + "loss": 86725.9937, "step": 29010 }, { "epoch": 0.05862223604843304, - "grad_norm": 378.9987487792969, + "grad_norm": 122334.171875, "learning_rate": 5.804000000000001e-06, - "loss": 22.1437, + "loss": 167047.4, "step": 29020 }, { "epoch": 0.05864243668111685, - "grad_norm": 178.19488525390625, + "grad_norm": 23423.798828125, "learning_rate": 5.806000000000001e-06, - "loss": 40.1051, + "loss": 337350.2, "step": 29030 }, { "epoch": 0.05866263731380067, - "grad_norm": 304.3574523925781, + "grad_norm": 115184.890625, "learning_rate": 5.808e-06, - "loss": 39.7005, + "loss": 230031.725, "step": 29040 }, { "epoch": 0.05868283794648448, - "grad_norm": 572.1640014648438, + "grad_norm": 663683.0625, "learning_rate": 5.81e-06, - "loss": 36.8209, + "loss": 338965.075, "step": 29050 }, { "epoch": 0.0587030385791683, - "grad_norm": 188.74029541015625, + "grad_norm": 36112.07421875, "learning_rate": 5.812000000000001e-06, - "loss": 30.6528, + "loss": 178083.1125, "step": 29060 }, { "epoch": 0.058723239211852116, - "grad_norm": 159.77755737304688, + "grad_norm": 68347.7734375, "learning_rate": 5.814000000000001e-06, - "loss": 28.0834, + "loss": 117744.175, "step": 29070 }, { "epoch": 0.05874343984453593, - "grad_norm": 236.99838256835938, + "grad_norm": 18058.4921875, "learning_rate": 5.816000000000001e-06, - "loss": 24.6492, + "loss": 95117.75, "step": 29080 }, { "epoch": 0.05876364047721975, - "grad_norm": 182.14535522460938, + "grad_norm": 20150.150390625, "learning_rate": 5.818e-06, - "loss": 25.6222, + "loss": 79140.575, "step": 29090 }, { "epoch": 0.05878384110990356, - "grad_norm": 383.1043701171875, + "grad_norm": 105673.1015625, "learning_rate": 5.82e-06, - "loss": 31.2367, + "loss": 129576.2875, "step": 29100 }, { "epoch": 0.058804041742587375, - "grad_norm": 423.5813293457031, + "grad_norm": 456150.8125, "learning_rate": 5.822000000000001e-06, - "loss": 28.9585, + "loss": 233830.0, "step": 29110 }, { "epoch": 0.058824242375271195, - "grad_norm": 323.51788330078125, + "grad_norm": 70755.5234375, "learning_rate": 5.8240000000000005e-06, - "loss": 36.8512, + "loss": 205274.6625, "step": 29120 }, { "epoch": 0.05884444300795501, - "grad_norm": 538.1864624023438, + "grad_norm": 71656.0078125, "learning_rate": 5.826000000000001e-06, - "loss": 31.3003, + "loss": 188417.3125, "step": 29130 }, { "epoch": 0.05886464364063883, - "grad_norm": 235.02914428710938, + "grad_norm": 49267.51953125, "learning_rate": 5.828e-06, - "loss": 17.9188, + "loss": 104440.2625, "step": 29140 }, { "epoch": 0.05888484427332264, - "grad_norm": 528.1428833007812, + "grad_norm": 286650.9375, "learning_rate": 5.83e-06, - "loss": 29.4412, + "loss": 151669.7125, "step": 29150 }, { "epoch": 0.058905044906006454, - "grad_norm": 203.73753356933594, + "grad_norm": 71668.1640625, "learning_rate": 5.832000000000001e-06, - "loss": 18.9356, + "loss": 134650.725, "step": 29160 }, { "epoch": 0.058925245538690274, - "grad_norm": 133.8951416015625, + "grad_norm": 2146.71044921875, "learning_rate": 5.834e-06, - "loss": 20.8691, + "loss": 89213.6625, "step": 29170 }, { "epoch": 0.05894544617137409, - "grad_norm": 357.4326171875, + "grad_norm": 248476.046875, "learning_rate": 5.8360000000000005e-06, - "loss": 27.035, + "loss": 199179.075, "step": 29180 }, { "epoch": 0.0589656468040579, - "grad_norm": 231.9644012451172, + "grad_norm": 142763.609375, "learning_rate": 5.838000000000001e-06, - "loss": 38.3608, + "loss": 258858.575, "step": 29190 }, { "epoch": 0.05898584743674172, - "grad_norm": 697.7938232421875, + "grad_norm": 697712.125, "learning_rate": 5.84e-06, - "loss": 23.6989, + "loss": 212457.25, "step": 29200 }, { "epoch": 0.05900604806942553, - "grad_norm": 510.7744140625, + "grad_norm": 955826.0625, "learning_rate": 5.842000000000001e-06, - "loss": 32.0237, + "loss": 263937.25, "step": 29210 }, { "epoch": 0.05902624870210935, - "grad_norm": 276.2771301269531, + "grad_norm": 49288.1328125, "learning_rate": 5.844000000000001e-06, - "loss": 22.8579, + "loss": 150400.475, "step": 29220 }, { "epoch": 0.059046449334793166, - "grad_norm": 437.34490966796875, + "grad_norm": 297537.5625, "learning_rate": 5.8460000000000004e-06, - "loss": 35.4377, + "loss": 219415.5, "step": 29230 }, { "epoch": 0.05906664996747698, - "grad_norm": 505.7890319824219, + "grad_norm": 44813.125, "learning_rate": 5.848000000000001e-06, - "loss": 20.1632, + "loss": 103139.6313, "step": 29240 }, { "epoch": 0.0590868506001608, - "grad_norm": 473.0169982910156, + "grad_norm": 148785.609375, "learning_rate": 5.85e-06, - "loss": 17.2294, + "loss": 102623.7875, "step": 29250 }, { "epoch": 0.05910705123284461, - "grad_norm": 142.6421661376953, + "grad_norm": 18824.087890625, "learning_rate": 5.852000000000001e-06, - "loss": 19.0041, + "loss": 85545.9187, "step": 29260 }, { "epoch": 0.059127251865528425, - "grad_norm": 243.39337158203125, + "grad_norm": 68725.375, "learning_rate": 5.854000000000001e-06, - "loss": 39.0202, + "loss": 249809.825, "step": 29270 }, { "epoch": 0.059147452498212245, - "grad_norm": 204.61212158203125, + "grad_norm": 187660.65625, "learning_rate": 5.856e-06, - "loss": 19.2828, + "loss": 136798.875, "step": 29280 }, { "epoch": 0.05916765313089606, - "grad_norm": 277.0043029785156, + "grad_norm": 145266.34375, "learning_rate": 5.8580000000000005e-06, - "loss": 27.9182, + "loss": 118977.1375, "step": 29290 }, { "epoch": 0.05918785376357988, - "grad_norm": 244.20718383789062, + "grad_norm": 85473.2578125, "learning_rate": 5.86e-06, - "loss": 17.527, + "loss": 61347.95, "step": 29300 }, { "epoch": 0.05920805439626369, - "grad_norm": 139.71414184570312, + "grad_norm": 87271.046875, "learning_rate": 5.862000000000001e-06, - "loss": 28.5032, + "loss": 160674.275, "step": 29310 }, { "epoch": 0.059228255028947505, - "grad_norm": 113.04032135009766, + "grad_norm": 11806.34375, "learning_rate": 5.864000000000001e-06, - "loss": 28.3933, + "loss": 269811.35, "step": 29320 }, { "epoch": 0.059248455661631325, - "grad_norm": 357.9707336425781, + "grad_norm": 73231.4296875, "learning_rate": 5.866e-06, - "loss": 25.2754, + "loss": 141605.2125, "step": 29330 }, { "epoch": 0.05926865629431514, - "grad_norm": 290.7755126953125, + "grad_norm": 49322.171875, "learning_rate": 5.868e-06, - "loss": 31.9646, + "loss": 132235.85, "step": 29340 }, { "epoch": 0.05928885692699895, - "grad_norm": 403.48736572265625, + "grad_norm": 277413.0625, "learning_rate": 5.8700000000000005e-06, - "loss": 27.902, + "loss": 130494.95, "step": 29350 }, { "epoch": 0.05930905755968277, - "grad_norm": 94.61563873291016, + "grad_norm": 4783.515625, "learning_rate": 5.872000000000001e-06, - "loss": 41.1359, + "loss": 267703.475, "step": 29360 }, { "epoch": 0.059329258192366584, - "grad_norm": 272.755615234375, + "grad_norm": 35877.1796875, "learning_rate": 5.874000000000001e-06, - "loss": 22.9157, + "loss": 155639.0375, "step": 29370 }, { "epoch": 0.059349458825050404, - "grad_norm": 546.8012084960938, + "grad_norm": 169829.671875, "learning_rate": 5.876000000000001e-06, - "loss": 36.2041, + "loss": 130172.4, "step": 29380 }, { "epoch": 0.05936965945773422, - "grad_norm": 219.43817138671875, + "grad_norm": 418614.53125, "learning_rate": 5.878e-06, - "loss": 36.0726, + "loss": 174364.5375, "step": 29390 }, { "epoch": 0.05938986009041803, - "grad_norm": 146.97850036621094, + "grad_norm": 40369.890625, "learning_rate": 5.8800000000000005e-06, - "loss": 45.6488, + "loss": 389933.525, "step": 29400 }, { "epoch": 0.05941006072310185, - "grad_norm": 149.57151794433594, + "grad_norm": 26616.296875, "learning_rate": 5.882e-06, - "loss": 28.8268, + "loss": 153495.2375, "step": 29410 }, { "epoch": 0.05943026135578566, - "grad_norm": 210.10450744628906, + "grad_norm": 29308.4453125, "learning_rate": 5.884000000000001e-06, - "loss": 28.2961, + "loss": 137472.9125, "step": 29420 }, { "epoch": 0.059450461988469476, - "grad_norm": 766.3397827148438, + "grad_norm": 91519.921875, "learning_rate": 5.886000000000001e-06, - "loss": 29.3491, + "loss": 102571.7563, "step": 29430 }, { "epoch": 0.059470662621153296, - "grad_norm": 628.2882080078125, + "grad_norm": 569477.625, "learning_rate": 5.888e-06, - "loss": 29.5668, + "loss": 169598.825, "step": 29440 }, { "epoch": 0.05949086325383711, - "grad_norm": 852.3947143554688, + "grad_norm": 366111.5, "learning_rate": 5.89e-06, - "loss": 34.4386, + "loss": 182033.4375, "step": 29450 }, { "epoch": 0.05951106388652093, - "grad_norm": 351.2292175292969, + "grad_norm": 197426.515625, "learning_rate": 5.892e-06, - "loss": 34.4647, + "loss": 131295.05, "step": 29460 }, { "epoch": 0.05953126451920474, - "grad_norm": 313.0265197753906, + "grad_norm": 74488.7578125, "learning_rate": 5.894000000000001e-06, - "loss": 26.2727, + "loss": 78165.9937, "step": 29470 }, { "epoch": 0.059551465151888555, - "grad_norm": 422.8077697753906, + "grad_norm": 459521.09375, "learning_rate": 5.896000000000001e-06, - "loss": 30.2988, + "loss": 242878.1, "step": 29480 }, { "epoch": 0.059571665784572375, - "grad_norm": 323.49346923828125, + "grad_norm": 276446.5625, "learning_rate": 5.898e-06, - "loss": 14.5331, + "loss": 82224.625, "step": 29490 }, { "epoch": 0.05959186641725619, - "grad_norm": 206.73532104492188, + "grad_norm": 10472.408203125, "learning_rate": 5.9e-06, - "loss": 34.6049, + "loss": 183039.6875, "step": 29500 }, { "epoch": 0.05961206704994, - "grad_norm": 245.5485382080078, + "grad_norm": 1505.60009765625, "learning_rate": 5.9019999999999996e-06, - "loss": 33.8908, + "loss": 135112.95, "step": 29510 }, { "epoch": 0.05963226768262382, - "grad_norm": 329.80767822265625, + "grad_norm": 366264.0, "learning_rate": 5.9040000000000006e-06, - "loss": 35.755, + "loss": 181480.975, "step": 29520 }, { "epoch": 0.059652468315307634, - "grad_norm": 599.2566528320312, + "grad_norm": 1173059.375, "learning_rate": 5.906000000000001e-06, - "loss": 37.4361, + "loss": 259640.15, "step": 29530 }, { "epoch": 0.059672668947991454, - "grad_norm": 309.95556640625, + "grad_norm": 56263.625, "learning_rate": 5.908e-06, - "loss": 29.5198, + "loss": 176022.8, "step": 29540 }, { "epoch": 0.05969286958067527, - "grad_norm": 354.0465087890625, + "grad_norm": 17359.328125, "learning_rate": 5.91e-06, - "loss": 26.3363, + "loss": 180475.4875, "step": 29550 }, { "epoch": 0.05971307021335908, - "grad_norm": 157.65206909179688, + "grad_norm": 42501.9375, "learning_rate": 5.912e-06, - "loss": 18.1046, + "loss": 70568.1, "step": 29560 }, { "epoch": 0.0597332708460429, - "grad_norm": 135.74368286132812, + "grad_norm": 1786.739990234375, "learning_rate": 5.9140000000000005e-06, - "loss": 22.1865, + "loss": 89335.7375, "step": 29570 }, { "epoch": 0.059753471478726713, - "grad_norm": 618.9219970703125, + "grad_norm": 646575.125, "learning_rate": 5.916000000000001e-06, - "loss": 40.3919, + "loss": 333718.5, "step": 29580 }, { "epoch": 0.05977367211141053, - "grad_norm": 1112.4710693359375, + "grad_norm": 1044161.0625, "learning_rate": 5.918000000000001e-06, - "loss": 29.0231, + "loss": 189591.875, "step": 29590 }, { "epoch": 0.05979387274409435, - "grad_norm": 381.2337951660156, + "grad_norm": 147292.5, "learning_rate": 5.92e-06, - "loss": 30.8188, + "loss": 161500.9125, "step": 29600 }, { "epoch": 0.05981407337677816, - "grad_norm": 331.70794677734375, + "grad_norm": 298196.59375, "learning_rate": 5.922e-06, - "loss": 26.3022, + "loss": 127930.0, "step": 29610 }, { "epoch": 0.05983427400946198, - "grad_norm": 208.14129638671875, + "grad_norm": 2170.864013671875, "learning_rate": 5.924000000000001e-06, - "loss": 39.2067, + "loss": 152917.15, "step": 29620 }, { "epoch": 0.05985447464214579, - "grad_norm": 378.42877197265625, + "grad_norm": 309007.28125, "learning_rate": 5.9260000000000005e-06, - "loss": 57.0727, + "loss": 451637.85, "step": 29630 }, { "epoch": 0.059874675274829606, - "grad_norm": 139.0298309326172, + "grad_norm": 19314.427734375, "learning_rate": 5.928000000000001e-06, - "loss": 19.168, + "loss": 101557.6625, "step": 29640 }, { "epoch": 0.059894875907513426, - "grad_norm": 226.4697723388672, + "grad_norm": 55401.23828125, "learning_rate": 5.93e-06, - "loss": 24.9292, + "loss": 135660.15, "step": 29650 }, { "epoch": 0.05991507654019724, - "grad_norm": 433.94189453125, + "grad_norm": 84454.6171875, "learning_rate": 5.932e-06, - "loss": 23.7183, + "loss": 105641.3875, "step": 29660 }, { "epoch": 0.05993527717288105, - "grad_norm": 169.28079223632812, + "grad_norm": 15125.4794921875, "learning_rate": 5.934000000000001e-06, - "loss": 32.3608, + "loss": 176208.6625, "step": 29670 }, { "epoch": 0.05995547780556487, - "grad_norm": 168.9295654296875, + "grad_norm": 8162.32177734375, "learning_rate": 5.9360000000000004e-06, - "loss": 25.6014, + "loss": 156707.4875, "step": 29680 }, { "epoch": 0.059975678438248685, - "grad_norm": 136.11447143554688, + "grad_norm": 16027.0859375, "learning_rate": 5.9380000000000006e-06, - "loss": 42.2129, + "loss": 241169.8, "step": 29690 }, { "epoch": 0.059995879070932505, - "grad_norm": 150.8358612060547, + "grad_norm": 12506.85546875, "learning_rate": 5.94e-06, - "loss": 26.7088, + "loss": 119226.975, "step": 29700 }, { "epoch": 0.06001607970361632, - "grad_norm": 400.7820739746094, + "grad_norm": 248040.515625, "learning_rate": 5.942e-06, - "loss": 25.5076, + "loss": 157702.5, "step": 29710 }, { "epoch": 0.06003628033630013, - "grad_norm": 749.4679565429688, + "grad_norm": 0.0, "learning_rate": 5.944000000000001e-06, - "loss": 32.5039, + "loss": 96244.9375, "step": 29720 }, { "epoch": 0.06005648096898395, - "grad_norm": 631.8466796875, + "grad_norm": 953924.6875, "learning_rate": 5.946e-06, - "loss": 30.8828, + "loss": 199173.375, "step": 29730 }, { "epoch": 0.060076681601667764, - "grad_norm": 219.17079162597656, + "grad_norm": 22771.580078125, "learning_rate": 5.9480000000000005e-06, - "loss": 37.6815, + "loss": 302993.65, "step": 29740 }, { "epoch": 0.06009688223435158, - "grad_norm": 386.05975341796875, + "grad_norm": 27566.54296875, "learning_rate": 5.950000000000001e-06, - "loss": 32.4698, + "loss": 205252.0375, "step": 29750 }, { "epoch": 0.0601170828670354, - "grad_norm": 212.24620056152344, + "grad_norm": 3427.554931640625, "learning_rate": 5.952e-06, - "loss": 20.2035, + "loss": 124722.425, "step": 29760 }, { "epoch": 0.06013728349971921, - "grad_norm": 238.9572296142578, + "grad_norm": 179806.484375, "learning_rate": 5.954000000000001e-06, - "loss": 13.4167, + "loss": 145592.6, "step": 29770 }, { "epoch": 0.06015748413240303, - "grad_norm": 165.2470703125, + "grad_norm": 24275.0390625, "learning_rate": 5.956000000000001e-06, - "loss": 33.2291, + "loss": 317794.35, "step": 29780 }, { "epoch": 0.06017768476508684, - "grad_norm": 300.0216064453125, + "grad_norm": 234969.96875, "learning_rate": 5.958e-06, - "loss": 36.237, + "loss": 179493.7125, "step": 29790 }, { "epoch": 0.060197885397770656, - "grad_norm": 821.6015014648438, + "grad_norm": 4608.55859375, "learning_rate": 5.9600000000000005e-06, - "loss": 30.3512, + "loss": 257167.825, "step": 29800 }, { "epoch": 0.060218086030454476, - "grad_norm": 2150.255615234375, + "grad_norm": 2589757.25, "learning_rate": 5.962e-06, - "loss": 36.7762, + "loss": 342912.3, "step": 29810 }, { "epoch": 0.06023828666313829, - "grad_norm": 508.111572265625, + "grad_norm": 983213.5625, "learning_rate": 5.964000000000001e-06, - "loss": 18.1586, + "loss": 295494.55, "step": 29820 }, { "epoch": 0.0602584872958221, - "grad_norm": 1313.7850341796875, + "grad_norm": 242221.640625, "learning_rate": 5.966000000000001e-06, - "loss": 43.5832, + "loss": 225921.7, "step": 29830 }, { "epoch": 0.06027868792850592, - "grad_norm": 229.95068359375, + "grad_norm": 45269.578125, "learning_rate": 5.968e-06, - "loss": 19.3392, + "loss": 156821.0125, "step": 29840 }, { "epoch": 0.060298888561189735, - "grad_norm": 296.32421875, + "grad_norm": 9182.2119140625, "learning_rate": 5.9700000000000004e-06, - "loss": 13.511, + "loss": 43009.9875, "step": 29850 }, { "epoch": 0.060319089193873555, - "grad_norm": 491.4680480957031, + "grad_norm": 573268.5, "learning_rate": 5.972e-06, - "loss": 50.0978, + "loss": 343442.45, "step": 29860 }, { "epoch": 0.06033928982655737, - "grad_norm": 830.8683471679688, + "grad_norm": 189501.28125, "learning_rate": 5.974000000000001e-06, - "loss": 39.9874, + "loss": 217095.75, "step": 29870 }, { "epoch": 0.06035949045924118, - "grad_norm": 277.2486877441406, + "grad_norm": 140336.640625, "learning_rate": 5.976000000000001e-06, - "loss": 19.6756, + "loss": 90515.4875, "step": 29880 }, { "epoch": 0.060379691091925, - "grad_norm": 227.3853302001953, + "grad_norm": 27140.251953125, "learning_rate": 5.978e-06, - "loss": 24.5435, + "loss": 125621.45, "step": 29890 }, { "epoch": 0.060399891724608815, - "grad_norm": 396.1295166015625, + "grad_norm": 463042.8125, "learning_rate": 5.98e-06, - "loss": 34.9831, + "loss": 283894.325, "step": 29900 }, { "epoch": 0.06042009235729263, - "grad_norm": 119.34272003173828, + "grad_norm": 19195.294921875, "learning_rate": 5.982e-06, - "loss": 17.9223, + "loss": 63973.4437, "step": 29910 }, { "epoch": 0.06044029298997645, - "grad_norm": 179.78720092773438, + "grad_norm": 35992.70703125, "learning_rate": 5.984000000000001e-06, - "loss": 19.886, + "loss": 85525.3062, "step": 29920 }, { "epoch": 0.06046049362266026, - "grad_norm": 241.3793487548828, + "grad_norm": 57890.7578125, "learning_rate": 5.986000000000001e-06, - "loss": 25.6962, + "loss": 242703.35, "step": 29930 }, { "epoch": 0.06048069425534408, - "grad_norm": 481.5415954589844, + "grad_norm": 347960.71875, "learning_rate": 5.988e-06, - "loss": 28.2497, + "loss": 265654.1, "step": 29940 }, { "epoch": 0.060500894888027894, - "grad_norm": 267.5933837890625, + "grad_norm": 18749.458984375, "learning_rate": 5.99e-06, - "loss": 36.0274, + "loss": 127266.4875, "step": 29950 }, { "epoch": 0.06052109552071171, - "grad_norm": 139.83847045898438, + "grad_norm": 14025.875, "learning_rate": 5.992e-06, - "loss": 23.3543, + "loss": 94280.9, "step": 29960 }, { "epoch": 0.06054129615339553, - "grad_norm": 136.5538787841797, + "grad_norm": 1073.66845703125, "learning_rate": 5.9940000000000005e-06, - "loss": 26.1079, + "loss": 93428.15, "step": 29970 }, { "epoch": 0.06056149678607934, - "grad_norm": 284.79254150390625, + "grad_norm": 11852.375, "learning_rate": 5.996000000000001e-06, - "loss": 21.3011, + "loss": 104589.5375, "step": 29980 }, { "epoch": 0.06058169741876315, - "grad_norm": 660.3438720703125, + "grad_norm": 345662.5625, "learning_rate": 5.998000000000001e-06, - "loss": 29.8056, + "loss": 145661.8125, "step": 29990 }, { "epoch": 0.06060189805144697, - "grad_norm": 293.27874755859375, + "grad_norm": 148838.265625, "learning_rate": 6e-06, - "loss": 32.3305, + "loss": 114709.6375, "step": 30000 }, { "epoch": 0.060622098684130786, - "grad_norm": 114.87731170654297, + "grad_norm": 127840.46875, "learning_rate": 6.002e-06, - "loss": 31.125, + "loss": 199820.925, "step": 30010 }, { "epoch": 0.060642299316814606, - "grad_norm": 225.98101806640625, + "grad_norm": 40815.19140625, "learning_rate": 6.004000000000001e-06, - "loss": 29.0658, + "loss": 98558.6875, "step": 30020 }, { "epoch": 0.06066249994949842, - "grad_norm": 150.27601623535156, + "grad_norm": 7691.06494140625, "learning_rate": 6.006000000000001e-06, - "loss": 25.6947, + "loss": 181201.4625, "step": 30030 }, { "epoch": 0.06068270058218223, - "grad_norm": 497.3393249511719, + "grad_norm": 451414.65625, "learning_rate": 6.008000000000001e-06, - "loss": 27.0551, + "loss": 138293.175, "step": 30040 }, { "epoch": 0.06070290121486605, - "grad_norm": 457.775146484375, + "grad_norm": 153086.34375, "learning_rate": 6.01e-06, - "loss": 27.2113, + "loss": 140538.9875, "step": 30050 }, { "epoch": 0.060723101847549865, - "grad_norm": 251.35533142089844, + "grad_norm": 126076.9140625, "learning_rate": 6.012e-06, - "loss": 35.466, + "loss": 158798.35, "step": 30060 }, { "epoch": 0.06074330248023368, - "grad_norm": 185.10580444335938, + "grad_norm": 54637.58984375, "learning_rate": 6.014000000000001e-06, - "loss": 26.3948, + "loss": 170384.5, "step": 30070 }, { "epoch": 0.0607635031129175, - "grad_norm": 156.08596801757812, + "grad_norm": 15011.2529296875, "learning_rate": 6.0160000000000005e-06, - "loss": 24.4032, + "loss": 107910.75, "step": 30080 }, { "epoch": 0.06078370374560131, - "grad_norm": 404.8869934082031, + "grad_norm": 437136.65625, "learning_rate": 6.018000000000001e-06, - "loss": 28.049, + "loss": 225953.3, "step": 30090 }, { "epoch": 0.06080390437828513, - "grad_norm": 307.8752136230469, + "grad_norm": 439568.625, "learning_rate": 6.02e-06, - "loss": 31.1489, + "loss": 295911.65, "step": 30100 }, { "epoch": 0.060824105010968944, - "grad_norm": 751.0390625, + "grad_norm": 181363.453125, "learning_rate": 6.022e-06, - "loss": 32.456, + "loss": 223589.0, "step": 30110 }, { "epoch": 0.06084430564365276, - "grad_norm": 133.58175659179688, + "grad_norm": 6156.3623046875, "learning_rate": 6.024000000000001e-06, - "loss": 19.7639, + "loss": 85443.25, "step": 30120 }, { "epoch": 0.06086450627633658, - "grad_norm": 276.15557861328125, + "grad_norm": 229434.3125, "learning_rate": 6.026e-06, - "loss": 44.7161, + "loss": 375379.25, "step": 30130 }, { "epoch": 0.06088470690902039, - "grad_norm": 220.90367126464844, + "grad_norm": 45349.6953125, "learning_rate": 6.0280000000000006e-06, - "loss": 25.6417, + "loss": 126191.7, "step": 30140 }, { "epoch": 0.060904907541704204, - "grad_norm": 304.4405822753906, + "grad_norm": 224654.21875, "learning_rate": 6.030000000000001e-06, - "loss": 35.9571, + "loss": 203911.1, "step": 30150 }, { "epoch": 0.060925108174388024, - "grad_norm": 359.6705627441406, + "grad_norm": 213854.640625, "learning_rate": 6.032e-06, - "loss": 39.6983, + "loss": 148548.6375, "step": 30160 }, { "epoch": 0.06094530880707184, - "grad_norm": 237.6664581298828, + "grad_norm": 252320.71875, "learning_rate": 6.034000000000001e-06, - "loss": 22.8935, + "loss": 155362.45, "step": 30170 }, { "epoch": 0.06096550943975566, - "grad_norm": 879.8140869140625, + "grad_norm": 338844.59375, "learning_rate": 6.036000000000001e-06, - "loss": 41.0201, + "loss": 194579.725, "step": 30180 }, { "epoch": 0.06098571007243947, - "grad_norm": 454.46734619140625, + "grad_norm": 54975.97265625, "learning_rate": 6.0380000000000005e-06, - "loss": 27.0023, + "loss": 134237.225, "step": 30190 }, { "epoch": 0.06100591070512328, - "grad_norm": 564.1406860351562, + "grad_norm": 889272.75, "learning_rate": 6.040000000000001e-06, - "loss": 40.1812, + "loss": 290269.7, "step": 30200 }, { "epoch": 0.0610261113378071, - "grad_norm": 5.1078667640686035, + "grad_norm": 6385.18359375, "learning_rate": 6.042e-06, - "loss": 24.9429, + "loss": 197857.05, "step": 30210 }, { "epoch": 0.061046311970490916, - "grad_norm": 154.20806884765625, + "grad_norm": 7812.51025390625, "learning_rate": 6.044000000000001e-06, - "loss": 18.952, + "loss": 171272.7, "step": 30220 }, { "epoch": 0.06106651260317473, - "grad_norm": 340.8790588378906, + "grad_norm": 87743.953125, "learning_rate": 6.046000000000001e-06, - "loss": 34.2021, + "loss": 324428.35, "step": 30230 }, { "epoch": 0.06108671323585855, - "grad_norm": 172.2555389404297, + "grad_norm": 367818.4375, "learning_rate": 6.048e-06, - "loss": 30.5673, + "loss": 193538.525, "step": 30240 }, { "epoch": 0.06110691386854236, - "grad_norm": 318.4809265136719, + "grad_norm": 186327.484375, "learning_rate": 6.0500000000000005e-06, - "loss": 13.6786, + "loss": 53157.25, "step": 30250 }, { "epoch": 0.06112711450122618, - "grad_norm": 360.1896667480469, + "grad_norm": 66420.1875, "learning_rate": 6.052e-06, - "loss": 24.7746, + "loss": 133389.4375, "step": 30260 }, { "epoch": 0.061147315133909995, - "grad_norm": 319.43060302734375, + "grad_norm": 764605.5625, "learning_rate": 6.054000000000001e-06, - "loss": 41.7367, + "loss": 292416.55, "step": 30270 }, { "epoch": 0.06116751576659381, - "grad_norm": 565.7113647460938, + "grad_norm": 135859.9375, "learning_rate": 6.056000000000001e-06, - "loss": 26.4553, + "loss": 82098.1438, "step": 30280 }, { "epoch": 0.06118771639927763, - "grad_norm": 480.1114501953125, + "grad_norm": 265722.21875, "learning_rate": 6.058e-06, - "loss": 20.8778, + "loss": 133739.375, "step": 30290 }, { "epoch": 0.06120791703196144, - "grad_norm": 239.09214782714844, + "grad_norm": 33968.53515625, "learning_rate": 6.0600000000000004e-06, - "loss": 41.7371, + "loss": 352169.425, "step": 30300 }, { "epoch": 0.061228117664645254, - "grad_norm": 453.1701965332031, + "grad_norm": 253723.9375, "learning_rate": 6.062e-06, - "loss": 52.8978, + "loss": 273077.85, "step": 30310 }, { "epoch": 0.061248318297329074, - "grad_norm": 503.0354309082031, + "grad_norm": 133485.796875, "learning_rate": 6.064000000000001e-06, - "loss": 31.5805, + "loss": 175618.225, "step": 30320 }, { "epoch": 0.06126851893001289, - "grad_norm": 299.6436767578125, + "grad_norm": 98015.0, "learning_rate": 6.066000000000001e-06, - "loss": 18.5376, + "loss": 159522.6125, "step": 30330 }, { "epoch": 0.06128871956269671, - "grad_norm": 326.1717224121094, + "grad_norm": 329543.625, "learning_rate": 6.068e-06, - "loss": 39.5791, + "loss": 249303.15, "step": 30340 }, { "epoch": 0.06130892019538052, - "grad_norm": 107.300048828125, + "grad_norm": 4209.78369140625, "learning_rate": 6.07e-06, - "loss": 26.4288, + "loss": 157708.8, "step": 30350 }, { "epoch": 0.06132912082806433, - "grad_norm": 548.6287841796875, + "grad_norm": 435422.5, "learning_rate": 6.0720000000000005e-06, - "loss": 27.0214, + "loss": 198819.5125, "step": 30360 }, { "epoch": 0.06134932146074815, - "grad_norm": 221.74899291992188, + "grad_norm": 36839.8828125, "learning_rate": 6.074000000000001e-06, - "loss": 33.1325, + "loss": 82601.0125, "step": 30370 }, { "epoch": 0.061369522093431966, - "grad_norm": 245.58372497558594, + "grad_norm": 167215.1875, "learning_rate": 6.076000000000001e-06, - "loss": 23.2985, + "loss": 100420.4812, "step": 30380 }, { "epoch": 0.06138972272611578, - "grad_norm": 458.6068115234375, + "grad_norm": 351291.625, "learning_rate": 6.078000000000001e-06, - "loss": 32.0391, + "loss": 120991.7, "step": 30390 }, { "epoch": 0.0614099233587996, - "grad_norm": 251.84043884277344, + "grad_norm": 103467.96875, "learning_rate": 6.08e-06, - "loss": 39.0284, + "loss": 306792.925, "step": 30400 }, { "epoch": 0.06143012399148341, - "grad_norm": 311.1361389160156, + "grad_norm": 167244.78125, "learning_rate": 6.082e-06, - "loss": 28.016, + "loss": 123972.85, "step": 30410 }, { "epoch": 0.061450324624167225, - "grad_norm": 208.8995361328125, + "grad_norm": 45425.0625, "learning_rate": 6.084000000000001e-06, - "loss": 38.4094, + "loss": 280787.275, "step": 30420 }, { "epoch": 0.061470525256851045, - "grad_norm": 204.97857666015625, + "grad_norm": 72201.4140625, "learning_rate": 6.086000000000001e-06, - "loss": 29.982, + "loss": 109172.6125, "step": 30430 }, { "epoch": 0.06149072588953486, - "grad_norm": 351.9819030761719, + "grad_norm": 208189.921875, "learning_rate": 6.088000000000001e-06, - "loss": 43.009, + "loss": 190053.275, "step": 30440 }, { "epoch": 0.06151092652221868, "grad_norm": 0.0, "learning_rate": 6.09e-06, - "loss": 16.1789, + "loss": 68029.3875, "step": 30450 }, { "epoch": 0.06153112715490249, - "grad_norm": 102.66375732421875, + "grad_norm": 90871.703125, "learning_rate": 6.092e-06, - "loss": 15.9975, + "loss": 47077.25, "step": 30460 }, { "epoch": 0.061551327787586305, - "grad_norm": 301.2502136230469, + "grad_norm": 46340.84375, "learning_rate": 6.094000000000001e-06, - "loss": 24.2382, + "loss": 67104.575, "step": 30470 }, { "epoch": 0.061571528420270125, - "grad_norm": 441.5412902832031, + "grad_norm": 322598.5, "learning_rate": 6.096000000000001e-06, - "loss": 22.9086, + "loss": 90690.0375, "step": 30480 }, { "epoch": 0.06159172905295394, - "grad_norm": 298.14483642578125, + "grad_norm": 20464.759765625, "learning_rate": 6.098000000000001e-06, - "loss": 42.6522, + "loss": 309951.775, "step": 30490 }, { "epoch": 0.06161192968563775, - "grad_norm": 222.50039672851562, + "grad_norm": 98717.015625, "learning_rate": 6.1e-06, - "loss": 34.7548, + "loss": 285814.825, "step": 30500 }, { "epoch": 0.06163213031832157, - "grad_norm": 199.57862854003906, + "grad_norm": 19187.4921875, "learning_rate": 6.102e-06, - "loss": 16.1869, + "loss": 84678.775, "step": 30510 }, { "epoch": 0.061652330951005384, - "grad_norm": 115.16866302490234, + "grad_norm": 18888.814453125, "learning_rate": 6.104000000000001e-06, - "loss": 21.8251, + "loss": 169734.2875, "step": 30520 }, { "epoch": 0.061672531583689204, - "grad_norm": 238.4964599609375, + "grad_norm": 57794.4375, "learning_rate": 6.1060000000000005e-06, - "loss": 32.4652, + "loss": 151463.5125, "step": 30530 }, { "epoch": 0.06169273221637302, - "grad_norm": 505.0570068359375, + "grad_norm": 900668.4375, "learning_rate": 6.108000000000001e-06, - "loss": 44.994, + "loss": 277162.675, "step": 30540 }, { "epoch": 0.06171293284905683, - "grad_norm": 402.1174011230469, + "grad_norm": 248063.796875, "learning_rate": 6.110000000000001e-06, - "loss": 25.0367, + "loss": 232524.5, "step": 30550 }, { "epoch": 0.06173313348174065, - "grad_norm": 77.92440795898438, + "grad_norm": 233626.8125, "learning_rate": 6.112e-06, - "loss": 24.3771, + "loss": 230017.85, "step": 30560 }, { "epoch": 0.06175333411442446, - "grad_norm": 43.44609832763672, + "grad_norm": 9860.8974609375, "learning_rate": 6.114000000000001e-06, - "loss": 16.8635, + "loss": 63564.0125, "step": 30570 }, { "epoch": 0.061773534747108276, - "grad_norm": 104.65845489501953, + "grad_norm": 36585.12890625, "learning_rate": 6.116000000000001e-06, - "loss": 25.8675, + "loss": 115814.025, "step": 30580 }, { "epoch": 0.061793735379792096, - "grad_norm": 294.6768493652344, + "grad_norm": 5457.92626953125, "learning_rate": 6.1180000000000005e-06, - "loss": 36.6797, + "loss": 307028.525, "step": 30590 }, { "epoch": 0.06181393601247591, - "grad_norm": 674.5152587890625, + "grad_norm": 1127463.125, "learning_rate": 6.120000000000001e-06, - "loss": 22.9528, + "loss": 171695.975, "step": 30600 }, { "epoch": 0.06183413664515973, - "grad_norm": 177.4771728515625, + "grad_norm": 7491.70849609375, "learning_rate": 6.122e-06, - "loss": 31.4151, + "loss": 138004.1, "step": 30610 }, { "epoch": 0.06185433727784354, - "grad_norm": 151.2828369140625, + "grad_norm": 41865.703125, "learning_rate": 6.124000000000001e-06, - "loss": 33.3632, + "loss": 202505.85, "step": 30620 }, { "epoch": 0.061874537910527355, - "grad_norm": 82.88737487792969, + "grad_norm": 4066.54736328125, "learning_rate": 6.126000000000001e-06, - "loss": 25.3052, + "loss": 202129.4375, "step": 30630 }, { "epoch": 0.061894738543211175, - "grad_norm": 496.4389343261719, + "grad_norm": 380730.9375, "learning_rate": 6.1280000000000005e-06, - "loss": 31.512, + "loss": 171513.0625, "step": 30640 }, { "epoch": 0.06191493917589499, - "grad_norm": 372.21466064453125, + "grad_norm": 155435.328125, "learning_rate": 6.130000000000001e-06, - "loss": 18.1448, + "loss": 101270.9688, "step": 30650 }, { "epoch": 0.0619351398085788, - "grad_norm": 576.8009033203125, + "grad_norm": 674800.6875, "learning_rate": 6.132e-06, - "loss": 23.0323, + "loss": 154738.175, "step": 30660 }, { "epoch": 0.06195534044126262, - "grad_norm": 256.25958251953125, + "grad_norm": 14235.0673828125, "learning_rate": 6.134e-06, - "loss": 20.0009, + "loss": 62817.9375, "step": 30670 }, { "epoch": 0.061975541073946434, - "grad_norm": 240.95333862304688, + "grad_norm": 98217.015625, "learning_rate": 6.136000000000001e-06, - "loss": 53.408, + "loss": 262190.425, "step": 30680 }, { "epoch": 0.061995741706630254, - "grad_norm": 934.2208862304688, + "grad_norm": 1103945.125, "learning_rate": 6.138e-06, - "loss": 28.7069, + "loss": 252288.4, "step": 30690 }, { "epoch": 0.06201594233931407, - "grad_norm": 139.9613494873047, + "grad_norm": 18004.037109375, "learning_rate": 6.1400000000000005e-06, - "loss": 28.2148, + "loss": 219710.125, "step": 30700 }, { "epoch": 0.06203614297199788, - "grad_norm": 492.6600646972656, + "grad_norm": 421875.1875, "learning_rate": 6.142e-06, - "loss": 28.5997, + "loss": 167893.2, "step": 30710 }, { "epoch": 0.0620563436046817, - "grad_norm": 224.6328582763672, + "grad_norm": 68709.8828125, "learning_rate": 6.144e-06, - "loss": 16.2472, + "loss": 97726.0625, "step": 30720 }, { "epoch": 0.062076544237365514, - "grad_norm": 581.1234130859375, + "grad_norm": 1113862.25, "learning_rate": 6.146000000000001e-06, - "loss": 20.6001, + "loss": 236787.1, "step": 30730 }, { "epoch": 0.06209674487004933, - "grad_norm": 419.7743835449219, + "grad_norm": 190479.4375, "learning_rate": 6.148e-06, - "loss": 30.4461, + "loss": 250967.175, "step": 30740 }, { "epoch": 0.06211694550273315, - "grad_norm": 193.66250610351562, + "grad_norm": 4935.890625, "learning_rate": 6.15e-06, - "loss": 28.1716, + "loss": 147999.6875, "step": 30750 }, { "epoch": 0.06213714613541696, - "grad_norm": 113.6976318359375, + "grad_norm": 9646.2734375, "learning_rate": 6.1520000000000006e-06, - "loss": 20.0639, + "loss": 150202.25, "step": 30760 }, { "epoch": 0.06215734676810078, - "grad_norm": 338.61114501953125, + "grad_norm": 135048.328125, "learning_rate": 6.154e-06, - "loss": 34.2324, + "loss": 238662.425, "step": 30770 }, { "epoch": 0.06217754740078459, - "grad_norm": 346.8319396972656, + "grad_norm": 327993.34375, "learning_rate": 6.156000000000001e-06, - "loss": 25.6069, + "loss": 199762.375, "step": 30780 }, { "epoch": 0.062197748033468406, - "grad_norm": 170.17576599121094, + "grad_norm": 9384.060546875, "learning_rate": 6.158000000000001e-06, - "loss": 37.54, + "loss": 171249.0375, "step": 30790 }, { "epoch": 0.062217948666152226, - "grad_norm": 621.9466552734375, + "grad_norm": 501698.28125, "learning_rate": 6.16e-06, - "loss": 30.7512, + "loss": 246662.675, "step": 30800 }, { "epoch": 0.06223814929883604, - "grad_norm": 348.8072204589844, + "grad_norm": 144729.3125, "learning_rate": 6.1620000000000005e-06, - "loss": 31.0073, + "loss": 154332.35, "step": 30810 }, { "epoch": 0.06225834993151985, - "grad_norm": 85.53361511230469, + "grad_norm": 31059.595703125, "learning_rate": 6.164e-06, - "loss": 38.4568, + "loss": 259896.85, "step": 30820 }, { "epoch": 0.06227855056420367, - "grad_norm": 825.8348999023438, + "grad_norm": 1389556.75, "learning_rate": 6.166000000000001e-06, - "loss": 33.1891, + "loss": 318008.125, "step": 30830 }, { "epoch": 0.062298751196887485, - "grad_norm": 377.9906921386719, + "grad_norm": 130884.1640625, "learning_rate": 6.168000000000001e-06, - "loss": 21.0851, + "loss": 81786.475, "step": 30840 }, { "epoch": 0.062318951829571305, - "grad_norm": 111.96500396728516, + "grad_norm": 12441.1650390625, "learning_rate": 6.17e-06, - "loss": 22.7266, + "loss": 110150.9125, "step": 30850 }, { "epoch": 0.06233915246225512, - "grad_norm": 224.50840759277344, + "grad_norm": 1809.977783203125, "learning_rate": 6.172e-06, - "loss": 20.1645, + "loss": 128926.5625, "step": 30860 }, { "epoch": 0.06235935309493893, - "grad_norm": 263.6773986816406, + "grad_norm": 58516.6171875, "learning_rate": 6.174e-06, - "loss": 22.9404, + "loss": 152078.375, "step": 30870 }, { "epoch": 0.06237955372762275, - "grad_norm": 698.050537109375, + "grad_norm": 1611979.625, "learning_rate": 6.176000000000001e-06, - "loss": 59.9724, + "loss": 557091.05, "step": 30880 }, { "epoch": 0.062399754360306564, - "grad_norm": 265.96722412109375, + "grad_norm": 240127.8125, "learning_rate": 6.178000000000001e-06, - "loss": 16.1057, + "loss": 51089.8688, "step": 30890 }, { "epoch": 0.06241995499299038, - "grad_norm": 248.72235107421875, + "grad_norm": 11539.6845703125, "learning_rate": 6.18e-06, - "loss": 36.1049, + "loss": 233247.6, "step": 30900 }, { "epoch": 0.0624401556256742, - "grad_norm": 650.3839721679688, + "grad_norm": 101372.7734375, "learning_rate": 6.182e-06, - "loss": 26.4691, + "loss": 135705.6125, "step": 30910 }, { "epoch": 0.06246035625835801, - "grad_norm": 492.22845458984375, + "grad_norm": 187066.09375, "learning_rate": 6.184e-06, - "loss": 15.0181, + "loss": 74687.0437, "step": 30920 }, { "epoch": 0.06248055689104183, - "grad_norm": 378.4203186035156, + "grad_norm": 362081.84375, "learning_rate": 6.1860000000000006e-06, - "loss": 36.7387, + "loss": 192833.5, "step": 30930 }, { "epoch": 0.06250075752372564, - "grad_norm": 249.523681640625, + "grad_norm": 5054.5966796875, "learning_rate": 6.188000000000001e-06, - "loss": 28.3755, + "loss": 117396.1625, "step": 30940 }, { "epoch": 0.06252095815640946, - "grad_norm": 113.86726379394531, + "grad_norm": 48891.203125, "learning_rate": 6.190000000000001e-06, - "loss": 30.0943, + "loss": 155562.1, "step": 30950 }, { "epoch": 0.06254115878909328, - "grad_norm": 415.4285888671875, + "grad_norm": 222119.65625, "learning_rate": 6.192e-06, - "loss": 20.979, + "loss": 70246.0375, "step": 30960 }, { "epoch": 0.0625613594217771, - "grad_norm": 153.79115295410156, + "grad_norm": 23089.513671875, "learning_rate": 6.194e-06, - "loss": 34.6167, + "loss": 168576.1875, "step": 30970 }, { "epoch": 0.0625815600544609, - "grad_norm": 390.0386047363281, + "grad_norm": 309073.34375, "learning_rate": 6.196000000000001e-06, - "loss": 37.6308, + "loss": 232390.575, "step": 30980 }, { "epoch": 0.06260176068714472, - "grad_norm": 786.6076049804688, + "grad_norm": 145489.9375, "learning_rate": 6.198000000000001e-06, - "loss": 19.3254, + "loss": 160943.425, "step": 30990 }, { "epoch": 0.06262196131982854, - "grad_norm": 593.9253540039062, + "grad_norm": 5509.23388671875, "learning_rate": 6.200000000000001e-06, - "loss": 45.8376, + "loss": 230726.875, "step": 31000 }, { "epoch": 0.06264216195251235, - "grad_norm": 267.5792236328125, + "grad_norm": 3584.968017578125, "learning_rate": 6.202e-06, - "loss": 19.2077, + "loss": 115250.275, "step": 31010 }, { "epoch": 0.06266236258519617, - "grad_norm": 1246.27587890625, + "grad_norm": 1371825.875, "learning_rate": 6.204e-06, - "loss": 29.5188, + "loss": 243036.425, "step": 31020 }, { "epoch": 0.06268256321787999, - "grad_norm": 200.8495330810547, + "grad_norm": 53440.70703125, "learning_rate": 6.206000000000001e-06, - "loss": 18.708, + "loss": 90375.425, "step": 31030 }, { "epoch": 0.0627027638505638, - "grad_norm": 508.18182373046875, + "grad_norm": 403394.09375, "learning_rate": 6.2080000000000005e-06, - "loss": 31.8805, + "loss": 353773.5, "step": 31040 }, { "epoch": 0.06272296448324761, - "grad_norm": 369.0675048828125, + "grad_norm": 243985.375, "learning_rate": 6.210000000000001e-06, - "loss": 25.7357, + "loss": 111164.4375, "step": 31050 }, { "epoch": 0.06274316511593143, - "grad_norm": 474.58843994140625, + "grad_norm": 565299.5, "learning_rate": 6.212e-06, - "loss": 38.1484, + "loss": 305006.9, "step": 31060 }, { "epoch": 0.06276336574861524, - "grad_norm": 140.6695556640625, + "grad_norm": 20234.97265625, "learning_rate": 6.214e-06, - "loss": 26.6929, + "loss": 190864.5375, "step": 31070 }, { "epoch": 0.06278356638129906, - "grad_norm": 300.88787841796875, + "grad_norm": 176376.453125, "learning_rate": 6.216000000000001e-06, - "loss": 17.8964, + "loss": 89856.75, "step": 31080 }, { "epoch": 0.06280376701398288, - "grad_norm": 99.71508026123047, + "grad_norm": 23408.58203125, "learning_rate": 6.2180000000000004e-06, - "loss": 24.024, + "loss": 236387.25, "step": 31090 }, { "epoch": 0.06282396764666669, - "grad_norm": 702.80908203125, + "grad_norm": 1087384.25, "learning_rate": 6.220000000000001e-06, - "loss": 32.6589, + "loss": 311187.8, "step": 31100 }, { "epoch": 0.0628441682793505, - "grad_norm": 196.9779510498047, + "grad_norm": 38666.11328125, "learning_rate": 6.222e-06, - "loss": 31.1702, + "loss": 206066.225, "step": 31110 }, { "epoch": 0.06286436891203433, - "grad_norm": 408.7449035644531, + "grad_norm": 89418.8203125, "learning_rate": 6.224e-06, - "loss": 35.4531, + "loss": 209434.175, "step": 31120 }, { "epoch": 0.06288456954471815, - "grad_norm": 805.6083984375, + "grad_norm": 647701.375, "learning_rate": 6.226000000000001e-06, - "loss": 47.5391, + "loss": 283103.375, "step": 31130 }, { "epoch": 0.06290477017740195, - "grad_norm": 193.94830322265625, + "grad_norm": 38975.53125, "learning_rate": 6.228e-06, - "loss": 21.1073, + "loss": 144790.625, "step": 31140 }, { "epoch": 0.06292497081008577, - "grad_norm": 92.54554748535156, + "grad_norm": 29912.81640625, "learning_rate": 6.2300000000000005e-06, - "loss": 20.5223, + "loss": 78643.4937, "step": 31150 }, { "epoch": 0.06294517144276959, - "grad_norm": 220.41717529296875, + "grad_norm": 90649.03125, "learning_rate": 6.232000000000001e-06, - "loss": 23.0221, + "loss": 241628.425, "step": 31160 }, { "epoch": 0.0629653720754534, - "grad_norm": 487.35113525390625, + "grad_norm": 281632.625, "learning_rate": 6.234e-06, - "loss": 46.811, + "loss": 263893.125, "step": 31170 }, { "epoch": 0.06298557270813722, - "grad_norm": 244.32127380371094, + "grad_norm": 15110.9775390625, "learning_rate": 6.236000000000001e-06, - "loss": 38.5928, + "loss": 218377.45, "step": 31180 }, { "epoch": 0.06300577334082104, - "grad_norm": 713.2606201171875, + "grad_norm": 582664.125, "learning_rate": 6.238000000000001e-06, - "loss": 34.265, + "loss": 288638.45, "step": 31190 }, { "epoch": 0.06302597397350485, - "grad_norm": 602.4276123046875, + "grad_norm": 456973.34375, "learning_rate": 6.24e-06, - "loss": 44.2316, + "loss": 317867.075, "step": 31200 }, { "epoch": 0.06304617460618867, - "grad_norm": 248.2340850830078, + "grad_norm": 251487.46875, "learning_rate": 6.2420000000000005e-06, - "loss": 15.4166, + "loss": 89111.975, "step": 31210 }, { "epoch": 0.06306637523887249, - "grad_norm": 357.87896728515625, + "grad_norm": 256327.25, "learning_rate": 6.244e-06, - "loss": 38.5904, + "loss": 210570.925, "step": 31220 }, { "epoch": 0.06308657587155629, - "grad_norm": 204.51394653320312, + "grad_norm": 27508.275390625, "learning_rate": 6.246000000000001e-06, - "loss": 28.7903, + "loss": 181278.0, "step": 31230 }, { "epoch": 0.06310677650424011, - "grad_norm": 668.6724243164062, + "grad_norm": 96134.5703125, "learning_rate": 6.248000000000001e-06, - "loss": 30.186, + "loss": 305221.5, "step": 31240 }, { "epoch": 0.06312697713692393, - "grad_norm": 209.66392517089844, + "grad_norm": 8087.7490234375, "learning_rate": 6.25e-06, - "loss": 30.3958, + "loss": 217536.125, "step": 31250 }, { "epoch": 0.06314717776960774, - "grad_norm": 170.11233520507812, + "grad_norm": 28421.01171875, "learning_rate": 6.2520000000000004e-06, - "loss": 27.4409, + "loss": 137295.175, "step": 31260 }, { "epoch": 0.06316737840229156, "grad_norm": 0.0, "learning_rate": 6.254e-06, - "loss": 35.9831, + "loss": 182738.15, "step": 31270 }, { "epoch": 0.06318757903497538, - "grad_norm": 345.4881591796875, + "grad_norm": 326572.28125, "learning_rate": 6.256000000000001e-06, - "loss": 18.772, + "loss": 164440.125, "step": 31280 }, { "epoch": 0.0632077796676592, - "grad_norm": 433.49755859375, + "grad_norm": 163298.84375, "learning_rate": 6.258000000000001e-06, - "loss": 32.9236, + "loss": 224470.8, "step": 31290 }, { "epoch": 0.063227980300343, - "grad_norm": 326.5909729003906, + "grad_norm": 74394.4296875, "learning_rate": 6.26e-06, - "loss": 20.2967, + "loss": 103181.1313, "step": 31300 }, { "epoch": 0.06324818093302682, - "grad_norm": 27.680173873901367, + "grad_norm": 211710.15625, "learning_rate": 6.262e-06, - "loss": 33.7101, + "loss": 236723.35, "step": 31310 }, { "epoch": 0.06326838156571064, - "grad_norm": 391.7144470214844, + "grad_norm": 75614.1640625, "learning_rate": 6.264e-06, - "loss": 41.8141, + "loss": 87532.5437, "step": 31320 }, { "epoch": 0.06328858219839445, - "grad_norm": 102.66294860839844, + "grad_norm": 3762.15234375, "learning_rate": 6.266000000000001e-06, - "loss": 27.3469, + "loss": 138109.8625, "step": 31330 }, { "epoch": 0.06330878283107827, - "grad_norm": 245.99940490722656, + "grad_norm": 44166.80859375, "learning_rate": 6.268000000000001e-06, - "loss": 21.1016, + "loss": 60582.9938, "step": 31340 }, { "epoch": 0.06332898346376209, - "grad_norm": 250.53732299804688, + "grad_norm": 142179.609375, "learning_rate": 6.27e-06, - "loss": 43.6522, + "loss": 233566.8, "step": 31350 }, { "epoch": 0.0633491840964459, - "grad_norm": 357.2115783691406, + "grad_norm": 81077.7890625, "learning_rate": 6.272e-06, - "loss": 25.7863, + "loss": 57592.025, "step": 31360 }, { "epoch": 0.06336938472912972, - "grad_norm": 429.6044006347656, + "grad_norm": 80475.9921875, "learning_rate": 6.274e-06, - "loss": 28.0361, + "loss": 257305.7, "step": 31370 }, { "epoch": 0.06338958536181354, - "grad_norm": 306.50421142578125, + "grad_norm": 27382.869140625, "learning_rate": 6.2760000000000006e-06, - "loss": 33.9684, + "loss": 330131.325, "step": 31380 }, { "epoch": 0.06340978599449734, - "grad_norm": 193.95425415039062, + "grad_norm": 12482.43359375, "learning_rate": 6.278000000000001e-06, - "loss": 34.4685, + "loss": 220547.3, "step": 31390 }, { "epoch": 0.06342998662718116, - "grad_norm": 387.4499816894531, + "grad_norm": 208194.15625, "learning_rate": 6.280000000000001e-06, - "loss": 23.8366, + "loss": 181153.05, "step": 31400 }, { "epoch": 0.06345018725986498, - "grad_norm": 345.3521728515625, + "grad_norm": 167051.78125, "learning_rate": 6.282e-06, - "loss": 27.6357, + "loss": 131457.7625, "step": 31410 }, { "epoch": 0.06347038789254879, - "grad_norm": 182.93849182128906, + "grad_norm": 30719.935546875, "learning_rate": 6.284e-06, - "loss": 22.5396, + "loss": 126827.4, "step": 31420 }, { "epoch": 0.06349058852523261, - "grad_norm": 446.3598937988281, + "grad_norm": 18992.66796875, "learning_rate": 6.286000000000001e-06, - "loss": 29.8193, + "loss": 127005.025, "step": 31430 }, { "epoch": 0.06351078915791643, - "grad_norm": 258.4228515625, + "grad_norm": 277253.4375, "learning_rate": 6.288000000000001e-06, - "loss": 40.5369, + "loss": 281330.5, "step": 31440 }, { "epoch": 0.06353098979060025, - "grad_norm": 193.42092895507812, + "grad_norm": 87177.1328125, "learning_rate": 6.290000000000001e-06, - "loss": 24.5259, + "loss": 93424.4, "step": 31450 }, { "epoch": 0.06355119042328405, - "grad_norm": 681.1078491210938, + "grad_norm": 736196.6875, "learning_rate": 6.292e-06, - "loss": 26.9826, + "loss": 165271.4375, "step": 31460 }, { "epoch": 0.06357139105596787, - "grad_norm": 435.4959716796875, + "grad_norm": 731404.5, "learning_rate": 6.294e-06, - "loss": 39.3159, + "loss": 324867.125, "step": 31470 }, { "epoch": 0.0635915916886517, - "grad_norm": 251.05712890625, + "grad_norm": 20739.255859375, "learning_rate": 6.296000000000001e-06, - "loss": 29.1976, + "loss": 126275.0625, "step": 31480 }, { "epoch": 0.0636117923213355, - "grad_norm": 446.5157165527344, + "grad_norm": 100522.515625, "learning_rate": 6.2980000000000005e-06, - "loss": 32.6056, + "loss": 163605.6375, "step": 31490 }, { "epoch": 0.06363199295401932, - "grad_norm": 611.6644287109375, + "grad_norm": 566816.8125, "learning_rate": 6.300000000000001e-06, - "loss": 31.2819, + "loss": 205476.625, "step": 31500 }, { "epoch": 0.06365219358670314, - "grad_norm": 117.74989318847656, + "grad_norm": 130816.1015625, "learning_rate": 6.302e-06, - "loss": 30.5737, + "loss": 94033.6938, "step": 31510 }, { "epoch": 0.06367239421938695, - "grad_norm": 289.89520263671875, + "grad_norm": 108413.546875, "learning_rate": 6.304e-06, - "loss": 18.713, + "loss": 190934.4, "step": 31520 }, { "epoch": 0.06369259485207077, - "grad_norm": 272.0415954589844, + "grad_norm": 12946.9853515625, "learning_rate": 6.306000000000001e-06, - "loss": 37.7071, + "loss": 246883.95, "step": 31530 }, { "epoch": 0.06371279548475459, - "grad_norm": 110.60374450683594, + "grad_norm": 41191.44140625, "learning_rate": 6.308e-06, - "loss": 18.2154, + "loss": 235272.6, "step": 31540 }, { "epoch": 0.06373299611743839, - "grad_norm": 244.5205841064453, + "grad_norm": 43918.86328125, "learning_rate": 6.3100000000000006e-06, - "loss": 26.8455, + "loss": 237968.3, "step": 31550 }, { "epoch": 0.06375319675012221, - "grad_norm": 417.72833251953125, + "grad_norm": 361059.9375, "learning_rate": 6.312000000000001e-06, - "loss": 33.2194, + "loss": 271137.775, "step": 31560 }, { "epoch": 0.06377339738280603, - "grad_norm": 368.2216796875, + "grad_norm": 169638.84375, "learning_rate": 6.314e-06, - "loss": 14.8553, + "loss": 86292.0813, "step": 31570 }, { "epoch": 0.06379359801548984, - "grad_norm": 584.34375, + "grad_norm": 823989.5625, "learning_rate": 6.316000000000001e-06, - "loss": 36.6712, + "loss": 252563.95, "step": 31580 }, { "epoch": 0.06381379864817366, - "grad_norm": 198.16441345214844, + "grad_norm": 10224.8349609375, "learning_rate": 6.318000000000001e-06, - "loss": 19.6327, + "loss": 376063.7, "step": 31590 }, { "epoch": 0.06383399928085748, - "grad_norm": 81.60736846923828, + "grad_norm": 6741.89404296875, "learning_rate": 6.3200000000000005e-06, - "loss": 43.3337, + "loss": 190380.175, "step": 31600 }, { "epoch": 0.0638541999135413, - "grad_norm": 449.2483215332031, + "grad_norm": 50299.16796875, "learning_rate": 6.322000000000001e-06, - "loss": 32.1987, + "loss": 88610.9187, "step": 31610 }, { "epoch": 0.0638744005462251, - "grad_norm": 416.2035217285156, + "grad_norm": 60604.03125, "learning_rate": 6.324e-06, - "loss": 16.5998, + "loss": 97217.8687, "step": 31620 }, { "epoch": 0.06389460117890892, - "grad_norm": 381.83648681640625, + "grad_norm": 134546.703125, "learning_rate": 6.326000000000001e-06, - "loss": 26.0645, + "loss": 133855.275, "step": 31630 }, { "epoch": 0.06391480181159274, - "grad_norm": 440.89459228515625, + "grad_norm": 29550.451171875, "learning_rate": 6.328000000000001e-06, - "loss": 17.4892, + "loss": 192277.0875, "step": 31640 }, { "epoch": 0.06393500244427655, - "grad_norm": 371.1136169433594, + "grad_norm": 40917.62890625, "learning_rate": 6.33e-06, - "loss": 24.7386, + "loss": 85258.275, "step": 31650 }, { "epoch": 0.06395520307696037, - "grad_norm": 351.368896484375, + "grad_norm": 349167.84375, "learning_rate": 6.3320000000000005e-06, - "loss": 38.4869, + "loss": 226790.825, "step": 31660 }, { "epoch": 0.06397540370964419, - "grad_norm": 278.1321716308594, + "grad_norm": 40921.98046875, "learning_rate": 6.334e-06, - "loss": 6.4429, + "loss": 41322.0281, "step": 31670 }, { "epoch": 0.063995604342328, - "grad_norm": 1031.544677734375, + "grad_norm": 208556.5625, "learning_rate": 6.336000000000001e-06, - "loss": 36.5749, + "loss": 170925.0, "step": 31680 }, { "epoch": 0.06401580497501182, - "grad_norm": 260.3399658203125, + "grad_norm": 135812.0625, "learning_rate": 6.338000000000001e-06, - "loss": 28.7344, + "loss": 134401.35, "step": 31690 }, { "epoch": 0.06403600560769564, - "grad_norm": 286.3230285644531, + "grad_norm": 78824.3828125, "learning_rate": 6.34e-06, - "loss": 23.257, + "loss": 78624.25, "step": 31700 }, { "epoch": 0.06405620624037944, - "grad_norm": 395.49169921875, + "grad_norm": 127946.7109375, "learning_rate": 6.3420000000000004e-06, - "loss": 47.73, + "loss": 145832.925, "step": 31710 }, { "epoch": 0.06407640687306326, - "grad_norm": 172.02305603027344, + "grad_norm": 7959.28173828125, "learning_rate": 6.344e-06, - "loss": 25.7619, + "loss": 100740.3375, "step": 31720 }, { "epoch": 0.06409660750574708, - "grad_norm": 28.446199417114258, + "grad_norm": 5534.4365234375, "learning_rate": 6.346000000000001e-06, - "loss": 27.4512, + "loss": 106152.4875, "step": 31730 }, { "epoch": 0.06411680813843089, - "grad_norm": 358.0504150390625, + "grad_norm": 186797.359375, "learning_rate": 6.348000000000001e-06, - "loss": 34.4209, + "loss": 202033.8, "step": 31740 }, { "epoch": 0.06413700877111471, - "grad_norm": 215.0141143798828, + "grad_norm": 43973.83984375, "learning_rate": 6.35e-06, - "loss": 24.7617, + "loss": 120490.55, "step": 31750 }, { "epoch": 0.06415720940379853, - "grad_norm": 670.1936645507812, + "grad_norm": 218369.890625, "learning_rate": 6.352e-06, - "loss": 18.105, + "loss": 122690.0, "step": 31760 }, { "epoch": 0.06417741003648235, - "grad_norm": 350.1632385253906, + "grad_norm": 75523.8984375, "learning_rate": 6.3540000000000005e-06, - "loss": 51.2723, + "loss": 329505.65, "step": 31770 }, { "epoch": 0.06419761066916616, - "grad_norm": 300.8078308105469, + "grad_norm": 366409.78125, "learning_rate": 6.356000000000001e-06, - "loss": 48.8145, + "loss": 360498.725, "step": 31780 }, { "epoch": 0.06421781130184998, - "grad_norm": 225.35494995117188, + "grad_norm": 2882.16162109375, "learning_rate": 6.358000000000001e-06, - "loss": 20.5653, + "loss": 78475.3562, "step": 31790 }, { "epoch": 0.0642380119345338, - "grad_norm": 263.5152893066406, + "grad_norm": 31246.837890625, "learning_rate": 6.360000000000001e-06, - "loss": 34.9176, + "loss": 192743.2, "step": 31800 }, { "epoch": 0.0642582125672176, - "grad_norm": 33.31119155883789, + "grad_norm": 6409.38134765625, "learning_rate": 6.362e-06, - "loss": 23.7906, + "loss": 117869.8875, "step": 31810 }, { "epoch": 0.06427841319990142, - "grad_norm": 763.8698120117188, + "grad_norm": 130460.1796875, "learning_rate": 6.364e-06, - "loss": 23.5817, + "loss": 131798.5, "step": 31820 }, { "epoch": 0.06429861383258524, - "grad_norm": 389.0904235839844, + "grad_norm": 87989.6640625, "learning_rate": 6.366000000000001e-06, - "loss": 42.6483, + "loss": 408337.3, "step": 31830 }, { "epoch": 0.06431881446526905, - "grad_norm": 371.7533264160156, + "grad_norm": 40056.76953125, "learning_rate": 6.368000000000001e-06, - "loss": 9.3499, + "loss": 18008.0672, "step": 31840 }, { "epoch": 0.06433901509795287, - "grad_norm": 501.480712890625, + "grad_norm": 327730.90625, "learning_rate": 6.370000000000001e-06, - "loss": 38.5982, + "loss": 250575.325, "step": 31850 }, { "epoch": 0.06435921573063669, - "grad_norm": 628.2760620117188, + "grad_norm": 546247.1875, "learning_rate": 6.372e-06, - "loss": 19.5583, + "loss": 276536.95, "step": 31860 }, { "epoch": 0.0643794163633205, - "grad_norm": 151.403564453125, + "grad_norm": 140821.390625, "learning_rate": 6.374e-06, - "loss": 37.5179, + "loss": 300621.075, "step": 31870 }, { "epoch": 0.06439961699600431, - "grad_norm": 260.3165283203125, + "grad_norm": 37181.90234375, "learning_rate": 6.376e-06, - "loss": 16.6509, + "loss": 73139.9125, "step": 31880 }, { "epoch": 0.06441981762868813, - "grad_norm": 461.09088134765625, + "grad_norm": 85598.3046875, "learning_rate": 6.378000000000001e-06, - "loss": 19.993, + "loss": 60820.8375, "step": 31890 }, { "epoch": 0.06444001826137194, - "grad_norm": 135.558349609375, + "grad_norm": 2068.194580078125, "learning_rate": 6.380000000000001e-06, - "loss": 28.1471, + "loss": 192063.575, "step": 31900 }, { "epoch": 0.06446021889405576, - "grad_norm": 80.57373809814453, + "grad_norm": 20643.572265625, "learning_rate": 6.382e-06, - "loss": 27.6211, + "loss": 161173.9, "step": 31910 }, { "epoch": 0.06448041952673958, - "grad_norm": 178.90760803222656, + "grad_norm": 137224.046875, "learning_rate": 6.384e-06, - "loss": 28.3989, + "loss": 215637.275, "step": 31920 }, { "epoch": 0.0645006201594234, - "grad_norm": 351.40704345703125, + "grad_norm": 104440.03125, "learning_rate": 6.386e-06, - "loss": 27.7896, + "loss": 86463.0312, "step": 31930 }, { "epoch": 0.0645208207921072, - "grad_norm": 311.6580810546875, + "grad_norm": 91087.421875, "learning_rate": 6.3880000000000005e-06, - "loss": 41.1878, + "loss": 282848.1, "step": 31940 }, { "epoch": 0.06454102142479103, - "grad_norm": 308.5960388183594, + "grad_norm": 56480.45703125, "learning_rate": 6.390000000000001e-06, - "loss": 22.5207, + "loss": 73633.2875, "step": 31950 }, { "epoch": 0.06456122205747485, - "grad_norm": 228.21206665039062, + "grad_norm": 11084.3916015625, "learning_rate": 6.392000000000001e-06, - "loss": 27.872, + "loss": 406196.425, "step": 31960 }, { "epoch": 0.06458142269015865, - "grad_norm": 795.2063598632812, + "grad_norm": 1195291.375, "learning_rate": 6.394e-06, - "loss": 35.4486, + "loss": 234593.9, "step": 31970 }, { "epoch": 0.06460162332284247, - "grad_norm": 408.62799072265625, + "grad_norm": 631701.0, "learning_rate": 6.396e-06, - "loss": 21.6402, + "loss": 178052.05, "step": 31980 }, { "epoch": 0.06462182395552629, - "grad_norm": 280.4447021484375, + "grad_norm": 41306.3359375, "learning_rate": 6.398000000000001e-06, - "loss": 43.0083, + "loss": 280570.6, "step": 31990 }, { "epoch": 0.0646420245882101, - "grad_norm": 950.8634033203125, + "grad_norm": 704671.25, "learning_rate": 6.4000000000000006e-06, - "loss": 31.1654, + "loss": 165298.325, "step": 32000 }, { "epoch": 0.06466222522089392, - "grad_norm": 231.40867614746094, + "grad_norm": 74420.5078125, "learning_rate": 6.402000000000001e-06, - "loss": 28.7127, + "loss": 255886.7, "step": 32010 }, { "epoch": 0.06468242585357774, "grad_norm": 0.0, "learning_rate": 6.404e-06, - "loss": 38.1864, + "loss": 352671.175, "step": 32020 }, { "epoch": 0.06470262648626154, - "grad_norm": 947.143310546875, + "grad_norm": 840895.8125, "learning_rate": 6.406e-06, - "loss": 37.0088, + "loss": 151499.175, "step": 32030 }, { "epoch": 0.06472282711894536, - "grad_norm": 140.33401489257812, + "grad_norm": 13292.431640625, "learning_rate": 6.408000000000001e-06, - "loss": 30.061, + "loss": 243117.65, "step": 32040 }, { "epoch": 0.06474302775162918, - "grad_norm": 315.9873962402344, + "grad_norm": 9267.3408203125, "learning_rate": 6.4100000000000005e-06, - "loss": 35.755, + "loss": 285545.225, "step": 32050 }, { "epoch": 0.06476322838431299, - "grad_norm": 656.0057983398438, + "grad_norm": 743837.3125, "learning_rate": 6.412000000000001e-06, - "loss": 34.4754, + "loss": 231264.525, "step": 32060 }, { "epoch": 0.06478342901699681, - "grad_norm": 739.964111328125, + "grad_norm": 66548.109375, "learning_rate": 6.414e-06, - "loss": 42.6976, + "loss": 170789.575, "step": 32070 }, { "epoch": 0.06480362964968063, - "grad_norm": 267.9989013671875, + "grad_norm": 33734.90625, "learning_rate": 6.416e-06, - "loss": 41.9579, + "loss": 175448.0375, "step": 32080 }, { "epoch": 0.06482383028236445, - "grad_norm": 375.5707702636719, + "grad_norm": 103330.125, "learning_rate": 6.418000000000001e-06, - "loss": 21.0082, + "loss": 101833.9625, "step": 32090 }, { "epoch": 0.06484403091504826, - "grad_norm": 374.5641174316406, + "grad_norm": 335544.28125, "learning_rate": 6.42e-06, - "loss": 32.6614, + "loss": 243760.9, "step": 32100 }, { "epoch": 0.06486423154773208, - "grad_norm": 322.2223205566406, + "grad_norm": 100911.1640625, "learning_rate": 6.4220000000000005e-06, - "loss": 28.5509, + "loss": 133228.05, "step": 32110 }, { "epoch": 0.0648844321804159, - "grad_norm": 175.63418579101562, + "grad_norm": 36629.14453125, "learning_rate": 6.424e-06, - "loss": 18.4089, + "loss": 110606.4875, "step": 32120 }, { "epoch": 0.0649046328130997, - "grad_norm": 450.5000915527344, + "grad_norm": 7866.10888671875, "learning_rate": 6.426e-06, - "loss": 28.6365, + "loss": 186370.3, "step": 32130 }, { "epoch": 0.06492483344578352, - "grad_norm": 251.46981811523438, + "grad_norm": 67186.15625, "learning_rate": 6.428000000000001e-06, - "loss": 26.0443, + "loss": 161186.45, "step": 32140 }, { "epoch": 0.06494503407846734, - "grad_norm": 338.2344665527344, + "grad_norm": 124656.796875, "learning_rate": 6.43e-06, - "loss": 13.9824, + "loss": 208322.8625, "step": 32150 }, { "epoch": 0.06496523471115115, - "grad_norm": 489.648681640625, + "grad_norm": 140472.953125, "learning_rate": 6.432e-06, - "loss": 25.5297, + "loss": 93747.5188, "step": 32160 }, { "epoch": 0.06498543534383497, - "grad_norm": 405.80755615234375, + "grad_norm": 32687.779296875, "learning_rate": 6.4340000000000006e-06, - "loss": 35.4353, + "loss": 147763.275, "step": 32170 }, { "epoch": 0.06500563597651879, - "grad_norm": 225.9192352294922, + "grad_norm": 345439.65625, "learning_rate": 6.436e-06, - "loss": 25.5489, + "loss": 120168.175, "step": 32180 }, { "epoch": 0.0650258366092026, - "grad_norm": 356.1609191894531, + "grad_norm": 262743.75, "learning_rate": 6.438000000000001e-06, - "loss": 24.6689, + "loss": 186993.875, "step": 32190 }, { "epoch": 0.06504603724188641, - "grad_norm": 813.9902954101562, + "grad_norm": 513356.40625, "learning_rate": 6.440000000000001e-06, - "loss": 31.878, + "loss": 236059.3, "step": 32200 }, { "epoch": 0.06506623787457023, - "grad_norm": 503.1162414550781, + "grad_norm": 6995.44580078125, "learning_rate": 6.442e-06, - "loss": 26.8903, + "loss": 163448.4125, "step": 32210 }, { "epoch": 0.06508643850725404, - "grad_norm": 291.009521484375, + "grad_norm": 70261.609375, "learning_rate": 6.4440000000000005e-06, - "loss": 24.9688, + "loss": 103375.8625, "step": 32220 }, { "epoch": 0.06510663913993786, - "grad_norm": 366.4163818359375, + "grad_norm": 17035.130859375, "learning_rate": 6.446e-06, - "loss": 27.8312, + "loss": 128495.25, "step": 32230 }, { "epoch": 0.06512683977262168, - "grad_norm": 602.8341674804688, + "grad_norm": 1138923.5, "learning_rate": 6.448000000000001e-06, - "loss": 22.8418, + "loss": 179054.5, "step": 32240 }, { "epoch": 0.0651470404053055, - "grad_norm": 767.76953125, + "grad_norm": 934575.375, "learning_rate": 6.450000000000001e-06, - "loss": 36.1939, + "loss": 186045.8625, "step": 32250 }, { "epoch": 0.0651672410379893, "grad_norm": 0.0, "learning_rate": 6.452e-06, - "loss": 23.4851, + "loss": 85536.2063, "step": 32260 }, { "epoch": 0.06518744167067313, - "grad_norm": 255.6028594970703, + "grad_norm": 12400.3974609375, "learning_rate": 6.454e-06, - "loss": 33.1797, + "loss": 133003.075, "step": 32270 }, { "epoch": 0.06520764230335695, - "grad_norm": 236.62069702148438, + "grad_norm": 52640.22265625, "learning_rate": 6.456e-06, - "loss": 44.0498, + "loss": 349253.425, "step": 32280 }, { "epoch": 0.06522784293604075, - "grad_norm": 281.0309143066406, + "grad_norm": 133886.515625, "learning_rate": 6.458000000000001e-06, - "loss": 35.3185, + "loss": 300516.825, "step": 32290 }, { "epoch": 0.06524804356872457, - "grad_norm": 532.2522583007812, + "grad_norm": 105293.3984375, "learning_rate": 6.460000000000001e-06, - "loss": 24.893, + "loss": 140922.275, "step": 32300 }, { "epoch": 0.06526824420140839, - "grad_norm": 290.1950988769531, + "grad_norm": 5668.5224609375, "learning_rate": 6.462e-06, - "loss": 23.176, + "loss": 128825.5375, "step": 32310 }, { "epoch": 0.0652884448340922, - "grad_norm": 836.5287475585938, + "grad_norm": 477219.5625, "learning_rate": 6.464e-06, - "loss": 27.6402, + "loss": 145901.9, "step": 32320 }, { "epoch": 0.06530864546677602, - "grad_norm": 125.80975341796875, + "grad_norm": 15447.349609375, "learning_rate": 6.4660000000000004e-06, - "loss": 36.2398, + "loss": 291405.175, "step": 32330 }, { "epoch": 0.06532884609945984, - "grad_norm": 180.412353515625, + "grad_norm": 7952.58203125, "learning_rate": 6.468000000000001e-06, - "loss": 32.0698, + "loss": 144711.6375, "step": 32340 }, { "epoch": 0.06534904673214365, - "grad_norm": 772.2728271484375, + "grad_norm": 35463.92578125, "learning_rate": 6.470000000000001e-06, - "loss": 17.634, + "loss": 83661.9312, "step": 32350 }, { "epoch": 0.06536924736482747, - "grad_norm": 126.90397644042969, + "grad_norm": 8143.98779296875, "learning_rate": 6.472000000000001e-06, - "loss": 20.8186, + "loss": 105642.35, "step": 32360 }, { "epoch": 0.06538944799751129, - "grad_norm": 134.90330505371094, + "grad_norm": 75199.21875, "learning_rate": 6.474e-06, - "loss": 20.7609, + "loss": 73456.95, "step": 32370 }, { "epoch": 0.06540964863019509, - "grad_norm": 316.5060119628906, + "grad_norm": 187549.640625, "learning_rate": 6.476e-06, - "loss": 35.8064, + "loss": 180375.625, "step": 32380 }, { "epoch": 0.06542984926287891, - "grad_norm": 235.25607299804688, + "grad_norm": 36081.56640625, "learning_rate": 6.478000000000001e-06, - "loss": 26.0847, + "loss": 204636.7625, "step": 32390 }, { "epoch": 0.06545004989556273, - "grad_norm": 132.6982421875, + "grad_norm": 14921.0830078125, "learning_rate": 6.480000000000001e-06, - "loss": 18.1465, + "loss": 116979.4125, "step": 32400 }, { "epoch": 0.06547025052824655, - "grad_norm": 556.5181884765625, + "grad_norm": 572494.5625, "learning_rate": 6.482000000000001e-06, - "loss": 41.7568, + "loss": 371911.225, "step": 32410 }, { "epoch": 0.06549045116093036, - "grad_norm": 319.8631896972656, + "grad_norm": 32281.91015625, "learning_rate": 6.484e-06, - "loss": 34.7693, + "loss": 232742.45, "step": 32420 }, { "epoch": 0.06551065179361418, - "grad_norm": 32.90923309326172, + "grad_norm": 2436.9755859375, "learning_rate": 6.486e-06, - "loss": 31.509, + "loss": 147488.225, "step": 32430 }, { "epoch": 0.065530852426298, - "grad_norm": 227.41973876953125, + "grad_norm": 51475.90234375, "learning_rate": 6.488000000000001e-06, - "loss": 27.032, + "loss": 168872.7625, "step": 32440 }, { "epoch": 0.0655510530589818, - "grad_norm": 784.7890625, + "grad_norm": 375262.40625, "learning_rate": 6.4900000000000005e-06, - "loss": 42.1183, + "loss": 126055.1125, "step": 32450 }, { "epoch": 0.06557125369166562, - "grad_norm": 265.337890625, + "grad_norm": 123066.125, "learning_rate": 6.492000000000001e-06, - "loss": 33.2225, + "loss": 264988.525, "step": 32460 }, { "epoch": 0.06559145432434944, - "grad_norm": 508.2408142089844, + "grad_norm": 467928.53125, "learning_rate": 6.494e-06, - "loss": 20.6025, + "loss": 187753.6125, "step": 32470 }, { "epoch": 0.06561165495703325, - "grad_norm": 342.9792785644531, + "grad_norm": 368037.4375, "learning_rate": 6.496e-06, - "loss": 34.3402, + "loss": 230267.175, "step": 32480 }, { "epoch": 0.06563185558971707, - "grad_norm": 257.61163330078125, + "grad_norm": 18510.427734375, "learning_rate": 6.498000000000001e-06, - "loss": 32.8761, + "loss": 180586.075, "step": 32490 }, { "epoch": 0.06565205622240089, - "grad_norm": 117.67272186279297, + "grad_norm": 11656.0634765625, "learning_rate": 6.5000000000000004e-06, - "loss": 12.7806, + "loss": 28693.6875, "step": 32500 }, { "epoch": 0.0656722568550847, - "grad_norm": 225.24710083007812, + "grad_norm": 41442.56640625, "learning_rate": 6.502000000000001e-06, - "loss": 50.6284, + "loss": 306193.325, "step": 32510 }, { "epoch": 0.06569245748776852, - "grad_norm": 237.55392456054688, + "grad_norm": 57293.38671875, "learning_rate": 6.504e-06, - "loss": 24.6255, + "loss": 129525.2125, "step": 32520 }, { "epoch": 0.06571265812045234, - "grad_norm": 369.5683288574219, + "grad_norm": 32152.27734375, "learning_rate": 6.506e-06, - "loss": 30.3659, + "loss": 284310.95, "step": 32530 }, { "epoch": 0.06573285875313614, - "grad_norm": 262.12298583984375, + "grad_norm": 6668.31396484375, "learning_rate": 6.508000000000001e-06, - "loss": 39.7136, + "loss": 162849.1125, "step": 32540 }, { "epoch": 0.06575305938581996, - "grad_norm": 325.452392578125, + "grad_norm": 168256.34375, "learning_rate": 6.51e-06, - "loss": 51.158, + "loss": 172954.075, "step": 32550 }, { "epoch": 0.06577326001850378, "grad_norm": 0.0, "learning_rate": 6.5120000000000005e-06, - "loss": 44.0087, + "loss": 198181.2875, "step": 32560 }, { "epoch": 0.0657934606511876, - "grad_norm": 320.4726867675781, + "grad_norm": 195098.359375, "learning_rate": 6.514000000000001e-06, - "loss": 28.109, + "loss": 83205.0063, "step": 32570 }, { "epoch": 0.06581366128387141, - "grad_norm": 147.89549255371094, + "grad_norm": 4382.5927734375, "learning_rate": 6.516e-06, - "loss": 32.9733, + "loss": 209993.975, "step": 32580 }, { "epoch": 0.06583386191655523, - "grad_norm": 506.1364440917969, + "grad_norm": 658533.0, "learning_rate": 6.518000000000001e-06, - "loss": 47.3512, + "loss": 207779.925, "step": 32590 }, { "epoch": 0.06585406254923905, - "grad_norm": 381.97509765625, + "grad_norm": 519536.71875, "learning_rate": 6.520000000000001e-06, - "loss": 39.4338, + "loss": 249410.575, "step": 32600 }, { "epoch": 0.06587426318192285, - "grad_norm": 173.8516082763672, + "grad_norm": 90257.203125, "learning_rate": 6.522e-06, - "loss": 32.7929, + "loss": 220435.9, "step": 32610 }, { "epoch": 0.06589446381460667, - "grad_norm": 489.1348571777344, + "grad_norm": 731843.75, "learning_rate": 6.5240000000000006e-06, - "loss": 22.801, + "loss": 114093.85, "step": 32620 }, { "epoch": 0.0659146644472905, - "grad_norm": 789.7333984375, + "grad_norm": 1146631.875, "learning_rate": 6.526e-06, - "loss": 33.7686, + "loss": 186409.0, "step": 32630 }, { "epoch": 0.0659348650799743, - "grad_norm": 449.74951171875, + "grad_norm": 120771.2734375, "learning_rate": 6.528000000000001e-06, - "loss": 25.6868, + "loss": 162445.6875, "step": 32640 }, { "epoch": 0.06595506571265812, - "grad_norm": 330.71246337890625, + "grad_norm": 78689.515625, "learning_rate": 6.530000000000001e-06, - "loss": 43.2862, + "loss": 226900.2, "step": 32650 }, { "epoch": 0.06597526634534194, - "grad_norm": 230.9396209716797, + "grad_norm": 33400.15234375, "learning_rate": 6.532e-06, - "loss": 34.1926, + "loss": 275308.925, "step": 32660 }, { "epoch": 0.06599546697802575, - "grad_norm": 439.2969055175781, + "grad_norm": 419733.03125, "learning_rate": 6.5340000000000005e-06, - "loss": 25.3017, + "loss": 129918.025, "step": 32670 }, { "epoch": 0.06601566761070957, - "grad_norm": 490.4678955078125, + "grad_norm": 556441.9375, "learning_rate": 6.536e-06, - "loss": 39.2081, + "loss": 266734.375, "step": 32680 }, { "epoch": 0.06603586824339339, - "grad_norm": 196.3701934814453, + "grad_norm": 29288.3984375, "learning_rate": 6.538000000000001e-06, - "loss": 41.4055, + "loss": 159414.375, "step": 32690 }, { "epoch": 0.06605606887607719, - "grad_norm": 615.54736328125, + "grad_norm": 578456.5, "learning_rate": 6.540000000000001e-06, - "loss": 43.3943, + "loss": 353232.775, "step": 32700 }, { "epoch": 0.06607626950876101, - "grad_norm": 73.74063873291016, + "grad_norm": 5988.1787109375, "learning_rate": 6.542e-06, - "loss": 32.3963, + "loss": 124830.775, "step": 32710 }, { "epoch": 0.06609647014144483, - "grad_norm": 108.1066665649414, + "grad_norm": 31887.556640625, "learning_rate": 6.544e-06, - "loss": 21.0127, + "loss": 106081.225, "step": 32720 }, { "epoch": 0.06611667077412865, - "grad_norm": 289.84039306640625, + "grad_norm": 2460.67431640625, "learning_rate": 6.5460000000000005e-06, - "loss": 31.1427, + "loss": 192103.225, "step": 32730 }, { "epoch": 0.06613687140681246, - "grad_norm": 168.12002563476562, + "grad_norm": 22585.08203125, "learning_rate": 6.548000000000001e-06, - "loss": 19.6645, + "loss": 78044.9563, "step": 32740 }, { "epoch": 0.06615707203949628, - "grad_norm": 339.53936767578125, + "grad_norm": 108651.46875, "learning_rate": 6.550000000000001e-06, - "loss": 30.5839, + "loss": 236366.775, "step": 32750 }, { "epoch": 0.0661772726721801, - "grad_norm": 383.97412109375, + "grad_norm": 59519.515625, "learning_rate": 6.552000000000001e-06, - "loss": 32.9635, + "loss": 176643.975, "step": 32760 }, { "epoch": 0.0661974733048639, - "grad_norm": 218.21884155273438, + "grad_norm": 19688.30078125, "learning_rate": 6.554e-06, - "loss": 27.9132, + "loss": 158985.2125, "step": 32770 }, { "epoch": 0.06621767393754772, - "grad_norm": 58.37517547607422, + "grad_norm": 5495.06494140625, "learning_rate": 6.556e-06, - "loss": 30.3665, + "loss": 185521.15, "step": 32780 }, { "epoch": 0.06623787457023154, - "grad_norm": 399.6036682128906, + "grad_norm": 241780.28125, "learning_rate": 6.558000000000001e-06, - "loss": 17.2957, + "loss": 123162.825, "step": 32790 }, { "epoch": 0.06625807520291535, - "grad_norm": 385.6497497558594, + "grad_norm": 820630.75, "learning_rate": 6.560000000000001e-06, - "loss": 29.2142, + "loss": 368379.825, "step": 32800 }, { "epoch": 0.06627827583559917, - "grad_norm": 294.7921142578125, + "grad_norm": 127624.5, "learning_rate": 6.562000000000001e-06, - "loss": 27.5119, + "loss": 135545.475, "step": 32810 }, { "epoch": 0.06629847646828299, - "grad_norm": 809.83740234375, + "grad_norm": 1807553.5, "learning_rate": 6.564e-06, - "loss": 31.7384, + "loss": 468101.1, "step": 32820 }, { "epoch": 0.0663186771009668, - "grad_norm": 337.61181640625, + "grad_norm": 60767.62890625, "learning_rate": 6.566e-06, - "loss": 33.6985, + "loss": 274470.575, "step": 32830 }, { "epoch": 0.06633887773365062, - "grad_norm": 579.421142578125, + "grad_norm": 6145.9990234375, "learning_rate": 6.568000000000001e-06, - "loss": 19.8273, + "loss": 83527.8625, "step": 32840 }, { "epoch": 0.06635907836633444, - "grad_norm": 186.95729064941406, + "grad_norm": 11377.4365234375, "learning_rate": 6.570000000000001e-06, - "loss": 15.9819, + "loss": 90754.3562, "step": 32850 }, { "epoch": 0.06637927899901824, - "grad_norm": 558.0016479492188, + "grad_norm": 645046.5, "learning_rate": 6.572000000000001e-06, - "loss": 20.1292, + "loss": 109226.325, "step": 32860 }, { "epoch": 0.06639947963170206, - "grad_norm": 165.0709686279297, + "grad_norm": 43985.91796875, "learning_rate": 6.574e-06, - "loss": 28.7833, + "loss": 99703.8188, "step": 32870 }, { "epoch": 0.06641968026438588, - "grad_norm": 327.4534606933594, + "grad_norm": 26117.34375, "learning_rate": 6.576e-06, - "loss": 22.7556, + "loss": 104589.8188, "step": 32880 }, { "epoch": 0.0664398808970697, - "grad_norm": 159.4688262939453, + "grad_norm": 20009.787109375, "learning_rate": 6.578000000000001e-06, - "loss": 28.5168, + "loss": 113359.7125, "step": 32890 }, { "epoch": 0.06646008152975351, - "grad_norm": 89.40301513671875, + "grad_norm": 228512.0625, "learning_rate": 6.5800000000000005e-06, - "loss": 19.6373, + "loss": 194774.2625, "step": 32900 }, { "epoch": 0.06648028216243733, - "grad_norm": 228.87832641601562, + "grad_norm": 109986.6640625, "learning_rate": 6.582000000000001e-06, - "loss": 20.3056, + "loss": 117905.75, "step": 32910 }, { "epoch": 0.06650048279512115, - "grad_norm": 63.90678024291992, + "grad_norm": 3838.99560546875, "learning_rate": 6.584e-06, - "loss": 28.0314, + "loss": 174555.825, "step": 32920 }, { "epoch": 0.06652068342780496, - "grad_norm": 383.80682373046875, + "grad_norm": 116111.4609375, "learning_rate": 6.586e-06, - "loss": 26.2479, + "loss": 97301.675, "step": 32930 }, { "epoch": 0.06654088406048878, - "grad_norm": 657.4545288085938, + "grad_norm": 275016.96875, "learning_rate": 6.588000000000001e-06, - "loss": 31.423, + "loss": 154568.425, "step": 32940 }, { "epoch": 0.0665610846931726, - "grad_norm": 375.2959289550781, + "grad_norm": 169915.171875, "learning_rate": 6.5900000000000004e-06, - "loss": 18.3765, + "loss": 166305.3375, "step": 32950 }, { "epoch": 0.0665812853258564, - "grad_norm": 262.6024475097656, + "grad_norm": 56538.16015625, "learning_rate": 6.592000000000001e-06, - "loss": 27.5531, + "loss": 215320.125, "step": 32960 }, { "epoch": 0.06660148595854022, - "grad_norm": 251.27822875976562, + "grad_norm": 26897.916015625, "learning_rate": 6.594000000000001e-06, - "loss": 23.6339, + "loss": 133542.5375, "step": 32970 }, { "epoch": 0.06662168659122404, - "grad_norm": 354.53167724609375, + "grad_norm": 92919.46875, "learning_rate": 6.596e-06, - "loss": 21.4067, + "loss": 177847.4125, "step": 32980 }, { "epoch": 0.06664188722390785, - "grad_norm": 223.07188415527344, + "grad_norm": 34389.046875, "learning_rate": 6.598000000000001e-06, - "loss": 20.9345, + "loss": 92243.0875, "step": 32990 }, { "epoch": 0.06666208785659167, - "grad_norm": 335.9605712890625, + "grad_norm": 218876.359375, "learning_rate": 6.600000000000001e-06, - "loss": 40.661, + "loss": 276646.8, "step": 33000 }, { "epoch": 0.06668228848927549, - "grad_norm": 203.1583251953125, + "grad_norm": 9057.298828125, "learning_rate": 6.6020000000000005e-06, - "loss": 28.0269, + "loss": 224726.725, "step": 33010 }, { "epoch": 0.0667024891219593, - "grad_norm": 370.2474670410156, + "grad_norm": 18651.525390625, "learning_rate": 6.604000000000001e-06, - "loss": 23.4031, + "loss": 253853.475, "step": 33020 }, { "epoch": 0.06672268975464311, - "grad_norm": 215.4385986328125, + "grad_norm": 46782.0703125, "learning_rate": 6.606e-06, - "loss": 18.5859, + "loss": 72680.3313, "step": 33030 }, { "epoch": 0.06674289038732693, - "grad_norm": 80.94508361816406, + "grad_norm": 174903.890625, "learning_rate": 6.608000000000001e-06, - "loss": 34.3004, + "loss": 122506.9125, "step": 33040 }, { "epoch": 0.06676309102001075, - "grad_norm": 636.4049682617188, + "grad_norm": 452568.125, "learning_rate": 6.610000000000001e-06, - "loss": 37.3925, + "loss": 289156.075, "step": 33050 }, { "epoch": 0.06678329165269456, - "grad_norm": 375.9504699707031, + "grad_norm": 44343.83984375, "learning_rate": 6.612e-06, - "loss": 15.6265, + "loss": 61252.7875, "step": 33060 }, { "epoch": 0.06680349228537838, - "grad_norm": 197.15830993652344, + "grad_norm": 32266.013671875, "learning_rate": 6.6140000000000005e-06, - "loss": 18.0247, + "loss": 172499.55, "step": 33070 }, { "epoch": 0.0668236929180622, - "grad_norm": 266.53692626953125, + "grad_norm": 506483.375, "learning_rate": 6.616e-06, - "loss": 22.4474, + "loss": 110383.8, "step": 33080 }, { "epoch": 0.066843893550746, - "grad_norm": 581.1676025390625, + "grad_norm": 277725.0, "learning_rate": 6.618000000000001e-06, - "loss": 36.025, + "loss": 195896.425, "step": 33090 }, { "epoch": 0.06686409418342983, - "grad_norm": 386.5500793457031, + "grad_norm": 140096.0, "learning_rate": 6.620000000000001e-06, - "loss": 19.8663, + "loss": 76145.6875, "step": 33100 }, { "epoch": 0.06688429481611365, - "grad_norm": 96.17058563232422, + "grad_norm": 21415.4296875, "learning_rate": 6.622e-06, - "loss": 33.4399, + "loss": 117340.4125, "step": 33110 }, { "epoch": 0.06690449544879745, - "grad_norm": 459.32244873046875, + "grad_norm": 301033.8125, "learning_rate": 6.6240000000000004e-06, - "loss": 20.7274, + "loss": 138176.6875, "step": 33120 }, { "epoch": 0.06692469608148127, - "grad_norm": 77.63929748535156, + "grad_norm": 9255.666015625, "learning_rate": 6.626000000000001e-06, - "loss": 31.7773, + "loss": 195450.5375, "step": 33130 }, { "epoch": 0.06694489671416509, - "grad_norm": 224.4654998779297, + "grad_norm": 6731.8125, "learning_rate": 6.628e-06, - "loss": 24.23, + "loss": 178181.15, "step": 33140 }, { "epoch": 0.0669650973468489, - "grad_norm": 627.4200439453125, + "grad_norm": 254578.625, "learning_rate": 6.630000000000001e-06, - "loss": 39.5084, + "loss": 151310.2625, "step": 33150 }, { "epoch": 0.06698529797953272, - "grad_norm": 551.1626586914062, + "grad_norm": 342724.21875, "learning_rate": 6.632000000000001e-06, - "loss": 22.4483, + "loss": 81360.125, "step": 33160 }, { "epoch": 0.06700549861221654, - "grad_norm": 250.13424682617188, + "grad_norm": 156688.6875, "learning_rate": 6.634e-06, - "loss": 29.2267, + "loss": 164082.825, "step": 33170 }, { "epoch": 0.06702569924490034, - "grad_norm": 604.459716796875, + "grad_norm": 739407.25, "learning_rate": 6.6360000000000005e-06, - "loss": 32.5056, + "loss": 179058.3, "step": 33180 }, { "epoch": 0.06704589987758416, - "grad_norm": 236.87158203125, + "grad_norm": 5318.35400390625, "learning_rate": 6.638e-06, - "loss": 47.2783, + "loss": 174383.0375, "step": 33190 }, { "epoch": 0.06706610051026798, - "grad_norm": 313.0286865234375, + "grad_norm": 118763.7890625, "learning_rate": 6.640000000000001e-06, - "loss": 14.8039, + "loss": 136048.125, "step": 33200 }, { "epoch": 0.0670863011429518, - "grad_norm": 989.0570678710938, + "grad_norm": 489774.34375, "learning_rate": 6.642000000000001e-06, - "loss": 34.107, + "loss": 145079.3, "step": 33210 }, { "epoch": 0.06710650177563561, - "grad_norm": 192.0631561279297, + "grad_norm": 5931.62060546875, "learning_rate": 6.644e-06, - "loss": 27.1685, + "loss": 130601.8625, "step": 33220 }, { "epoch": 0.06712670240831943, - "grad_norm": 255.91061401367188, + "grad_norm": 64596.359375, "learning_rate": 6.646e-06, - "loss": 22.7502, + "loss": 132838.4, "step": 33230 }, { "epoch": 0.06714690304100325, - "grad_norm": 518.427734375, + "grad_norm": 25411.76171875, "learning_rate": 6.648e-06, - "loss": 34.9704, + "loss": 235883.275, "step": 33240 }, { "epoch": 0.06716710367368706, - "grad_norm": 560.7759399414062, + "grad_norm": 154928.703125, "learning_rate": 6.650000000000001e-06, - "loss": 39.8107, + "loss": 172806.675, "step": 33250 }, { "epoch": 0.06718730430637088, - "grad_norm": 434.3753662109375, + "grad_norm": 178819.203125, "learning_rate": 6.652000000000001e-06, - "loss": 33.0775, + "loss": 371718.275, "step": 33260 }, { "epoch": 0.0672075049390547, - "grad_norm": 322.3213195800781, + "grad_norm": 151886.109375, "learning_rate": 6.654e-06, - "loss": 30.8232, + "loss": 223629.6, "step": 33270 }, { "epoch": 0.0672277055717385, - "grad_norm": 388.7810363769531, + "grad_norm": 14699.8916015625, "learning_rate": 6.656e-06, - "loss": 50.0331, + "loss": 189255.1625, "step": 33280 }, { "epoch": 0.06724790620442232, - "grad_norm": 631.3594360351562, + "grad_norm": 695814.0, "learning_rate": 6.658e-06, - "loss": 30.9435, + "loss": 267456.8, "step": 33290 }, { "epoch": 0.06726810683710614, - "grad_norm": 185.04061889648438, + "grad_norm": 24350.685546875, "learning_rate": 6.660000000000001e-06, - "loss": 21.1964, + "loss": 129780.95, "step": 33300 }, { "epoch": 0.06728830746978995, - "grad_norm": 298.79351806640625, + "grad_norm": 60954.0625, "learning_rate": 6.662000000000001e-06, - "loss": 15.2041, + "loss": 65508.375, "step": 33310 }, { "epoch": 0.06730850810247377, - "grad_norm": 249.6124267578125, + "grad_norm": 96797.8125, "learning_rate": 6.664e-06, - "loss": 31.553, + "loss": 290029.425, "step": 33320 }, { "epoch": 0.06732870873515759, - "grad_norm": 262.3807067871094, + "grad_norm": 16619.962890625, "learning_rate": 6.666e-06, - "loss": 27.6135, + "loss": 186668.4875, "step": 33330 }, { "epoch": 0.0673489093678414, - "grad_norm": 130.87686157226562, + "grad_norm": 22212.90234375, "learning_rate": 6.668e-06, - "loss": 30.4478, + "loss": 288273.3, "step": 33340 }, { "epoch": 0.06736911000052521, - "grad_norm": 285.0330810546875, + "grad_norm": 93141.328125, "learning_rate": 6.6700000000000005e-06, - "loss": 21.2741, + "loss": 107586.1875, "step": 33350 }, { "epoch": 0.06738931063320903, - "grad_norm": 237.6591796875, + "grad_norm": 57603.77734375, "learning_rate": 6.672000000000001e-06, - "loss": 27.5899, + "loss": 171636.2, "step": 33360 }, { "epoch": 0.06740951126589285, - "grad_norm": 379.7192077636719, + "grad_norm": 414289.84375, "learning_rate": 6.674000000000001e-06, - "loss": 21.7786, + "loss": 140747.125, "step": 33370 }, { "epoch": 0.06742971189857666, - "grad_norm": 126.19786071777344, + "grad_norm": 49647.00390625, "learning_rate": 6.676e-06, - "loss": 36.2694, + "loss": 228675.575, "step": 33380 }, { "epoch": 0.06744991253126048, - "grad_norm": 159.53851318359375, + "grad_norm": 13102.4736328125, "learning_rate": 6.678e-06, - "loss": 26.82, + "loss": 244959.875, "step": 33390 }, { "epoch": 0.0674701131639443, - "grad_norm": 114.0098876953125, + "grad_norm": 431528.90625, "learning_rate": 6.680000000000001e-06, - "loss": 28.3109, + "loss": 217596.675, "step": 33400 }, { "epoch": 0.0674903137966281, - "grad_norm": 60.16526794433594, + "grad_norm": 66820.734375, "learning_rate": 6.6820000000000006e-06, - "loss": 27.0503, + "loss": 171097.6875, "step": 33410 }, { "epoch": 0.06751051442931193, - "grad_norm": 110.24915313720703, + "grad_norm": 13947.2080078125, "learning_rate": 6.684000000000001e-06, - "loss": 24.2053, + "loss": 203747.2375, "step": 33420 }, { "epoch": 0.06753071506199575, - "grad_norm": 329.45391845703125, + "grad_norm": 8890.4345703125, "learning_rate": 6.686e-06, - "loss": 31.3305, + "loss": 119040.1875, "step": 33430 }, { "epoch": 0.06755091569467955, - "grad_norm": 333.7715148925781, + "grad_norm": 82773.8359375, "learning_rate": 6.688e-06, - "loss": 23.5648, + "loss": 181205.6375, "step": 33440 }, { "epoch": 0.06757111632736337, - "grad_norm": 99.59442138671875, + "grad_norm": 25600.685546875, "learning_rate": 6.690000000000001e-06, - "loss": 34.6192, + "loss": 368786.425, "step": 33450 }, { "epoch": 0.06759131696004719, - "grad_norm": 430.8699035644531, + "grad_norm": 392453.03125, "learning_rate": 6.6920000000000005e-06, - "loss": 26.0351, + "loss": 108874.8375, "step": 33460 }, { "epoch": 0.067611517592731, - "grad_norm": 340.1473693847656, + "grad_norm": 248982.03125, "learning_rate": 6.694000000000001e-06, - "loss": 26.3428, + "loss": 140110.225, "step": 33470 }, { "epoch": 0.06763171822541482, - "grad_norm": 135.01898193359375, + "grad_norm": 99969.671875, "learning_rate": 6.696e-06, - "loss": 24.8854, + "loss": 157197.7, "step": 33480 }, { "epoch": 0.06765191885809864, - "grad_norm": 373.7020568847656, + "grad_norm": 446514.125, "learning_rate": 6.698e-06, - "loss": 29.3576, + "loss": 147830.7, "step": 33490 }, { "epoch": 0.06767211949078245, - "grad_norm": 249.206787109375, + "grad_norm": 36650.87890625, "learning_rate": 6.700000000000001e-06, - "loss": 22.7511, + "loss": 169824.2375, "step": 33500 }, { "epoch": 0.06769232012346627, - "grad_norm": 780.90576171875, + "grad_norm": 1035436.75, "learning_rate": 6.702e-06, - "loss": 29.4561, + "loss": 247743.925, "step": 33510 }, { "epoch": 0.06771252075615009, - "grad_norm": 396.869384765625, + "grad_norm": 250291.9375, "learning_rate": 6.7040000000000005e-06, - "loss": 18.4295, + "loss": 90600.2437, "step": 33520 }, { "epoch": 0.06773272138883389, - "grad_norm": 549.2235717773438, + "grad_norm": 736187.625, "learning_rate": 6.706000000000001e-06, - "loss": 29.0248, + "loss": 270366.675, "step": 33530 }, { "epoch": 0.06775292202151771, - "grad_norm": 343.7025146484375, + "grad_norm": 245419.78125, "learning_rate": 6.708e-06, - "loss": 24.1803, + "loss": 190903.8625, "step": 33540 }, { "epoch": 0.06777312265420153, - "grad_norm": 306.8595886230469, + "grad_norm": 69948.9609375, "learning_rate": 6.710000000000001e-06, - "loss": 22.515, + "loss": 80254.8062, "step": 33550 }, { "epoch": 0.06779332328688535, - "grad_norm": 243.19854736328125, + "grad_norm": 19790.171875, "learning_rate": 6.712000000000001e-06, - "loss": 40.1751, + "loss": 280468.05, "step": 33560 }, { "epoch": 0.06781352391956916, - "grad_norm": 265.84185791015625, + "grad_norm": 202504.8125, "learning_rate": 6.7140000000000004e-06, - "loss": 24.2147, + "loss": 106379.325, "step": 33570 }, { "epoch": 0.06783372455225298, - "grad_norm": 432.4148254394531, + "grad_norm": 95737.15625, "learning_rate": 6.716000000000001e-06, - "loss": 29.5186, + "loss": 103662.1938, "step": 33580 }, { "epoch": 0.0678539251849368, - "grad_norm": 140.1952362060547, + "grad_norm": 3740.70556640625, "learning_rate": 6.718e-06, - "loss": 31.6897, + "loss": 135328.1875, "step": 33590 }, { "epoch": 0.0678741258176206, - "grad_norm": 488.43701171875, + "grad_norm": 741448.125, "learning_rate": 6.720000000000001e-06, - "loss": 29.2323, + "loss": 148993.65, "step": 33600 }, { "epoch": 0.06789432645030442, - "grad_norm": 271.6667175292969, + "grad_norm": 26088.189453125, "learning_rate": 6.722000000000001e-06, - "loss": 63.0262, + "loss": 251530.4, "step": 33610 }, { "epoch": 0.06791452708298824, - "grad_norm": 726.9092407226562, + "grad_norm": 1016539.8125, "learning_rate": 6.724e-06, - "loss": 43.2171, + "loss": 331533.575, "step": 33620 }, { "epoch": 0.06793472771567205, - "grad_norm": 896.5654907226562, + "grad_norm": 1335833.125, "learning_rate": 6.7260000000000005e-06, - "loss": 33.0095, + "loss": 293732.9, "step": 33630 }, { "epoch": 0.06795492834835587, - "grad_norm": 138.67282104492188, + "grad_norm": 36837.63671875, "learning_rate": 6.728e-06, - "loss": 23.7521, + "loss": 127325.075, "step": 33640 }, { "epoch": 0.06797512898103969, - "grad_norm": 308.3063659667969, + "grad_norm": 135079.03125, "learning_rate": 6.730000000000001e-06, - "loss": 26.7069, + "loss": 115356.8625, "step": 33650 }, { "epoch": 0.0679953296137235, - "grad_norm": 545.8862915039062, + "grad_norm": 481398.8125, "learning_rate": 6.732000000000001e-06, - "loss": 26.4732, + "loss": 226735.975, "step": 33660 }, { "epoch": 0.06801553024640732, - "grad_norm": 162.01895141601562, + "grad_norm": 47591.54296875, "learning_rate": 6.734e-06, - "loss": 20.6606, + "loss": 173316.8, "step": 33670 }, { "epoch": 0.06803573087909114, - "grad_norm": 446.10760498046875, + "grad_norm": 136330.65625, "learning_rate": 6.736e-06, - "loss": 35.1819, + "loss": 186567.0875, "step": 33680 }, { "epoch": 0.06805593151177494, - "grad_norm": 197.63351440429688, + "grad_norm": 20114.26171875, "learning_rate": 6.738e-06, - "loss": 32.4311, + "loss": 236927.875, "step": 33690 }, { "epoch": 0.06807613214445876, - "grad_norm": 669.9923095703125, + "grad_norm": 810718.8125, "learning_rate": 6.740000000000001e-06, - "loss": 34.6298, + "loss": 216101.75, "step": 33700 }, { "epoch": 0.06809633277714258, - "grad_norm": 251.99420166015625, + "grad_norm": 139429.140625, "learning_rate": 6.742000000000001e-06, - "loss": 20.4015, + "loss": 94219.3562, "step": 33710 }, { "epoch": 0.0681165334098264, - "grad_norm": 1387.9864501953125, + "grad_norm": 2171998.25, "learning_rate": 6.744e-06, - "loss": 43.5895, + "loss": 339112.55, "step": 33720 }, { "epoch": 0.06813673404251021, - "grad_norm": 466.648681640625, + "grad_norm": 179379.171875, "learning_rate": 6.746e-06, - "loss": 50.2172, + "loss": 216186.45, "step": 33730 }, { "epoch": 0.06815693467519403, - "grad_norm": 296.6073913574219, + "grad_norm": 41162.390625, "learning_rate": 6.7480000000000004e-06, - "loss": 31.7947, + "loss": 178355.375, "step": 33740 }, { "epoch": 0.06817713530787785, - "grad_norm": 183.97279357910156, + "grad_norm": 71273.109375, "learning_rate": 6.750000000000001e-06, - "loss": 22.4225, + "loss": 77220.375, "step": 33750 }, { "epoch": 0.06819733594056165, - "grad_norm": 541.4666748046875, + "grad_norm": 1101036.0, "learning_rate": 6.752000000000001e-06, - "loss": 33.7121, + "loss": 317588.45, "step": 33760 }, { "epoch": 0.06821753657324547, - "grad_norm": 422.0052185058594, + "grad_norm": 55918.0, "learning_rate": 6.754000000000001e-06, - "loss": 22.4945, + "loss": 115410.7, "step": 33770 }, { "epoch": 0.0682377372059293, - "grad_norm": 290.2746276855469, + "grad_norm": 105697.9296875, "learning_rate": 6.756e-06, - "loss": 24.8973, + "loss": 239006.95, "step": 33780 }, { "epoch": 0.0682579378386131, - "grad_norm": 307.8942565917969, + "grad_norm": 13366.73828125, "learning_rate": 6.758e-06, - "loss": 28.1959, + "loss": 126829.825, "step": 33790 }, { "epoch": 0.06827813847129692, - "grad_norm": 43.59174346923828, + "grad_norm": 86043.390625, "learning_rate": 6.760000000000001e-06, - "loss": 34.4528, + "loss": 288918.45, "step": 33800 }, { "epoch": 0.06829833910398074, - "grad_norm": 489.6438293457031, + "grad_norm": 168410.59375, "learning_rate": 6.762000000000001e-06, - "loss": 30.3156, + "loss": 86893.8875, "step": 33810 }, { "epoch": 0.06831853973666455, - "grad_norm": 204.8790740966797, + "grad_norm": 32578.181640625, "learning_rate": 6.764000000000001e-06, - "loss": 25.7045, + "loss": 104417.7125, "step": 33820 }, { "epoch": 0.06833874036934837, - "grad_norm": 406.2287902832031, + "grad_norm": 48563.4296875, "learning_rate": 6.766e-06, - "loss": 31.5095, + "loss": 315958.2, "step": 33830 }, { "epoch": 0.06835894100203219, - "grad_norm": 279.4950866699219, + "grad_norm": 138320.15625, "learning_rate": 6.768e-06, - "loss": 29.7517, + "loss": 488990.4, "step": 33840 }, { "epoch": 0.06837914163471599, - "grad_norm": 928.823486328125, + "grad_norm": 288953.59375, "learning_rate": 6.770000000000001e-06, - "loss": 41.224, + "loss": 240147.95, "step": 33850 }, { "epoch": 0.06839934226739981, - "grad_norm": 317.24188232421875, + "grad_norm": 37967.60546875, "learning_rate": 6.7720000000000006e-06, - "loss": 19.2596, + "loss": 121385.8, "step": 33860 }, { "epoch": 0.06841954290008363, - "grad_norm": 297.966552734375, + "grad_norm": 51038.65625, "learning_rate": 6.774000000000001e-06, - "loss": 38.4275, + "loss": 170466.25, "step": 33870 }, { "epoch": 0.06843974353276745, - "grad_norm": 214.70057678222656, + "grad_norm": 18836.14453125, "learning_rate": 6.776e-06, - "loss": 33.0122, + "loss": 230285.375, "step": 33880 }, { "epoch": 0.06845994416545126, - "grad_norm": 333.35205078125, + "grad_norm": 6419.3056640625, "learning_rate": 6.778e-06, - "loss": 22.0829, + "loss": 77552.7812, "step": 33890 }, { "epoch": 0.06848014479813508, - "grad_norm": 229.147216796875, + "grad_norm": 148835.765625, "learning_rate": 6.780000000000001e-06, - "loss": 22.9107, + "loss": 160508.375, "step": 33900 }, { "epoch": 0.0685003454308189, - "grad_norm": 262.1947326660156, + "grad_norm": 82415.7890625, "learning_rate": 6.7820000000000005e-06, - "loss": 48.3566, + "loss": 417398.3, "step": 33910 }, { "epoch": 0.0685205460635027, - "grad_norm": 332.7762451171875, + "grad_norm": 131013.4921875, "learning_rate": 6.784000000000001e-06, - "loss": 27.2738, + "loss": 172359.75, "step": 33920 }, { "epoch": 0.06854074669618652, - "grad_norm": 184.263427734375, + "grad_norm": 5190.44140625, "learning_rate": 6.786000000000001e-06, - "loss": 31.9823, + "loss": 200839.9875, "step": 33930 }, { "epoch": 0.06856094732887034, - "grad_norm": 719.2918090820312, + "grad_norm": 401092.9375, "learning_rate": 6.788e-06, - "loss": 38.1228, + "loss": 227711.075, "step": 33940 }, { "epoch": 0.06858114796155415, - "grad_norm": 319.15362548828125, + "grad_norm": 130424.8515625, "learning_rate": 6.790000000000001e-06, - "loss": 25.3327, + "loss": 116358.9, "step": 33950 }, { "epoch": 0.06860134859423797, - "grad_norm": 182.20993041992188, + "grad_norm": 3153.482421875, "learning_rate": 6.792000000000001e-06, - "loss": 34.7985, + "loss": 533933.05, "step": 33960 }, { "epoch": 0.06862154922692179, - "grad_norm": 426.1497802734375, + "grad_norm": 201818.609375, "learning_rate": 6.7940000000000005e-06, - "loss": 35.3077, + "loss": 223894.025, "step": 33970 }, { "epoch": 0.0686417498596056, - "grad_norm": 167.3736114501953, + "grad_norm": 30921.060546875, "learning_rate": 6.796000000000001e-06, - "loss": 41.2279, + "loss": 248354.45, "step": 33980 }, { "epoch": 0.06866195049228942, - "grad_norm": 0.0, + "grad_norm": 1890397.875, "learning_rate": 6.798e-06, - "loss": 69.9124, + "loss": 300891.5, "step": 33990 }, { "epoch": 0.06868215112497324, - "grad_norm": 205.83233642578125, + "grad_norm": 39389.2109375, "learning_rate": 6.800000000000001e-06, - "loss": 27.4275, + "loss": 203816.175, "step": 34000 }, { "epoch": 0.06870235175765704, - "grad_norm": 229.70562744140625, + "grad_norm": 124940.9921875, "learning_rate": 6.802000000000001e-06, - "loss": 27.7193, + "loss": 73752.7688, "step": 34010 }, { "epoch": 0.06872255239034086, - "grad_norm": 277.613037109375, + "grad_norm": 73864.9140625, "learning_rate": 6.804e-06, - "loss": 33.3919, + "loss": 149320.4125, "step": 34020 }, { "epoch": 0.06874275302302468, - "grad_norm": 455.9620056152344, + "grad_norm": 8464.3330078125, "learning_rate": 6.8060000000000006e-06, - "loss": 31.3386, + "loss": 89930.4375, "step": 34030 }, { "epoch": 0.0687629536557085, - "grad_norm": 76.26141357421875, + "grad_norm": 98493.84375, "learning_rate": 6.808e-06, - "loss": 22.4672, + "loss": 183471.3, "step": 34040 }, { "epoch": 0.06878315428839231, - "grad_norm": 1023.4114379882812, + "grad_norm": 215467.5625, "learning_rate": 6.810000000000001e-06, - "loss": 28.7594, + "loss": 139962.65, "step": 34050 }, { "epoch": 0.06880335492107613, - "grad_norm": 261.6186828613281, + "grad_norm": 3155.677978515625, "learning_rate": 6.812000000000001e-06, - "loss": 37.1378, + "loss": 305563.875, "step": 34060 }, { "epoch": 0.06882355555375995, - "grad_norm": 296.997314453125, + "grad_norm": 6158.5478515625, "learning_rate": 6.814e-06, - "loss": 22.5021, + "loss": 303060.1, "step": 34070 }, { "epoch": 0.06884375618644376, - "grad_norm": 603.8565673828125, + "grad_norm": 753670.0625, "learning_rate": 6.8160000000000005e-06, - "loss": 34.0662, + "loss": 314137.2, "step": 34080 }, { "epoch": 0.06886395681912758, - "grad_norm": 265.9648132324219, + "grad_norm": 470467.875, "learning_rate": 6.818e-06, - "loss": 24.5765, + "loss": 130133.4375, "step": 34090 }, { "epoch": 0.0688841574518114, - "grad_norm": 60.6540641784668, + "grad_norm": 109620.8984375, "learning_rate": 6.820000000000001e-06, - "loss": 43.553, + "loss": 274826.25, "step": 34100 }, { "epoch": 0.0689043580844952, - "grad_norm": 162.8219451904297, + "grad_norm": 26176.486328125, "learning_rate": 6.822000000000001e-06, - "loss": 26.4204, + "loss": 185784.5625, "step": 34110 }, { "epoch": 0.06892455871717902, - "grad_norm": 327.31219482421875, + "grad_norm": 376950.71875, "learning_rate": 6.824e-06, - "loss": 24.6038, + "loss": 161719.9625, "step": 34120 }, { "epoch": 0.06894475934986284, - "grad_norm": 298.9954528808594, + "grad_norm": 33262.70703125, "learning_rate": 6.826e-06, - "loss": 15.5414, + "loss": 86024.5688, "step": 34130 }, { "epoch": 0.06896495998254665, - "grad_norm": 378.151123046875, + "grad_norm": 296915.5, "learning_rate": 6.8280000000000005e-06, - "loss": 35.4214, + "loss": 183889.45, "step": 34140 }, { "epoch": 0.06898516061523047, - "grad_norm": 407.6482849121094, + "grad_norm": 214384.578125, "learning_rate": 6.830000000000001e-06, - "loss": 16.3223, + "loss": 82782.5312, "step": 34150 }, { "epoch": 0.06900536124791429, - "grad_norm": 313.89410400390625, + "grad_norm": 139849.0, "learning_rate": 6.832000000000001e-06, - "loss": 19.986, + "loss": 185893.9, "step": 34160 }, { "epoch": 0.0690255618805981, - "grad_norm": 450.92047119140625, + "grad_norm": 305590.96875, "learning_rate": 6.834000000000001e-06, - "loss": 23.1717, + "loss": 165258.1625, "step": 34170 }, { "epoch": 0.06904576251328191, - "grad_norm": 367.4051208496094, + "grad_norm": 18947.52734375, "learning_rate": 6.836e-06, - "loss": 18.2367, + "loss": 31661.1437, "step": 34180 }, { "epoch": 0.06906596314596573, - "grad_norm": 200.5437469482422, + "grad_norm": 54536.4296875, "learning_rate": 6.8380000000000004e-06, - "loss": 27.8734, + "loss": 161688.9375, "step": 34190 }, { "epoch": 0.06908616377864955, - "grad_norm": 167.73825073242188, + "grad_norm": 332783.84375, "learning_rate": 6.8400000000000014e-06, - "loss": 37.5091, + "loss": 189571.85, "step": 34200 }, { "epoch": 0.06910636441133336, - "grad_norm": 175.97230529785156, + "grad_norm": 4430.40771484375, "learning_rate": 6.842000000000001e-06, - "loss": 37.9024, + "loss": 140239.5375, "step": 34210 }, { "epoch": 0.06912656504401718, - "grad_norm": 276.8113708496094, + "grad_norm": 53314.01171875, "learning_rate": 6.844000000000001e-06, - "loss": 30.3383, + "loss": 256663.8, "step": 34220 }, { "epoch": 0.069146765676701, - "grad_norm": 413.5763854980469, + "grad_norm": 374757.5625, "learning_rate": 6.846e-06, - "loss": 22.2407, + "loss": 217984.475, "step": 34230 }, { "epoch": 0.0691669663093848, - "grad_norm": 54.51414108276367, + "grad_norm": 2158.666748046875, "learning_rate": 6.848e-06, - "loss": 20.599, + "loss": 192054.625, "step": 34240 }, { "epoch": 0.06918716694206863, - "grad_norm": 547.967041015625, + "grad_norm": 283575.46875, "learning_rate": 6.850000000000001e-06, - "loss": 30.5517, + "loss": 226378.425, "step": 34250 }, { "epoch": 0.06920736757475245, - "grad_norm": 153.64317321777344, + "grad_norm": 48087.62890625, "learning_rate": 6.852000000000001e-06, - "loss": 29.968, + "loss": 156373.1875, "step": 34260 }, { "epoch": 0.06922756820743625, - "grad_norm": 510.6077575683594, + "grad_norm": 170161.96875, "learning_rate": 6.854000000000001e-06, - "loss": 30.9965, + "loss": 171165.2375, "step": 34270 }, { "epoch": 0.06924776884012007, - "grad_norm": 513.1456298828125, + "grad_norm": 235537.4375, "learning_rate": 6.856e-06, - "loss": 28.7472, + "loss": 204764.1125, "step": 34280 }, { "epoch": 0.06926796947280389, - "grad_norm": 260.1783447265625, + "grad_norm": 59506.94140625, "learning_rate": 6.858e-06, - "loss": 35.7646, + "loss": 129674.2375, "step": 34290 }, { "epoch": 0.0692881701054877, - "grad_norm": 1510.2899169921875, + "grad_norm": 2219941.75, "learning_rate": 6.860000000000001e-06, - "loss": 25.1878, + "loss": 325731.25, "step": 34300 }, { "epoch": 0.06930837073817152, - "grad_norm": 482.1159362792969, + "grad_norm": 41011.171875, "learning_rate": 6.8620000000000005e-06, - "loss": 26.38, + "loss": 128239.375, "step": 34310 }, { "epoch": 0.06932857137085534, - "grad_norm": 530.0428466796875, + "grad_norm": 624148.875, "learning_rate": 6.864000000000001e-06, - "loss": 25.9121, + "loss": 156246.4875, "step": 34320 }, { "epoch": 0.06934877200353914, - "grad_norm": 584.091064453125, + "grad_norm": 426805.5, "learning_rate": 6.866000000000001e-06, - "loss": 49.2923, + "loss": 243619.075, "step": 34330 }, { "epoch": 0.06936897263622296, - "grad_norm": 301.7872009277344, + "grad_norm": 6245.896484375, "learning_rate": 6.868e-06, - "loss": 17.6216, + "loss": 58261.0437, "step": 34340 }, { "epoch": 0.06938917326890678, - "grad_norm": 342.6329040527344, + "grad_norm": 160398.5625, "learning_rate": 6.870000000000001e-06, - "loss": 21.9526, + "loss": 159762.175, "step": 34350 }, { "epoch": 0.0694093739015906, - "grad_norm": 1223.290283203125, + "grad_norm": 1747062.125, "learning_rate": 6.872000000000001e-06, - "loss": 43.0305, + "loss": 382229.9, "step": 34360 }, { "epoch": 0.06942957453427441, - "grad_norm": 513.8219604492188, + "grad_norm": 456534.28125, "learning_rate": 6.874000000000001e-06, - "loss": 39.9957, + "loss": 334579.375, "step": 34370 }, { "epoch": 0.06944977516695823, - "grad_norm": 573.5376586914062, + "grad_norm": 85144.1328125, "learning_rate": 6.876000000000001e-06, - "loss": 35.5843, + "loss": 192968.1875, "step": 34380 }, { "epoch": 0.06946997579964205, - "grad_norm": 321.61480712890625, + "grad_norm": 225830.96875, "learning_rate": 6.878e-06, - "loss": 33.41, + "loss": 223017.375, "step": 34390 }, { "epoch": 0.06949017643232586, - "grad_norm": 319.8245544433594, + "grad_norm": 72508.7890625, "learning_rate": 6.88e-06, - "loss": 16.906, + "loss": 82190.125, "step": 34400 }, { "epoch": 0.06951037706500968, - "grad_norm": 409.17474365234375, + "grad_norm": 95663.7734375, "learning_rate": 6.882000000000001e-06, - "loss": 31.1368, + "loss": 228802.275, "step": 34410 }, { "epoch": 0.0695305776976935, - "grad_norm": 329.41058349609375, + "grad_norm": 103070.6953125, "learning_rate": 6.8840000000000005e-06, - "loss": 18.909, + "loss": 125900.275, "step": 34420 }, { "epoch": 0.0695507783303773, - "grad_norm": 87.27323913574219, + "grad_norm": 86409.359375, "learning_rate": 6.886000000000001e-06, - "loss": 26.7308, + "loss": 150314.85, "step": 34430 }, { "epoch": 0.06957097896306112, - "grad_norm": 313.5718994140625, + "grad_norm": 17514.947265625, "learning_rate": 6.888e-06, - "loss": 42.2084, + "loss": 307237.175, "step": 34440 }, { "epoch": 0.06959117959574494, - "grad_norm": 469.9633483886719, + "grad_norm": 360485.3125, "learning_rate": 6.89e-06, - "loss": 35.2144, + "loss": 134461.925, "step": 34450 }, { "epoch": 0.06961138022842875, - "grad_norm": 277.5743713378906, + "grad_norm": 269148.65625, "learning_rate": 6.892000000000001e-06, - "loss": 26.8681, + "loss": 151918.9, "step": 34460 }, { "epoch": 0.06963158086111257, - "grad_norm": 663.92041015625, + "grad_norm": 1007528.0625, "learning_rate": 6.894e-06, - "loss": 28.4116, + "loss": 180524.4875, "step": 34470 }, { "epoch": 0.06965178149379639, - "grad_norm": 149.6378173828125, + "grad_norm": 33964.1171875, "learning_rate": 6.8960000000000006e-06, - "loss": 15.5917, + "loss": 50540.175, "step": 34480 }, { "epoch": 0.0696719821264802, - "grad_norm": 220.55062866210938, + "grad_norm": 55261.67578125, "learning_rate": 6.898e-06, - "loss": 37.5737, + "loss": 215642.625, "step": 34490 }, { "epoch": 0.06969218275916401, - "grad_norm": 89.65522766113281, + "grad_norm": 2991.419677734375, "learning_rate": 6.9e-06, - "loss": 21.6902, + "loss": 84422.9375, "step": 34500 }, { "epoch": 0.06971238339184783, - "grad_norm": 479.03692626953125, + "grad_norm": 154387.375, "learning_rate": 6.902000000000001e-06, - "loss": 15.2559, + "loss": 69173.575, "step": 34510 }, { "epoch": 0.06973258402453165, - "grad_norm": 141.52882385253906, + "grad_norm": 8994.103515625, "learning_rate": 6.904e-06, - "loss": 14.7233, + "loss": 87702.4, "step": 34520 }, { "epoch": 0.06975278465721546, - "grad_norm": 237.5696258544922, + "grad_norm": 198875.546875, "learning_rate": 6.9060000000000005e-06, - "loss": 22.7162, + "loss": 106409.3125, "step": 34530 }, { "epoch": 0.06977298528989928, - "grad_norm": 185.33267211914062, + "grad_norm": 17450.037109375, "learning_rate": 6.908000000000001e-06, - "loss": 27.6367, + "loss": 249902.625, "step": 34540 }, { "epoch": 0.0697931859225831, - "grad_norm": 603.0072021484375, + "grad_norm": 741228.1875, "learning_rate": 6.91e-06, - "loss": 28.158, + "loss": 134141.5875, "step": 34550 }, { "epoch": 0.06981338655526691, - "grad_norm": 386.4322814941406, + "grad_norm": 390409.25, "learning_rate": 6.912000000000001e-06, - "loss": 50.5451, + "loss": 355807.9, "step": 34560 }, { "epoch": 0.06983358718795073, - "grad_norm": 209.9215850830078, + "grad_norm": 12433.986328125, "learning_rate": 6.914000000000001e-06, - "loss": 44.3601, + "loss": 269102.575, "step": 34570 }, { "epoch": 0.06985378782063455, - "grad_norm": 481.27508544921875, + "grad_norm": 417647.09375, "learning_rate": 6.916e-06, - "loss": 26.0773, + "loss": 150502.25, "step": 34580 }, { "epoch": 0.06987398845331835, - "grad_norm": 579.3642578125, + "grad_norm": 355893.5, "learning_rate": 6.9180000000000005e-06, - "loss": 22.9499, + "loss": 291071.45, "step": 34590 }, { "epoch": 0.06989418908600217, - "grad_norm": 461.774169921875, + "grad_norm": 355918.1875, "learning_rate": 6.92e-06, - "loss": 29.6425, + "loss": 305429.45, "step": 34600 }, { "epoch": 0.06991438971868599, - "grad_norm": 297.2777099609375, + "grad_norm": 108761.96875, "learning_rate": 6.922000000000001e-06, - "loss": 26.7038, + "loss": 93837.85, "step": 34610 }, { "epoch": 0.0699345903513698, - "grad_norm": 370.90728759765625, + "grad_norm": 175510.109375, "learning_rate": 6.924000000000001e-06, - "loss": 34.0258, + "loss": 188973.25, "step": 34620 }, { "epoch": 0.06995479098405362, - "grad_norm": 438.7866516113281, + "grad_norm": 66748.625, "learning_rate": 6.926e-06, - "loss": 31.7307, + "loss": 243009.975, "step": 34630 }, { "epoch": 0.06997499161673744, - "grad_norm": 42.228172302246094, + "grad_norm": 1192.8851318359375, "learning_rate": 6.928e-06, - "loss": 29.1728, + "loss": 243138.025, "step": 34640 }, { "epoch": 0.06999519224942125, - "grad_norm": 156.34921264648438, + "grad_norm": 3264.728515625, "learning_rate": 6.93e-06, - "loss": 31.3085, + "loss": 235351.625, "step": 34650 }, { "epoch": 0.07001539288210507, - "grad_norm": 129.1876678466797, + "grad_norm": 1193.1123046875, "learning_rate": 6.932000000000001e-06, - "loss": 17.2462, + "loss": 212156.15, "step": 34660 }, { "epoch": 0.07003559351478889, - "grad_norm": 183.5938262939453, + "grad_norm": 59658.97265625, "learning_rate": 6.934000000000001e-06, - "loss": 28.9883, + "loss": 224669.125, "step": 34670 }, { "epoch": 0.0700557941474727, - "grad_norm": 298.6161804199219, + "grad_norm": 112521.1796875, "learning_rate": 6.936e-06, - "loss": 32.2716, + "loss": 291341.725, "step": 34680 }, { "epoch": 0.07007599478015651, - "grad_norm": 276.34808349609375, + "grad_norm": 120566.765625, "learning_rate": 6.938e-06, - "loss": 32.5951, + "loss": 189878.6375, "step": 34690 }, { "epoch": 0.07009619541284033, - "grad_norm": 225.87469482421875, + "grad_norm": 34317.9375, "learning_rate": 6.9400000000000005e-06, - "loss": 30.0686, + "loss": 272149.125, "step": 34700 }, { "epoch": 0.07011639604552415, - "grad_norm": 622.208984375, + "grad_norm": 134185.671875, "learning_rate": 6.942000000000001e-06, - "loss": 45.3452, + "loss": 239128.4, "step": 34710 }, { "epoch": 0.07013659667820796, - "grad_norm": 329.3135681152344, + "grad_norm": 241182.625, "learning_rate": 6.944000000000001e-06, - "loss": 18.9175, + "loss": 138826.925, "step": 34720 }, { "epoch": 0.07015679731089178, - "grad_norm": 364.23193359375, + "grad_norm": 188013.84375, "learning_rate": 6.946000000000001e-06, - "loss": 39.9556, + "loss": 162116.6, "step": 34730 }, { "epoch": 0.0701769979435756, - "grad_norm": 692.3369750976562, + "grad_norm": 432944.0, "learning_rate": 6.948e-06, - "loss": 48.0362, + "loss": 352976.45, "step": 34740 }, { "epoch": 0.0701971985762594, - "grad_norm": 507.91473388671875, + "grad_norm": 539391.8125, "learning_rate": 6.95e-06, - "loss": 38.6651, + "loss": 232996.2, "step": 34750 }, { "epoch": 0.07021739920894322, - "grad_norm": 513.9027709960938, + "grad_norm": 663123.4375, "learning_rate": 6.952000000000001e-06, - "loss": 38.2413, + "loss": 251594.775, "step": 34760 }, { "epoch": 0.07023759984162704, - "grad_norm": 298.8250427246094, + "grad_norm": 368700.15625, "learning_rate": 6.954000000000001e-06, - "loss": 23.3547, + "loss": 171913.275, "step": 34770 }, { "epoch": 0.07025780047431085, - "grad_norm": 152.75511169433594, + "grad_norm": 34651.046875, "learning_rate": 6.956000000000001e-06, - "loss": 28.2933, + "loss": 102833.0437, "step": 34780 }, { "epoch": 0.07027800110699467, - "grad_norm": 220.9697265625, + "grad_norm": 41765.88671875, "learning_rate": 6.958e-06, - "loss": 17.977, + "loss": 52209.9187, "step": 34790 }, { "epoch": 0.07029820173967849, - "grad_norm": 122.43717956542969, + "grad_norm": 8276.517578125, "learning_rate": 6.96e-06, - "loss": 16.0281, + "loss": 39909.9969, "step": 34800 }, { "epoch": 0.0703184023723623, - "grad_norm": 57.243896484375, + "grad_norm": 13382.6376953125, "learning_rate": 6.962000000000001e-06, - "loss": 55.2343, + "loss": 345680.75, "step": 34810 }, { "epoch": 0.07033860300504612, - "grad_norm": 39.22971725463867, + "grad_norm": 21046.171875, "learning_rate": 6.964000000000001e-06, - "loss": 34.3773, + "loss": 203432.075, "step": 34820 }, { "epoch": 0.07035880363772994, - "grad_norm": 378.3753662109375, + "grad_norm": 24292.412109375, "learning_rate": 6.966000000000001e-06, - "loss": 37.3846, + "loss": 82887.0188, "step": 34830 }, { "epoch": 0.07037900427041376, - "grad_norm": 426.9247131347656, + "grad_norm": 482763.125, "learning_rate": 6.968e-06, - "loss": 30.9604, + "loss": 499863.9, "step": 34840 }, { "epoch": 0.07039920490309756, - "grad_norm": 584.5142211914062, + "grad_norm": 191884.25, "learning_rate": 6.97e-06, - "loss": 32.2506, + "loss": 223112.025, "step": 34850 }, { "epoch": 0.07041940553578138, - "grad_norm": 357.4947509765625, + "grad_norm": 5494.14111328125, "learning_rate": 6.972000000000001e-06, - "loss": 22.4271, + "loss": 129893.925, "step": 34860 }, { "epoch": 0.0704396061684652, - "grad_norm": 393.0754089355469, + "grad_norm": 33429.77734375, "learning_rate": 6.9740000000000005e-06, - "loss": 20.3699, + "loss": 217895.775, "step": 34870 }, { "epoch": 0.07045980680114901, - "grad_norm": 422.4024658203125, + "grad_norm": 267466.3125, "learning_rate": 6.976000000000001e-06, - "loss": 28.3317, + "loss": 143116.675, "step": 34880 }, { "epoch": 0.07048000743383283, - "grad_norm": 229.6375274658203, + "grad_norm": 79872.2421875, "learning_rate": 6.978e-06, - "loss": 17.2899, + "loss": 147806.6125, "step": 34890 }, { "epoch": 0.07050020806651665, - "grad_norm": 212.41357421875, + "grad_norm": 118887.3984375, "learning_rate": 6.98e-06, - "loss": 23.0419, + "loss": 148605.875, "step": 34900 }, { "epoch": 0.07052040869920045, - "grad_norm": 297.3955993652344, + "grad_norm": 6696.01025390625, "learning_rate": 6.982000000000001e-06, - "loss": 37.1577, + "loss": 212504.8, "step": 34910 }, { "epoch": 0.07054060933188427, - "grad_norm": 415.9945068359375, + "grad_norm": 150487.578125, "learning_rate": 6.984e-06, - "loss": 18.8616, + "loss": 114319.4, "step": 34920 }, { "epoch": 0.0705608099645681, - "grad_norm": 282.5414123535156, + "grad_norm": 25704.06640625, "learning_rate": 6.9860000000000005e-06, - "loss": 14.3868, + "loss": 58725.7, "step": 34930 }, { "epoch": 0.0705810105972519, - "grad_norm": 990.1007690429688, + "grad_norm": 2090961.625, "learning_rate": 6.988000000000001e-06, - "loss": 58.3404, + "loss": 458793.55, "step": 34940 }, { "epoch": 0.07060121122993572, - "grad_norm": 184.29627990722656, + "grad_norm": 48626.23046875, "learning_rate": 6.99e-06, - "loss": 14.2599, + "loss": 89247.575, "step": 34950 }, { "epoch": 0.07062141186261954, - "grad_norm": 319.30511474609375, + "grad_norm": 344411.3125, "learning_rate": 6.992000000000001e-06, - "loss": 40.0757, + "loss": 155771.8375, "step": 34960 }, { "epoch": 0.07064161249530335, - "grad_norm": 137.1524200439453, + "grad_norm": 14094.3193359375, "learning_rate": 6.994000000000001e-06, - "loss": 30.0043, + "loss": 168959.45, "step": 34970 }, { "epoch": 0.07066181312798717, - "grad_norm": 83.04960632324219, + "grad_norm": 16497.265625, "learning_rate": 6.9960000000000004e-06, - "loss": 17.2658, + "loss": 116519.1, "step": 34980 }, { "epoch": 0.07068201376067099, - "grad_norm": 161.24586486816406, + "grad_norm": 8210.072265625, "learning_rate": 6.998000000000001e-06, - "loss": 32.1134, + "loss": 239814.375, "step": 34990 }, { "epoch": 0.0707022143933548, - "grad_norm": 156.23355102539062, + "grad_norm": 9489.005859375, "learning_rate": 7e-06, - "loss": 28.3132, + "loss": 214609.3, "step": 35000 }, { "epoch": 0.07072241502603861, - "grad_norm": 406.73956298828125, + "grad_norm": 401533.3125, "learning_rate": 7.002000000000001e-06, - "loss": 31.5436, + "loss": 179053.35, "step": 35010 }, { "epoch": 0.07074261565872243, - "grad_norm": 823.199462890625, + "grad_norm": 401569.375, "learning_rate": 7.004000000000001e-06, - "loss": 44.0885, + "loss": 247692.95, "step": 35020 }, { "epoch": 0.07076281629140625, - "grad_norm": 526.9590454101562, + "grad_norm": 738730.0, "learning_rate": 7.006e-06, - "loss": 24.7382, + "loss": 198218.5125, "step": 35030 }, { "epoch": 0.07078301692409006, - "grad_norm": 58.91459655761719, + "grad_norm": 26343.3515625, "learning_rate": 7.0080000000000005e-06, - "loss": 17.9061, + "loss": 97041.9375, "step": 35040 }, { "epoch": 0.07080321755677388, - "grad_norm": 776.4654541015625, + "grad_norm": 1204943.75, "learning_rate": 7.01e-06, - "loss": 34.614, + "loss": 293098.55, "step": 35050 }, { "epoch": 0.0708234181894577, - "grad_norm": 507.73492431640625, + "grad_norm": 374602.5625, "learning_rate": 7.012000000000001e-06, - "loss": 21.6508, + "loss": 119611.975, "step": 35060 }, { "epoch": 0.0708436188221415, - "grad_norm": 498.3752746582031, + "grad_norm": 84950.4296875, "learning_rate": 7.014000000000001e-06, - "loss": 25.3131, + "loss": 181759.675, "step": 35070 }, { "epoch": 0.07086381945482532, - "grad_norm": 183.4510955810547, + "grad_norm": 9882.341796875, "learning_rate": 7.016e-06, - "loss": 25.0299, + "loss": 108583.3875, "step": 35080 }, { "epoch": 0.07088402008750914, - "grad_norm": 395.6617431640625, + "grad_norm": 252965.78125, "learning_rate": 7.018e-06, - "loss": 36.4378, + "loss": 201521.5625, "step": 35090 }, { "epoch": 0.07090422072019295, - "grad_norm": 446.00994873046875, + "grad_norm": 530746.125, "learning_rate": 7.0200000000000006e-06, - "loss": 45.5589, + "loss": 267360.825, "step": 35100 }, { "epoch": 0.07092442135287677, - "grad_norm": 208.86793518066406, + "grad_norm": 173621.59375, "learning_rate": 7.022000000000001e-06, - "loss": 31.6502, + "loss": 116445.325, "step": 35110 }, { "epoch": 0.07094462198556059, - "grad_norm": 165.08340454101562, + "grad_norm": 109762.78125, "learning_rate": 7.024000000000001e-06, - "loss": 34.0186, + "loss": 152070.8375, "step": 35120 }, { "epoch": 0.0709648226182444, - "grad_norm": 166.9569549560547, + "grad_norm": 21242.03515625, "learning_rate": 7.026000000000001e-06, - "loss": 19.1239, + "loss": 81882.5562, "step": 35130 }, { "epoch": 0.07098502325092822, - "grad_norm": 247.7082977294922, + "grad_norm": 16669.5625, "learning_rate": 7.028e-06, - "loss": 24.5042, + "loss": 136022.3375, "step": 35140 }, { "epoch": 0.07100522388361204, - "grad_norm": 296.1555480957031, + "grad_norm": 49006.04296875, "learning_rate": 7.0300000000000005e-06, - "loss": 31.8144, + "loss": 366566.9, "step": 35150 }, { "epoch": 0.07102542451629586, - "grad_norm": 271.96258544921875, + "grad_norm": 140191.125, "learning_rate": 7.0320000000000015e-06, - "loss": 29.6839, + "loss": 191443.65, "step": 35160 }, { "epoch": 0.07104562514897966, - "grad_norm": 373.5399169921875, + "grad_norm": 248137.703125, "learning_rate": 7.034000000000001e-06, - "loss": 17.8588, + "loss": 92096.6687, "step": 35170 }, { "epoch": 0.07106582578166348, - "grad_norm": 143.0919952392578, + "grad_norm": 34587.484375, "learning_rate": 7.036000000000001e-06, - "loss": 38.991, + "loss": 205651.225, "step": 35180 }, { "epoch": 0.0710860264143473, - "grad_norm": 365.6285400390625, + "grad_norm": 169998.703125, "learning_rate": 7.038e-06, - "loss": 36.4588, + "loss": 255704.85, "step": 35190 }, { "epoch": 0.07110622704703111, - "grad_norm": 227.5874786376953, + "grad_norm": 367318.375, "learning_rate": 7.04e-06, - "loss": 43.24, + "loss": 315166.475, "step": 35200 }, { "epoch": 0.07112642767971493, - "grad_norm": 172.9547119140625, + "grad_norm": 16831.9140625, "learning_rate": 7.042000000000001e-06, - "loss": 31.8345, + "loss": 250153.275, "step": 35210 }, { "epoch": 0.07114662831239875, - "grad_norm": 235.5738067626953, + "grad_norm": 109840.7890625, "learning_rate": 7.044000000000001e-06, - "loss": 26.454, + "loss": 200450.4875, "step": 35220 }, { "epoch": 0.07116682894508256, - "grad_norm": 1110.907958984375, + "grad_norm": 1499340.125, "learning_rate": 7.046000000000001e-06, - "loss": 34.7562, + "loss": 239388.55, "step": 35230 }, { "epoch": 0.07118702957776638, "grad_norm": 0.0, "learning_rate": 7.048e-06, - "loss": 37.1723, + "loss": 265908.225, "step": 35240 }, { "epoch": 0.0712072302104502, - "grad_norm": 69.67388153076172, + "grad_norm": 23898.2578125, "learning_rate": 7.05e-06, - "loss": 30.0476, + "loss": 315218.5, "step": 35250 }, { "epoch": 0.071227430843134, - "grad_norm": 180.0526580810547, + "grad_norm": 24830.474609375, "learning_rate": 7.052000000000001e-06, - "loss": 20.4527, + "loss": 164989.65, "step": 35260 }, { "epoch": 0.07124763147581782, - "grad_norm": 189.87799072265625, + "grad_norm": 55254.14453125, "learning_rate": 7.0540000000000006e-06, - "loss": 27.1138, + "loss": 173135.325, "step": 35270 }, { "epoch": 0.07126783210850164, - "grad_norm": 117.58113861083984, + "grad_norm": 72021.9140625, "learning_rate": 7.056000000000001e-06, - "loss": 17.698, + "loss": 110190.4375, "step": 35280 }, { "epoch": 0.07128803274118545, - "grad_norm": 586.3466796875, + "grad_norm": 13581.9501953125, "learning_rate": 7.058e-06, - "loss": 31.3433, + "loss": 152358.9375, "step": 35290 }, { "epoch": 0.07130823337386927, - "grad_norm": 226.22317504882812, + "grad_norm": 62400.89453125, "learning_rate": 7.06e-06, - "loss": 34.7721, + "loss": 146064.175, "step": 35300 }, { "epoch": 0.07132843400655309, - "grad_norm": 290.9054260253906, + "grad_norm": 29030.974609375, "learning_rate": 7.062000000000001e-06, - "loss": 19.6257, + "loss": 47495.5031, "step": 35310 }, { "epoch": 0.07134863463923691, - "grad_norm": 183.03421020507812, + "grad_norm": 25151.271484375, "learning_rate": 7.0640000000000005e-06, - "loss": 33.1939, + "loss": 240215.875, "step": 35320 }, { "epoch": 0.07136883527192071, - "grad_norm": 909.1346435546875, + "grad_norm": 2027011.5, "learning_rate": 7.066000000000001e-06, - "loss": 34.783, + "loss": 304656.6, "step": 35330 }, { "epoch": 0.07138903590460453, - "grad_norm": 719.4769287109375, + "grad_norm": 834706.75, "learning_rate": 7.068000000000001e-06, - "loss": 29.6891, + "loss": 192032.375, "step": 35340 }, { "epoch": 0.07140923653728835, - "grad_norm": 197.17449951171875, + "grad_norm": 31836.537109375, "learning_rate": 7.07e-06, - "loss": 63.2718, + "loss": 207743.3625, "step": 35350 }, { "epoch": 0.07142943716997216, - "grad_norm": 511.7129821777344, + "grad_norm": 119261.1015625, "learning_rate": 7.072000000000001e-06, - "loss": 24.5842, + "loss": 154707.55, "step": 35360 }, { "epoch": 0.07144963780265598, - "grad_norm": 406.55767822265625, + "grad_norm": 91942.125, "learning_rate": 7.074000000000001e-06, - "loss": 31.0791, + "loss": 193949.075, "step": 35370 }, { "epoch": 0.0714698384353398, - "grad_norm": 135.95860290527344, + "grad_norm": 34324.125, "learning_rate": 7.0760000000000005e-06, - "loss": 33.5035, + "loss": 208438.2625, "step": 35380 }, { "epoch": 0.0714900390680236, - "grad_norm": 218.39500427246094, + "grad_norm": 32462.59375, "learning_rate": 7.078000000000001e-06, - "loss": 28.668, + "loss": 124558.0375, "step": 35390 }, { "epoch": 0.07151023970070743, - "grad_norm": 411.2699890136719, + "grad_norm": 584042.8125, "learning_rate": 7.08e-06, - "loss": 26.1219, + "loss": 154713.0, "step": 35400 }, { "epoch": 0.07153044033339125, - "grad_norm": 566.1672973632812, + "grad_norm": 362886.625, "learning_rate": 7.082000000000001e-06, - "loss": 33.2163, + "loss": 169725.975, "step": 35410 }, { "epoch": 0.07155064096607505, - "grad_norm": 140.92298889160156, + "grad_norm": 52822.6640625, "learning_rate": 7.084000000000001e-06, - "loss": 19.9689, + "loss": 191376.9125, "step": 35420 }, { "epoch": 0.07157084159875887, - "grad_norm": 614.938232421875, + "grad_norm": 604069.1875, "learning_rate": 7.0860000000000004e-06, - "loss": 26.4136, + "loss": 137295.475, "step": 35430 }, { "epoch": 0.07159104223144269, - "grad_norm": 447.7917785644531, + "grad_norm": 355501.46875, "learning_rate": 7.088000000000001e-06, - "loss": 21.5825, + "loss": 93564.9062, "step": 35440 }, { "epoch": 0.0716112428641265, - "grad_norm": 164.03590393066406, + "grad_norm": 63088.6015625, "learning_rate": 7.09e-06, - "loss": 26.8849, + "loss": 189431.4625, "step": 35450 }, { "epoch": 0.07163144349681032, - "grad_norm": 349.9858703613281, + "grad_norm": 193936.875, "learning_rate": 7.092000000000001e-06, - "loss": 41.2733, + "loss": 159654.9875, "step": 35460 }, { "epoch": 0.07165164412949414, - "grad_norm": 274.734375, + "grad_norm": 72110.6171875, "learning_rate": 7.094000000000001e-06, - "loss": 28.6498, + "loss": 178177.3375, "step": 35470 }, { "epoch": 0.07167184476217796, - "grad_norm": 225.19076538085938, + "grad_norm": 18262.923828125, "learning_rate": 7.096e-06, - "loss": 18.517, + "loss": 144514.275, "step": 35480 }, { "epoch": 0.07169204539486176, - "grad_norm": 450.4459533691406, + "grad_norm": 128698.0234375, "learning_rate": 7.0980000000000005e-06, - "loss": 17.9318, + "loss": 98187.9812, "step": 35490 }, { "epoch": 0.07171224602754558, - "grad_norm": 401.98333740234375, + "grad_norm": 98240.25, "learning_rate": 7.100000000000001e-06, - "loss": 21.0995, + "loss": 343158.775, "step": 35500 }, { "epoch": 0.0717324466602294, - "grad_norm": 110.38678741455078, + "grad_norm": 21298.97265625, "learning_rate": 7.102000000000001e-06, - "loss": 15.5251, + "loss": 73024.4937, "step": 35510 }, { "epoch": 0.07175264729291321, - "grad_norm": 263.2588195800781, + "grad_norm": 147380.296875, "learning_rate": 7.104000000000001e-06, - "loss": 26.2152, + "loss": 119154.65, "step": 35520 }, { "epoch": 0.07177284792559703, - "grad_norm": 483.0096130371094, + "grad_norm": 299748.8125, "learning_rate": 7.106000000000001e-06, - "loss": 26.1094, + "loss": 178229.45, "step": 35530 }, { "epoch": 0.07179304855828085, - "grad_norm": 361.9169921875, + "grad_norm": 137447.4375, "learning_rate": 7.108e-06, - "loss": 30.8617, + "loss": 126419.8375, "step": 35540 }, { "epoch": 0.07181324919096466, - "grad_norm": 82.24534606933594, + "grad_norm": 10185.205078125, "learning_rate": 7.1100000000000005e-06, - "loss": 40.0268, + "loss": 259575.075, "step": 35550 }, { "epoch": 0.07183344982364848, - "grad_norm": 210.82386779785156, + "grad_norm": 5481.67529296875, "learning_rate": 7.1120000000000015e-06, - "loss": 28.475, + "loss": 201070.75, "step": 35560 }, { "epoch": 0.0718536504563323, - "grad_norm": 164.13160705566406, + "grad_norm": 51632.125, "learning_rate": 7.114000000000001e-06, - "loss": 11.687, + "loss": 89977.95, "step": 35570 }, { "epoch": 0.0718738510890161, - "grad_norm": 162.2367401123047, + "grad_norm": 39194.38671875, "learning_rate": 7.116000000000001e-06, - "loss": 27.1376, + "loss": 133041.7, "step": 35580 }, { "epoch": 0.07189405172169992, - "grad_norm": 437.4132995605469, + "grad_norm": 1055159.5, "learning_rate": 7.118e-06, - "loss": 34.7301, + "loss": 419616.95, "step": 35590 }, { "epoch": 0.07191425235438374, - "grad_norm": 242.47047424316406, + "grad_norm": 51022.62109375, "learning_rate": 7.1200000000000004e-06, - "loss": 11.6893, + "loss": 40151.225, "step": 35600 }, { "epoch": 0.07193445298706755, - "grad_norm": 160.79574584960938, + "grad_norm": 141875.71875, "learning_rate": 7.1220000000000014e-06, - "loss": 15.1122, + "loss": 125950.9625, "step": 35610 }, { "epoch": 0.07195465361975137, - "grad_norm": 371.9103698730469, + "grad_norm": 138770.390625, "learning_rate": 7.124000000000001e-06, - "loss": 19.397, + "loss": 126227.875, "step": 35620 }, { "epoch": 0.07197485425243519, - "grad_norm": 465.20501708984375, + "grad_norm": 187937.875, "learning_rate": 7.126000000000001e-06, - "loss": 25.835, + "loss": 77534.3562, "step": 35630 }, { "epoch": 0.07199505488511901, - "grad_norm": 170.93829345703125, + "grad_norm": 1643.87109375, "learning_rate": 7.128e-06, - "loss": 26.0983, + "loss": 104965.725, "step": 35640 }, { "epoch": 0.07201525551780281, - "grad_norm": 394.7083435058594, + "grad_norm": 42637.40625, "learning_rate": 7.13e-06, - "loss": 23.7062, + "loss": 115430.175, "step": 35650 }, { "epoch": 0.07203545615048663, - "grad_norm": 258.1009826660156, + "grad_norm": 61003.02734375, "learning_rate": 7.132e-06, - "loss": 14.3192, + "loss": 59183.7, "step": 35660 }, { "epoch": 0.07205565678317045, - "grad_norm": 246.60574340820312, + "grad_norm": 232815.421875, "learning_rate": 7.134000000000001e-06, - "loss": 21.8068, + "loss": 149258.4625, "step": 35670 }, { "epoch": 0.07207585741585426, - "grad_norm": 361.5950622558594, + "grad_norm": 257723.21875, "learning_rate": 7.136000000000001e-06, - "loss": 36.3729, + "loss": 154767.1625, "step": 35680 }, { "epoch": 0.07209605804853808, - "grad_norm": 130.7515411376953, + "grad_norm": 38758.66015625, "learning_rate": 7.138e-06, - "loss": 30.8072, + "loss": 241899.825, "step": 35690 }, { "epoch": 0.0721162586812219, - "grad_norm": 558.0682983398438, + "grad_norm": 207948.90625, "learning_rate": 7.14e-06, - "loss": 39.1854, + "loss": 361626.525, "step": 35700 }, { "epoch": 0.07213645931390571, - "grad_norm": 71.65023040771484, + "grad_norm": 4047.79296875, "learning_rate": 7.142e-06, - "loss": 27.4871, + "loss": 156265.7375, "step": 35710 }, { "epoch": 0.07215665994658953, - "grad_norm": 288.7943115234375, + "grad_norm": 265291.75, "learning_rate": 7.1440000000000005e-06, - "loss": 14.6281, + "loss": 102173.95, "step": 35720 }, { "epoch": 0.07217686057927335, - "grad_norm": 246.38893127441406, + "grad_norm": 29739.109375, "learning_rate": 7.146000000000001e-06, - "loss": 22.2566, + "loss": 124683.85, "step": 35730 }, { "epoch": 0.07219706121195715, - "grad_norm": 134.45681762695312, + "grad_norm": 14168.83984375, "learning_rate": 7.148000000000001e-06, - "loss": 22.4863, + "loss": 169601.9375, "step": 35740 }, { "epoch": 0.07221726184464097, - "grad_norm": 59.040748596191406, + "grad_norm": 3290.4169921875, "learning_rate": 7.15e-06, - "loss": 40.3208, + "loss": 292644.8, "step": 35750 }, { "epoch": 0.0722374624773248, - "grad_norm": 475.1350402832031, + "grad_norm": 169424.640625, "learning_rate": 7.152e-06, - "loss": 26.8597, + "loss": 120379.95, "step": 35760 }, { "epoch": 0.0722576631100086, - "grad_norm": 172.88192749023438, + "grad_norm": 21024.640625, "learning_rate": 7.154000000000001e-06, - "loss": 32.3942, + "loss": 346440.25, "step": 35770 }, { "epoch": 0.07227786374269242, - "grad_norm": 478.24957275390625, + "grad_norm": 277838.0625, "learning_rate": 7.156000000000001e-06, - "loss": 36.7713, + "loss": 348587.6, "step": 35780 }, { "epoch": 0.07229806437537624, - "grad_norm": 255.5506591796875, + "grad_norm": 114636.5, "learning_rate": 7.158000000000001e-06, - "loss": 22.5891, + "loss": 148827.2125, "step": 35790 }, { "epoch": 0.07231826500806006, - "grad_norm": 439.4625549316406, + "grad_norm": 261682.6875, "learning_rate": 7.16e-06, - "loss": 37.9358, + "loss": 145813.725, "step": 35800 }, { "epoch": 0.07233846564074387, - "grad_norm": 282.32830810546875, + "grad_norm": 42060.16796875, "learning_rate": 7.162e-06, - "loss": 17.1687, + "loss": 128479.0375, "step": 35810 }, { "epoch": 0.07235866627342769, - "grad_norm": 421.70745849609375, + "grad_norm": 53589.8515625, "learning_rate": 7.164000000000001e-06, - "loss": 37.5946, + "loss": 179491.925, "step": 35820 }, { "epoch": 0.0723788669061115, - "grad_norm": 313.3034973144531, + "grad_norm": 14724.2509765625, "learning_rate": 7.1660000000000005e-06, - "loss": 17.9735, + "loss": 91932.65, "step": 35830 }, { "epoch": 0.07239906753879531, - "grad_norm": 397.9144592285156, + "grad_norm": 163289.859375, "learning_rate": 7.168000000000001e-06, - "loss": 20.7027, + "loss": 89644.5125, "step": 35840 }, { "epoch": 0.07241926817147913, - "grad_norm": 290.4940490722656, + "grad_norm": 423820.21875, "learning_rate": 7.17e-06, - "loss": 27.6429, + "loss": 167489.2, "step": 35850 }, { "epoch": 0.07243946880416295, - "grad_norm": 176.52755737304688, + "grad_norm": 43009.7734375, "learning_rate": 7.172e-06, - "loss": 21.9893, + "loss": 182233.7, "step": 35860 }, { "epoch": 0.07245966943684676, - "grad_norm": 411.03887939453125, + "grad_norm": 39894.765625, "learning_rate": 7.174000000000001e-06, - "loss": 35.1925, + "loss": 151014.75, "step": 35870 }, { "epoch": 0.07247987006953058, "grad_norm": 0.0, "learning_rate": 7.176e-06, - "loss": 24.5947, + "loss": 180799.55, "step": 35880 }, { "epoch": 0.0725000707022144, - "grad_norm": 161.65664672851562, + "grad_norm": 6585.6494140625, "learning_rate": 7.1780000000000006e-06, - "loss": 16.1081, + "loss": 78682.5813, "step": 35890 }, { "epoch": 0.0725202713348982, - "grad_norm": 436.17327880859375, + "grad_norm": 198267.265625, "learning_rate": 7.180000000000001e-06, - "loss": 39.479, + "loss": 149168.9625, "step": 35900 }, { "epoch": 0.07254047196758202, - "grad_norm": 475.31060791015625, + "grad_norm": 466388.90625, "learning_rate": 7.182e-06, - "loss": 25.5334, + "loss": 193810.9, "step": 35910 }, { "epoch": 0.07256067260026584, - "grad_norm": 437.42547607421875, + "grad_norm": 553730.875, "learning_rate": 7.184000000000001e-06, - "loss": 40.1187, + "loss": 302715.7, "step": 35920 }, { "epoch": 0.07258087323294965, - "grad_norm": 106.16314697265625, + "grad_norm": 7327.90576171875, "learning_rate": 7.186000000000001e-06, - "loss": 34.4375, + "loss": 271670.425, "step": 35930 }, { "epoch": 0.07260107386563347, - "grad_norm": 118.01982879638672, + "grad_norm": 24947.4375, "learning_rate": 7.1880000000000005e-06, - "loss": 24.4265, + "loss": 141929.975, "step": 35940 }, { "epoch": 0.07262127449831729, - "grad_norm": 371.4693908691406, + "grad_norm": 481513.375, "learning_rate": 7.190000000000001e-06, - "loss": 17.0825, + "loss": 134693.075, "step": 35950 }, { "epoch": 0.07264147513100111, - "grad_norm": 385.18011474609375, + "grad_norm": 136599.78125, "learning_rate": 7.192e-06, - "loss": 26.4692, + "loss": 171251.325, "step": 35960 }, { "epoch": 0.07266167576368492, - "grad_norm": 134.06768798828125, + "grad_norm": 11775.322265625, "learning_rate": 7.194000000000001e-06, - "loss": 19.1417, + "loss": 151986.4, "step": 35970 }, { "epoch": 0.07268187639636874, - "grad_norm": 271.87060546875, + "grad_norm": 77979.8359375, "learning_rate": 7.196000000000001e-06, - "loss": 32.2219, + "loss": 302544.475, "step": 35980 }, { "epoch": 0.07270207702905256, - "grad_norm": 228.697265625, + "grad_norm": 58335.671875, "learning_rate": 7.198e-06, - "loss": 15.1079, + "loss": 106980.0, "step": 35990 }, { "epoch": 0.07272227766173636, - "grad_norm": 830.1871948242188, + "grad_norm": 733720.25, "learning_rate": 7.2000000000000005e-06, - "loss": 32.8203, + "loss": 245121.45, "step": 36000 }, { "epoch": 0.07274247829442018, - "grad_norm": 510.1455993652344, + "grad_norm": 285773.71875, "learning_rate": 7.202e-06, - "loss": 38.1677, + "loss": 225743.5, "step": 36010 }, { "epoch": 0.072762678927104, - "grad_norm": 582.5565185546875, + "grad_norm": 59906.2578125, "learning_rate": 7.204000000000001e-06, - "loss": 25.6832, + "loss": 179209.675, "step": 36020 }, { "epoch": 0.07278287955978781, - "grad_norm": 218.80935668945312, + "grad_norm": 24963.626953125, "learning_rate": 7.206000000000001e-06, - "loss": 27.2106, + "loss": 148382.1, "step": 36030 }, { "epoch": 0.07280308019247163, - "grad_norm": 294.893310546875, + "grad_norm": 102977.625, "learning_rate": 7.208e-06, - "loss": 21.015, + "loss": 99285.4688, "step": 36040 }, { "epoch": 0.07282328082515545, - "grad_norm": 334.14752197265625, + "grad_norm": 15370.603515625, "learning_rate": 7.2100000000000004e-06, - "loss": 18.0615, + "loss": 83979.2625, "step": 36050 }, { "epoch": 0.07284348145783925, - "grad_norm": 288.8575134277344, + "grad_norm": 92383.1640625, "learning_rate": 7.212e-06, - "loss": 22.7721, + "loss": 103093.8, "step": 36060 }, { "epoch": 0.07286368209052307, - "grad_norm": 358.3299255371094, + "grad_norm": 252197.15625, "learning_rate": 7.214000000000001e-06, - "loss": 21.3283, + "loss": 127646.7125, "step": 36070 }, { "epoch": 0.0728838827232069, - "grad_norm": 315.20782470703125, + "grad_norm": 159819.21875, "learning_rate": 7.216000000000001e-06, - "loss": 15.5972, + "loss": 96138.6313, "step": 36080 }, { "epoch": 0.0729040833558907, - "grad_norm": 361.19549560546875, + "grad_norm": 372101.78125, "learning_rate": 7.218e-06, - "loss": 25.5272, + "loss": 195128.475, "step": 36090 }, { "epoch": 0.07292428398857452, - "grad_norm": 177.4301300048828, + "grad_norm": 38087.03125, "learning_rate": 7.22e-06, - "loss": 34.7283, + "loss": 184969.475, "step": 36100 }, { "epoch": 0.07294448462125834, - "grad_norm": 199.40557861328125, + "grad_norm": 7025.376953125, "learning_rate": 7.2220000000000005e-06, - "loss": 18.4738, + "loss": 74064.1938, "step": 36110 }, { "epoch": 0.07296468525394216, - "grad_norm": 711.8712768554688, + "grad_norm": 1348815.875, "learning_rate": 7.224000000000001e-06, - "loss": 38.1349, + "loss": 350450.9, "step": 36120 }, { "epoch": 0.07298488588662597, - "grad_norm": 207.1460723876953, + "grad_norm": 104294.7890625, "learning_rate": 7.226000000000001e-06, - "loss": 14.6274, + "loss": 83540.55, "step": 36130 }, { "epoch": 0.07300508651930979, - "grad_norm": 218.71484375, + "grad_norm": 532119.125, "learning_rate": 7.228000000000001e-06, - "loss": 22.8234, + "loss": 300447.55, "step": 36140 }, { "epoch": 0.0730252871519936, - "grad_norm": 232.43943786621094, + "grad_norm": 79951.5703125, "learning_rate": 7.23e-06, - "loss": 23.2104, + "loss": 123095.5375, "step": 36150 }, { "epoch": 0.07304548778467741, - "grad_norm": 500.0850830078125, + "grad_norm": 789532.125, "learning_rate": 7.232e-06, - "loss": 18.8728, + "loss": 153764.7125, "step": 36160 }, { "epoch": 0.07306568841736123, - "grad_norm": 403.644775390625, + "grad_norm": 93703.9296875, "learning_rate": 7.234000000000001e-06, - "loss": 25.2967, + "loss": 136434.025, "step": 36170 }, { "epoch": 0.07308588905004505, - "grad_norm": 321.7872619628906, + "grad_norm": 55290.05859375, "learning_rate": 7.236000000000001e-06, - "loss": 33.8395, + "loss": 221552.675, "step": 36180 }, { "epoch": 0.07310608968272886, - "grad_norm": 103.05240631103516, + "grad_norm": 8435.3525390625, "learning_rate": 7.238000000000001e-06, - "loss": 21.2051, + "loss": 144921.2, "step": 36190 }, { "epoch": 0.07312629031541268, - "grad_norm": 210.76727294921875, + "grad_norm": 38115.47265625, "learning_rate": 7.24e-06, - "loss": 13.8069, + "loss": 71046.0375, "step": 36200 }, { "epoch": 0.0731464909480965, - "grad_norm": 250.00503540039062, + "grad_norm": 89983.8046875, "learning_rate": 7.242e-06, - "loss": 27.6499, + "loss": 179388.05, "step": 36210 }, { "epoch": 0.0731666915807803, - "grad_norm": 211.04156494140625, + "grad_norm": 91236.9453125, "learning_rate": 7.244000000000001e-06, - "loss": 25.0457, + "loss": 177788.2375, "step": 36220 }, { "epoch": 0.07318689221346412, - "grad_norm": 104.9522476196289, + "grad_norm": 14139.8154296875, "learning_rate": 7.246000000000001e-06, - "loss": 30.3264, + "loss": 293566.65, "step": 36230 }, { "epoch": 0.07320709284614794, - "grad_norm": 280.8627014160156, + "grad_norm": 30156.767578125, "learning_rate": 7.248000000000001e-06, - "loss": 17.7601, + "loss": 144215.5875, "step": 36240 }, { "epoch": 0.07322729347883175, - "grad_norm": 923.1336059570312, + "grad_norm": 665026.1875, "learning_rate": 7.25e-06, - "loss": 23.2485, + "loss": 197199.3625, "step": 36250 }, { "epoch": 0.07324749411151557, - "grad_norm": 609.1475219726562, + "grad_norm": 755082.8125, "learning_rate": 7.252e-06, - "loss": 41.1142, + "loss": 237957.75, "step": 36260 }, { "epoch": 0.07326769474419939, - "grad_norm": 644.6527099609375, + "grad_norm": 470094.59375, "learning_rate": 7.254000000000001e-06, - "loss": 25.7066, + "loss": 345355.9, "step": 36270 }, { "epoch": 0.07328789537688321, - "grad_norm": 189.8779296875, + "grad_norm": 85025.828125, "learning_rate": 7.2560000000000005e-06, - "loss": 27.0845, + "loss": 124273.7875, "step": 36280 }, { "epoch": 0.07330809600956702, - "grad_norm": 502.730712890625, + "grad_norm": 612717.375, "learning_rate": 7.258000000000001e-06, - "loss": 25.2417, + "loss": 188208.4375, "step": 36290 }, { "epoch": 0.07332829664225084, - "grad_norm": 344.7130126953125, + "grad_norm": 231342.875, "learning_rate": 7.260000000000001e-06, - "loss": 34.3557, + "loss": 243893.6, "step": 36300 }, { "epoch": 0.07334849727493466, - "grad_norm": 240.31741333007812, + "grad_norm": 51275.53515625, "learning_rate": 7.262e-06, - "loss": 34.4685, + "loss": 161828.85, "step": 36310 }, { "epoch": 0.07336869790761846, - "grad_norm": 356.3848876953125, + "grad_norm": 6684.08251953125, "learning_rate": 7.264000000000001e-06, - "loss": 21.1377, + "loss": 69391.6187, "step": 36320 }, { "epoch": 0.07338889854030228, - "grad_norm": 177.12232971191406, + "grad_norm": 33146.76953125, "learning_rate": 7.266000000000001e-06, - "loss": 27.8209, + "loss": 162910.0875, "step": 36330 }, { "epoch": 0.0734090991729861, - "grad_norm": 492.5040588378906, + "grad_norm": 495341.6875, "learning_rate": 7.2680000000000005e-06, - "loss": 32.8956, + "loss": 404437.925, "step": 36340 }, { "epoch": 0.07342929980566991, - "grad_norm": 448.3660583496094, + "grad_norm": 83189.5703125, "learning_rate": 7.270000000000001e-06, - "loss": 24.272, + "loss": 129668.0, "step": 36350 }, { "epoch": 0.07344950043835373, - "grad_norm": 336.73162841796875, + "grad_norm": 51294.35546875, "learning_rate": 7.272e-06, - "loss": 26.5316, + "loss": 252012.9, "step": 36360 }, { "epoch": 0.07346970107103755, - "grad_norm": 362.0407409667969, + "grad_norm": 79760.5859375, "learning_rate": 7.274000000000001e-06, - "loss": 30.0662, + "loss": 178186.0, "step": 36370 }, { "epoch": 0.07348990170372136, - "grad_norm": 234.1678009033203, + "grad_norm": 68193.0234375, "learning_rate": 7.276000000000001e-06, - "loss": 25.709, + "loss": 261762.9, "step": 36380 }, { "epoch": 0.07351010233640518, - "grad_norm": 317.2973937988281, + "grad_norm": 76958.40625, "learning_rate": 7.2780000000000005e-06, - "loss": 35.7133, + "loss": 146958.1375, "step": 36390 }, { "epoch": 0.073530302969089, - "grad_norm": 180.00048828125, + "grad_norm": 25969.359375, "learning_rate": 7.280000000000001e-06, - "loss": 18.6612, + "loss": 112492.275, "step": 36400 }, { "epoch": 0.0735505036017728, - "grad_norm": 533.5408325195312, + "grad_norm": 1096316.125, "learning_rate": 7.282e-06, - "loss": 36.5794, + "loss": 282088.1, "step": 36410 }, { "epoch": 0.07357070423445662, - "grad_norm": 228.9940643310547, + "grad_norm": 1359.409912109375, "learning_rate": 7.284000000000001e-06, - "loss": 28.2655, + "loss": 209507.05, "step": 36420 }, { "epoch": 0.07359090486714044, - "grad_norm": 518.9951171875, + "grad_norm": 525699.8125, "learning_rate": 7.286000000000001e-06, - "loss": 32.3436, + "loss": 241964.025, "step": 36430 }, { "epoch": 0.07361110549982426, - "grad_norm": 299.5155334472656, + "grad_norm": 51637.08984375, "learning_rate": 7.288e-06, - "loss": 23.8606, + "loss": 115971.2375, "step": 36440 }, { "epoch": 0.07363130613250807, - "grad_norm": 226.43161010742188, + "grad_norm": 27699.09375, "learning_rate": 7.2900000000000005e-06, - "loss": 15.8627, + "loss": 102482.9375, "step": 36450 }, { "epoch": 0.07365150676519189, - "grad_norm": 645.637939453125, + "grad_norm": 294215.09375, "learning_rate": 7.292e-06, - "loss": 45.8218, + "loss": 309704.225, "step": 36460 }, { "epoch": 0.07367170739787571, - "grad_norm": 293.798583984375, + "grad_norm": 207839.8125, "learning_rate": 7.294000000000001e-06, - "loss": 32.9752, + "loss": 181627.5875, "step": 36470 }, { "epoch": 0.07369190803055951, - "grad_norm": 520.4822998046875, + "grad_norm": 189069.328125, "learning_rate": 7.296000000000001e-06, - "loss": 22.4962, + "loss": 46999.0563, "step": 36480 }, { "epoch": 0.07371210866324333, - "grad_norm": 254.99899291992188, + "grad_norm": 139624.140625, "learning_rate": 7.298e-06, - "loss": 32.3449, + "loss": 193818.5375, "step": 36490 }, { "epoch": 0.07373230929592715, - "grad_norm": 393.9755554199219, + "grad_norm": 85785.0625, "learning_rate": 7.3e-06, - "loss": 24.0851, + "loss": 175589.5375, "step": 36500 }, { "epoch": 0.07375250992861096, - "grad_norm": 236.24923706054688, + "grad_norm": 2779.15283203125, "learning_rate": 7.3020000000000006e-06, - "loss": 21.1192, + "loss": 120825.5625, "step": 36510 }, { "epoch": 0.07377271056129478, - "grad_norm": 236.36317443847656, + "grad_norm": 77657.09375, "learning_rate": 7.304000000000001e-06, - "loss": 23.2971, + "loss": 146539.35, "step": 36520 }, { "epoch": 0.0737929111939786, - "grad_norm": 440.5664367675781, + "grad_norm": 76410.78125, "learning_rate": 7.306000000000001e-06, - "loss": 25.8003, + "loss": 148780.0875, "step": 36530 }, { "epoch": 0.0738131118266624, - "grad_norm": 278.49822998046875, + "grad_norm": 72972.1640625, "learning_rate": 7.308000000000001e-06, - "loss": 37.1441, + "loss": 201964.675, "step": 36540 }, { "epoch": 0.07383331245934623, - "grad_norm": 51.29787826538086, + "grad_norm": 27249.66015625, "learning_rate": 7.31e-06, - "loss": 30.1834, + "loss": 261703.15, "step": 36550 }, { "epoch": 0.07385351309203005, - "grad_norm": 241.89549255371094, + "grad_norm": 117770.1796875, "learning_rate": 7.3120000000000005e-06, - "loss": 13.0701, + "loss": 265124.6, "step": 36560 }, { "epoch": 0.07387371372471385, - "grad_norm": 296.49591064453125, + "grad_norm": 127801.9765625, "learning_rate": 7.3140000000000015e-06, - "loss": 49.7003, + "loss": 447048.3, "step": 36570 }, { "epoch": 0.07389391435739767, - "grad_norm": 270.5970764160156, + "grad_norm": 177938.6875, "learning_rate": 7.316000000000001e-06, - "loss": 25.1562, + "loss": 187368.175, "step": 36580 }, { "epoch": 0.07391411499008149, - "grad_norm": 243.77528381347656, + "grad_norm": 238098.765625, "learning_rate": 7.318000000000001e-06, - "loss": 29.228, + "loss": 270173.8, "step": 36590 }, { "epoch": 0.0739343156227653, - "grad_norm": 1103.41943359375, + "grad_norm": 1403589.0, "learning_rate": 7.32e-06, - "loss": 59.5533, + "loss": 406549.45, "step": 36600 }, { "epoch": 0.07395451625544912, - "grad_norm": 283.4581604003906, + "grad_norm": 124704.7734375, "learning_rate": 7.322e-06, - "loss": 19.2951, + "loss": 128463.9125, "step": 36610 }, { "epoch": 0.07397471688813294, - "grad_norm": 365.1043395996094, + "grad_norm": 78338.6171875, "learning_rate": 7.324000000000001e-06, - "loss": 40.0341, + "loss": 216183.175, "step": 36620 }, { "epoch": 0.07399491752081676, - "grad_norm": 139.1345672607422, + "grad_norm": 26011.033203125, "learning_rate": 7.326000000000001e-06, - "loss": 39.2212, + "loss": 311608.4, "step": 36630 }, { "epoch": 0.07401511815350056, - "grad_norm": 112.60505676269531, + "grad_norm": 11753.8818359375, "learning_rate": 7.328000000000001e-06, - "loss": 22.9324, + "loss": 146343.575, "step": 36640 }, { "epoch": 0.07403531878618438, - "grad_norm": 593.785400390625, + "grad_norm": 536564.3125, "learning_rate": 7.33e-06, - "loss": 34.1412, + "loss": 238287.8, "step": 36650 }, { "epoch": 0.0740555194188682, - "grad_norm": 553.2237548828125, + "grad_norm": 1113318.25, "learning_rate": 7.332e-06, - "loss": 47.1545, + "loss": 245337.35, "step": 36660 }, { "epoch": 0.07407572005155201, - "grad_norm": 208.7756805419922, + "grad_norm": 16223.2333984375, "learning_rate": 7.334000000000001e-06, - "loss": 21.578, + "loss": 119862.4375, "step": 36670 }, { "epoch": 0.07409592068423583, - "grad_norm": 476.149658203125, + "grad_norm": 534176.25, "learning_rate": 7.3360000000000006e-06, - "loss": 37.0521, + "loss": 179945.8625, "step": 36680 }, { "epoch": 0.07411612131691965, - "grad_norm": 177.48573303222656, + "grad_norm": 169297.640625, "learning_rate": 7.338000000000001e-06, - "loss": 16.8563, + "loss": 292763.275, "step": 36690 }, { "epoch": 0.07413632194960346, - "grad_norm": 308.2784118652344, + "grad_norm": 95946.3125, "learning_rate": 7.340000000000001e-06, - "loss": 17.1021, + "loss": 87390.9625, "step": 36700 }, { "epoch": 0.07415652258228728, - "grad_norm": 208.59652709960938, + "grad_norm": 33237.91796875, "learning_rate": 7.342e-06, - "loss": 17.2225, + "loss": 118707.5, "step": 36710 }, { "epoch": 0.0741767232149711, - "grad_norm": 55.97196578979492, + "grad_norm": 16934.466796875, "learning_rate": 7.344000000000001e-06, - "loss": 29.164, + "loss": 71419.8313, "step": 36720 }, { "epoch": 0.0741969238476549, - "grad_norm": 121.7552490234375, + "grad_norm": 54569.21875, "learning_rate": 7.346000000000001e-06, - "loss": 40.1761, + "loss": 342453.9, "step": 36730 }, { "epoch": 0.07421712448033872, - "grad_norm": 199.17617797851562, + "grad_norm": 83423.640625, "learning_rate": 7.348000000000001e-06, - "loss": 49.4342, + "loss": 273000.85, "step": 36740 }, { "epoch": 0.07423732511302254, - "grad_norm": 212.97402954101562, + "grad_norm": 79040.4765625, "learning_rate": 7.350000000000001e-06, - "loss": 25.9432, + "loss": 112357.7625, "step": 36750 }, { "epoch": 0.07425752574570635, - "grad_norm": 287.5586242675781, + "grad_norm": 102257.1875, "learning_rate": 7.352e-06, - "loss": 28.8848, + "loss": 200101.125, "step": 36760 }, { "epoch": 0.07427772637839017, - "grad_norm": 570.6173095703125, + "grad_norm": 193234.203125, "learning_rate": 7.354000000000001e-06, - "loss": 19.9024, + "loss": 54353.8, "step": 36770 }, { "epoch": 0.07429792701107399, - "grad_norm": 256.5933837890625, + "grad_norm": 154999.5, "learning_rate": 7.356000000000001e-06, - "loss": 28.7767, + "loss": 184691.6375, "step": 36780 }, { "epoch": 0.07431812764375781, - "grad_norm": 204.2556610107422, + "grad_norm": 32786.75390625, "learning_rate": 7.3580000000000005e-06, - "loss": 38.7115, + "loss": 270968.9, "step": 36790 }, { "epoch": 0.07433832827644161, - "grad_norm": 113.79093933105469, + "grad_norm": 57426.9296875, "learning_rate": 7.360000000000001e-06, - "loss": 17.7016, + "loss": 81249.8188, "step": 36800 }, { "epoch": 0.07435852890912543, - "grad_norm": 116.8794937133789, + "grad_norm": 16473.92578125, "learning_rate": 7.362e-06, - "loss": 43.6426, + "loss": 317623.025, "step": 36810 }, { "epoch": 0.07437872954180925, - "grad_norm": 284.8735656738281, + "grad_norm": 10670.650390625, "learning_rate": 7.364000000000001e-06, - "loss": 47.9132, + "loss": 381618.0, "step": 36820 }, { "epoch": 0.07439893017449306, - "grad_norm": 127.1036376953125, + "grad_norm": 5610.51318359375, "learning_rate": 7.366000000000001e-06, - "loss": 22.8843, + "loss": 153328.9625, "step": 36830 }, { "epoch": 0.07441913080717688, - "grad_norm": 182.51087951660156, + "grad_norm": 16478.318359375, "learning_rate": 7.3680000000000004e-06, - "loss": 18.0095, + "loss": 113104.1375, "step": 36840 }, { "epoch": 0.0744393314398607, - "grad_norm": 461.25006103515625, + "grad_norm": 124921.734375, "learning_rate": 7.370000000000001e-06, - "loss": 27.5946, + "loss": 200099.6375, "step": 36850 }, { "epoch": 0.07445953207254451, - "grad_norm": 233.8284149169922, + "grad_norm": 31860.978515625, "learning_rate": 7.372e-06, - "loss": 29.7888, + "loss": 102751.3562, "step": 36860 }, { "epoch": 0.07447973270522833, - "grad_norm": 129.74957275390625, + "grad_norm": 203419.875, "learning_rate": 7.374000000000001e-06, - "loss": 9.5533, + "loss": 47564.95, "step": 36870 }, { "epoch": 0.07449993333791215, - "grad_norm": 470.76922607421875, + "grad_norm": 406888.875, "learning_rate": 7.376000000000001e-06, - "loss": 20.9399, + "loss": 78737.1062, "step": 36880 }, { "epoch": 0.07452013397059595, - "grad_norm": 285.8863220214844, + "grad_norm": 11617.3349609375, "learning_rate": 7.378e-06, - "loss": 19.7481, + "loss": 88797.3313, "step": 36890 }, { "epoch": 0.07454033460327977, - "grad_norm": 264.63531494140625, + "grad_norm": 57776.13671875, "learning_rate": 7.3800000000000005e-06, - "loss": 25.8509, + "loss": 143063.3625, "step": 36900 }, { "epoch": 0.0745605352359636, - "grad_norm": 287.1709899902344, + "grad_norm": 315282.875, "learning_rate": 7.382000000000001e-06, - "loss": 27.0948, + "loss": 188454.55, "step": 36910 }, { "epoch": 0.0745807358686474, - "grad_norm": 441.5046081542969, + "grad_norm": 37841.7265625, "learning_rate": 7.384e-06, - "loss": 34.6011, + "loss": 182589.0125, "step": 36920 }, { "epoch": 0.07460093650133122, - "grad_norm": 277.13397216796875, + "grad_norm": 52763.04296875, "learning_rate": 7.386000000000001e-06, - "loss": 24.864, + "loss": 178556.825, "step": 36930 }, { "epoch": 0.07462113713401504, - "grad_norm": 443.7454833984375, + "grad_norm": 768855.75, "learning_rate": 7.388000000000001e-06, - "loss": 29.8873, + "loss": 213602.6, "step": 36940 }, { "epoch": 0.07464133776669886, - "grad_norm": 242.94688415527344, + "grad_norm": 56227.00390625, "learning_rate": 7.39e-06, - "loss": 31.2921, + "loss": 114028.4, "step": 36950 }, { "epoch": 0.07466153839938267, - "grad_norm": 211.485595703125, + "grad_norm": 20104.46484375, "learning_rate": 7.3920000000000005e-06, - "loss": 17.1606, + "loss": 106307.8625, "step": 36960 }, { "epoch": 0.07468173903206649, - "grad_norm": 212.78688049316406, + "grad_norm": 55920.72265625, "learning_rate": 7.394e-06, - "loss": 23.3433, + "loss": 87490.8875, "step": 36970 }, { "epoch": 0.0747019396647503, - "grad_norm": 402.8143005371094, + "grad_norm": 206095.453125, "learning_rate": 7.396000000000001e-06, - "loss": 30.0493, + "loss": 147335.775, "step": 36980 }, { "epoch": 0.07472214029743411, - "grad_norm": 73.41931915283203, + "grad_norm": 20581.775390625, "learning_rate": 7.398000000000001e-06, - "loss": 20.3851, + "loss": 115394.5375, "step": 36990 }, { "epoch": 0.07474234093011793, - "grad_norm": 119.01841735839844, + "grad_norm": 17047.212890625, "learning_rate": 7.4e-06, - "loss": 28.2371, + "loss": 128798.475, "step": 37000 }, { "epoch": 0.07476254156280175, - "grad_norm": 245.3097686767578, + "grad_norm": 122553.3203125, "learning_rate": 7.4020000000000005e-06, - "loss": 27.0031, + "loss": 179454.9875, "step": 37010 }, { "epoch": 0.07478274219548556, - "grad_norm": 382.00225830078125, + "grad_norm": 10999.4931640625, "learning_rate": 7.404e-06, - "loss": 21.0526, + "loss": 170714.075, "step": 37020 }, { "epoch": 0.07480294282816938, - "grad_norm": 417.96441650390625, + "grad_norm": 311768.34375, "learning_rate": 7.406000000000001e-06, - "loss": 22.2644, + "loss": 156309.5875, "step": 37030 }, { "epoch": 0.0748231434608532, - "grad_norm": 262.6156921386719, + "grad_norm": 6090.5361328125, "learning_rate": 7.408000000000001e-06, - "loss": 21.3585, + "loss": 136192.475, "step": 37040 }, { "epoch": 0.074843344093537, - "grad_norm": 147.9910125732422, + "grad_norm": 9195.271484375, "learning_rate": 7.41e-06, - "loss": 31.688, + "loss": 233352.475, "step": 37050 }, { "epoch": 0.07486354472622082, - "grad_norm": 330.61212158203125, + "grad_norm": 13675.1474609375, "learning_rate": 7.412e-06, - "loss": 15.5659, + "loss": 185102.0125, "step": 37060 }, { "epoch": 0.07488374535890464, - "grad_norm": 355.9070129394531, + "grad_norm": 11475.548828125, "learning_rate": 7.4140000000000005e-06, - "loss": 31.2333, + "loss": 285474.85, "step": 37070 }, { "epoch": 0.07490394599158845, - "grad_norm": 415.4013977050781, + "grad_norm": 111927.515625, "learning_rate": 7.416000000000001e-06, - "loss": 31.6887, + "loss": 361421.2, "step": 37080 }, { "epoch": 0.07492414662427227, - "grad_norm": 215.07850646972656, + "grad_norm": 16193.0322265625, "learning_rate": 7.418000000000001e-06, - "loss": 31.2258, + "loss": 209661.7625, "step": 37090 }, { "epoch": 0.07494434725695609, - "grad_norm": 142.98353576660156, + "grad_norm": 10378.0751953125, "learning_rate": 7.420000000000001e-06, - "loss": 38.7785, + "loss": 193485.575, "step": 37100 }, { "epoch": 0.07496454788963991, - "grad_norm": 105.02717590332031, + "grad_norm": 10024.1748046875, "learning_rate": 7.422e-06, - "loss": 23.1669, + "loss": 214735.8, "step": 37110 }, { "epoch": 0.07498474852232372, - "grad_norm": 336.7762145996094, + "grad_norm": 264175.78125, "learning_rate": 7.424e-06, - "loss": 14.6662, + "loss": 75851.3562, "step": 37120 }, { "epoch": 0.07500494915500754, - "grad_norm": 409.864990234375, + "grad_norm": 314511.03125, "learning_rate": 7.426000000000001e-06, - "loss": 27.6743, + "loss": 101922.8562, "step": 37130 }, { "epoch": 0.07502514978769136, - "grad_norm": 268.7269287109375, + "grad_norm": 34757.32421875, "learning_rate": 7.428000000000001e-06, - "loss": 24.6566, + "loss": 147863.0, "step": 37140 }, { "epoch": 0.07504535042037516, - "grad_norm": 312.6243896484375, + "grad_norm": 36175.4453125, "learning_rate": 7.430000000000001e-06, - "loss": 43.2453, + "loss": 236362.15, "step": 37150 }, { "epoch": 0.07506555105305898, - "grad_norm": 260.17401123046875, + "grad_norm": 588888.3125, "learning_rate": 7.432e-06, - "loss": 20.4968, + "loss": 123136.7375, "step": 37160 }, { "epoch": 0.0750857516857428, - "grad_norm": 255.1354522705078, + "grad_norm": 84944.0234375, "learning_rate": 7.434e-06, - "loss": 22.17, + "loss": 86139.1625, "step": 37170 }, { "epoch": 0.07510595231842661, - "grad_norm": 217.8333282470703, + "grad_norm": 47826.80078125, "learning_rate": 7.436000000000001e-06, - "loss": 12.0146, + "loss": 63831.2562, "step": 37180 }, { "epoch": 0.07512615295111043, - "grad_norm": 790.7473754882812, + "grad_norm": 1329057.875, "learning_rate": 7.438000000000001e-06, - "loss": 53.3113, + "loss": 309855.625, "step": 37190 }, { "epoch": 0.07514635358379425, - "grad_norm": 408.5172119140625, + "grad_norm": 541241.625, "learning_rate": 7.440000000000001e-06, - "loss": 37.1765, + "loss": 168273.55, "step": 37200 }, { "epoch": 0.07516655421647805, - "grad_norm": 571.9297485351562, + "grad_norm": 626862.5, "learning_rate": 7.442e-06, - "loss": 42.8719, + "loss": 312702.05, "step": 37210 }, { "epoch": 0.07518675484916187, - "grad_norm": 215.4970703125, + "grad_norm": 329.510986328125, "learning_rate": 7.444e-06, - "loss": 19.6267, + "loss": 62083.6062, "step": 37220 }, { "epoch": 0.0752069554818457, - "grad_norm": 76.0966567993164, + "grad_norm": 108272.4921875, "learning_rate": 7.446000000000001e-06, - "loss": 23.3036, + "loss": 230593.675, "step": 37230 }, { "epoch": 0.0752271561145295, - "grad_norm": 254.29229736328125, + "grad_norm": 38244.9140625, "learning_rate": 7.4480000000000005e-06, - "loss": 35.8291, + "loss": 207454.625, "step": 37240 }, { "epoch": 0.07524735674721332, - "grad_norm": 223.7094268798828, + "grad_norm": 84364.734375, "learning_rate": 7.450000000000001e-06, - "loss": 23.6933, + "loss": 186628.0, "step": 37250 }, { "epoch": 0.07526755737989714, - "grad_norm": 251.1807861328125, + "grad_norm": 149219.25, "learning_rate": 7.452e-06, - "loss": 27.0036, + "loss": 196822.425, "step": 37260 }, { "epoch": 0.07528775801258096, - "grad_norm": 342.2859802246094, + "grad_norm": 33531.48828125, "learning_rate": 7.454e-06, - "loss": 27.8568, + "loss": 202974.3125, "step": 37270 }, { "epoch": 0.07530795864526477, - "grad_norm": 245.50550842285156, + "grad_norm": 8856.0302734375, "learning_rate": 7.456000000000001e-06, - "loss": 35.6972, + "loss": 294620.25, "step": 37280 }, { "epoch": 0.07532815927794859, - "grad_norm": 215.06735229492188, + "grad_norm": 405579.9375, "learning_rate": 7.458e-06, - "loss": 16.0826, + "loss": 71632.7688, "step": 37290 }, { "epoch": 0.0753483599106324, - "grad_norm": 246.951904296875, + "grad_norm": 81877.6953125, "learning_rate": 7.4600000000000006e-06, - "loss": 19.3, + "loss": 42354.6031, "step": 37300 }, { "epoch": 0.07536856054331621, - "grad_norm": 383.22308349609375, + "grad_norm": 337603.0625, "learning_rate": 7.462000000000001e-06, - "loss": 25.5818, + "loss": 113233.0625, "step": 37310 }, { "epoch": 0.07538876117600003, - "grad_norm": 252.7657470703125, + "grad_norm": 34022.140625, "learning_rate": 7.464e-06, - "loss": 36.939, + "loss": 170634.875, "step": 37320 }, { "epoch": 0.07540896180868385, - "grad_norm": 163.82577514648438, + "grad_norm": 158115.953125, "learning_rate": 7.466000000000001e-06, - "loss": 29.758, + "loss": 134262.9375, "step": 37330 }, { "epoch": 0.07542916244136766, - "grad_norm": 460.1439514160156, + "grad_norm": 146158.4375, "learning_rate": 7.468000000000001e-06, - "loss": 33.5917, + "loss": 96306.325, "step": 37340 }, { "epoch": 0.07544936307405148, - "grad_norm": 194.50894165039062, + "grad_norm": 76870.4453125, "learning_rate": 7.4700000000000005e-06, - "loss": 27.0045, + "loss": 224199.8, "step": 37350 }, { "epoch": 0.0754695637067353, - "grad_norm": 326.9264831542969, + "grad_norm": 278500.71875, "learning_rate": 7.472000000000001e-06, - "loss": 19.6958, + "loss": 100066.225, "step": 37360 }, { "epoch": 0.0754897643394191, - "grad_norm": 203.56382751464844, + "grad_norm": 12682.4384765625, "learning_rate": 7.474e-06, - "loss": 20.1575, + "loss": 104605.9438, "step": 37370 }, { "epoch": 0.07550996497210292, - "grad_norm": 374.2593994140625, + "grad_norm": 105241.6015625, "learning_rate": 7.476000000000001e-06, - "loss": 19.7323, + "loss": 71978.4688, "step": 37380 }, { "epoch": 0.07553016560478674, - "grad_norm": 535.1461181640625, + "grad_norm": 204191.3125, "learning_rate": 7.478000000000001e-06, - "loss": 31.1814, + "loss": 181679.975, "step": 37390 }, { "epoch": 0.07555036623747055, - "grad_norm": 367.5732116699219, + "grad_norm": 167650.671875, "learning_rate": 7.48e-06, - "loss": 24.6602, + "loss": 153860.575, "step": 37400 }, { "epoch": 0.07557056687015437, - "grad_norm": 663.7953491210938, + "grad_norm": 264731.28125, "learning_rate": 7.4820000000000005e-06, - "loss": 33.5646, + "loss": 306202.7, "step": 37410 }, { "epoch": 0.07559076750283819, - "grad_norm": 411.58148193359375, + "grad_norm": 370651.25, "learning_rate": 7.484e-06, - "loss": 27.3003, + "loss": 198145.3375, "step": 37420 }, { "epoch": 0.07561096813552201, - "grad_norm": 427.3364562988281, + "grad_norm": 131756.9375, "learning_rate": 7.486000000000001e-06, - "loss": 29.1163, + "loss": 67270.55, "step": 37430 }, { "epoch": 0.07563116876820582, - "grad_norm": 224.48916625976562, + "grad_norm": 95831.5078125, "learning_rate": 7.488000000000001e-06, - "loss": 36.7709, + "loss": 184798.5125, "step": 37440 }, { "epoch": 0.07565136940088964, - "grad_norm": 288.510986328125, + "grad_norm": 69985.9921875, "learning_rate": 7.49e-06, - "loss": 35.8084, + "loss": 146386.9875, "step": 37450 }, { "epoch": 0.07567157003357346, - "grad_norm": 149.86940002441406, + "grad_norm": 23015.556640625, "learning_rate": 7.4920000000000004e-06, - "loss": 24.0802, + "loss": 129323.2625, "step": 37460 }, { "epoch": 0.07569177066625726, - "grad_norm": 169.2025146484375, + "grad_norm": 18367.109375, "learning_rate": 7.494000000000001e-06, - "loss": 25.7512, + "loss": 131249.4875, "step": 37470 }, { "epoch": 0.07571197129894108, - "grad_norm": 202.79107666015625, + "grad_norm": 57705.2421875, "learning_rate": 7.496000000000001e-06, - "loss": 24.6906, + "loss": 154920.25, "step": 37480 }, { "epoch": 0.0757321719316249, - "grad_norm": 120.4333724975586, + "grad_norm": 8396.484375, "learning_rate": 7.498000000000001e-06, - "loss": 14.6184, + "loss": 88191.1938, "step": 37490 }, { "epoch": 0.07575237256430871, - "grad_norm": 324.7414855957031, + "grad_norm": 206876.015625, "learning_rate": 7.500000000000001e-06, - "loss": 32.847, + "loss": 232775.85, "step": 37500 }, { "epoch": 0.07577257319699253, - "grad_norm": 238.66554260253906, + "grad_norm": 38324.24609375, "learning_rate": 7.502e-06, - "loss": 20.7424, + "loss": 192397.525, "step": 37510 }, { "epoch": 0.07579277382967635, - "grad_norm": 293.48760986328125, + "grad_norm": 20345.330078125, "learning_rate": 7.5040000000000005e-06, - "loss": 22.57, + "loss": 146046.4625, "step": 37520 }, { "epoch": 0.07581297446236016, - "grad_norm": 440.31182861328125, + "grad_norm": 59619.01171875, "learning_rate": 7.506000000000001e-06, - "loss": 29.815, + "loss": 222533.025, "step": 37530 }, { "epoch": 0.07583317509504398, - "grad_norm": 137.69004821777344, + "grad_norm": 32335.97265625, "learning_rate": 7.508000000000001e-06, - "loss": 20.7491, + "loss": 116094.025, "step": 37540 }, { "epoch": 0.0758533757277278, - "grad_norm": 526.4642944335938, + "grad_norm": 411395.46875, "learning_rate": 7.510000000000001e-06, - "loss": 15.096, + "loss": 70878.475, "step": 37550 }, { "epoch": 0.0758735763604116, - "grad_norm": 470.8855285644531, + "grad_norm": 340745.84375, "learning_rate": 7.512e-06, - "loss": 29.3815, + "loss": 137261.325, "step": 37560 }, { "epoch": 0.07589377699309542, - "grad_norm": 161.48501586914062, + "grad_norm": 31712.197265625, "learning_rate": 7.514e-06, - "loss": 20.7652, + "loss": 63488.6625, "step": 37570 }, { "epoch": 0.07591397762577924, "grad_norm": 0.0, "learning_rate": 7.516000000000001e-06, - "loss": 17.8332, + "loss": 117150.15, "step": 37580 }, { "epoch": 0.07593417825846306, - "grad_norm": 603.0989379882812, + "grad_norm": 451898.09375, "learning_rate": 7.518000000000001e-06, - "loss": 27.5582, + "loss": 119307.275, "step": 37590 }, { "epoch": 0.07595437889114687, - "grad_norm": 183.7471923828125, + "grad_norm": 9508.4677734375, "learning_rate": 7.520000000000001e-06, - "loss": 30.4387, + "loss": 250459.675, "step": 37600 }, { "epoch": 0.07597457952383069, - "grad_norm": 195.45645141601562, + "grad_norm": 113627.9296875, "learning_rate": 7.522e-06, - "loss": 52.3235, + "loss": 347847.15, "step": 37610 }, { "epoch": 0.07599478015651451, - "grad_norm": 438.6357116699219, + "grad_norm": 376252.15625, "learning_rate": 7.524e-06, - "loss": 32.2656, + "loss": 205173.75, "step": 37620 }, { "epoch": 0.07601498078919831, - "grad_norm": 324.342529296875, + "grad_norm": 291680.8125, "learning_rate": 7.526000000000001e-06, - "loss": 13.2064, + "loss": 127106.675, "step": 37630 }, { "epoch": 0.07603518142188213, - "grad_norm": 404.1524963378906, + "grad_norm": 414202.84375, "learning_rate": 7.528000000000001e-06, - "loss": 35.1141, + "loss": 172943.625, "step": 37640 }, { "epoch": 0.07605538205456595, - "grad_norm": 245.86949157714844, + "grad_norm": 17444.90625, "learning_rate": 7.530000000000001e-06, - "loss": 29.8367, + "loss": 201796.55, "step": 37650 }, { "epoch": 0.07607558268724976, - "grad_norm": 205.048828125, + "grad_norm": 74517.28125, "learning_rate": 7.532e-06, - "loss": 35.5854, + "loss": 225514.875, "step": 37660 }, { "epoch": 0.07609578331993358, - "grad_norm": 289.99774169921875, + "grad_norm": 82731.2734375, "learning_rate": 7.534e-06, - "loss": 30.7862, + "loss": 130736.375, "step": 37670 }, { "epoch": 0.0761159839526174, - "grad_norm": 216.33566284179688, + "grad_norm": 109899.546875, "learning_rate": 7.536000000000001e-06, - "loss": 11.94, + "loss": 36105.8844, "step": 37680 }, { "epoch": 0.0761361845853012, - "grad_norm": 475.70025634765625, + "grad_norm": 166748.15625, "learning_rate": 7.5380000000000005e-06, - "loss": 30.3343, + "loss": 308384.85, "step": 37690 }, { "epoch": 0.07615638521798503, - "grad_norm": 100.22981262207031, + "grad_norm": 5528.26123046875, "learning_rate": 7.540000000000001e-06, - "loss": 29.544, + "loss": 152051.8125, "step": 37700 }, { "epoch": 0.07617658585066885, - "grad_norm": 428.4676818847656, + "grad_norm": 307016.03125, "learning_rate": 7.542000000000001e-06, - "loss": 35.3024, + "loss": 123954.3875, "step": 37710 }, { "epoch": 0.07619678648335265, - "grad_norm": 449.5159606933594, + "grad_norm": 621633.0625, "learning_rate": 7.544e-06, - "loss": 29.6146, + "loss": 194842.725, "step": 37720 }, { "epoch": 0.07621698711603647, - "grad_norm": 399.134033203125, + "grad_norm": 35223.28515625, "learning_rate": 7.546000000000001e-06, - "loss": 27.0095, + "loss": 140976.075, "step": 37730 }, { "epoch": 0.07623718774872029, - "grad_norm": 325.0783996582031, + "grad_norm": 73926.8515625, "learning_rate": 7.548000000000001e-06, - "loss": 29.3766, + "loss": 68701.3625, "step": 37740 }, { "epoch": 0.07625738838140411, - "grad_norm": 131.785400390625, + "grad_norm": 12638.7978515625, "learning_rate": 7.5500000000000006e-06, - "loss": 27.9222, + "loss": 145624.625, "step": 37750 }, { "epoch": 0.07627758901408792, - "grad_norm": 279.1617736816406, + "grad_norm": 105534.65625, "learning_rate": 7.552000000000001e-06, - "loss": 23.7648, + "loss": 141258.9625, "step": 37760 }, { "epoch": 0.07629778964677174, - "grad_norm": 256.0752258300781, + "grad_norm": 16827.51171875, "learning_rate": 7.554e-06, - "loss": 28.1566, + "loss": 116532.9875, "step": 37770 }, { "epoch": 0.07631799027945556, - "grad_norm": 584.557861328125, + "grad_norm": 975889.0625, "learning_rate": 7.556000000000001e-06, - "loss": 26.8847, + "loss": 206274.9, "step": 37780 }, { "epoch": 0.07633819091213936, - "grad_norm": 1256.2279052734375, + "grad_norm": 1739479.0, "learning_rate": 7.558000000000001e-06, - "loss": 32.9997, + "loss": 224497.725, "step": 37790 }, { "epoch": 0.07635839154482318, - "grad_norm": 201.48388671875, + "grad_norm": 9364.4384765625, "learning_rate": 7.5600000000000005e-06, - "loss": 21.9864, + "loss": 91758.3938, "step": 37800 }, { "epoch": 0.076378592177507, - "grad_norm": 55.078330993652344, + "grad_norm": 1600.554443359375, "learning_rate": 7.562000000000001e-06, - "loss": 26.4167, + "loss": 77857.3938, "step": 37810 }, { "epoch": 0.07639879281019081, - "grad_norm": 175.41482543945312, + "grad_norm": 42045.76171875, "learning_rate": 7.564e-06, - "loss": 29.6828, + "loss": 325363.275, "step": 37820 }, { "epoch": 0.07641899344287463, - "grad_norm": 230.7091522216797, + "grad_norm": 37621.6015625, "learning_rate": 7.566000000000001e-06, - "loss": 43.724, + "loss": 360733.125, "step": 37830 }, { "epoch": 0.07643919407555845, - "grad_norm": 123.84848022460938, + "grad_norm": 8971.455078125, "learning_rate": 7.568000000000001e-06, - "loss": 51.0016, + "loss": 358183.2, "step": 37840 }, { "epoch": 0.07645939470824226, - "grad_norm": 361.093017578125, + "grad_norm": 201093.15625, "learning_rate": 7.57e-06, - "loss": 21.1311, + "loss": 154932.8625, "step": 37850 }, { "epoch": 0.07647959534092608, - "grad_norm": 568.5448608398438, + "grad_norm": 733842.1875, "learning_rate": 7.5720000000000005e-06, - "loss": 25.8015, + "loss": 237306.225, "step": 37860 }, { "epoch": 0.0764997959736099, - "grad_norm": 303.17047119140625, + "grad_norm": 15179.6533203125, "learning_rate": 7.574e-06, - "loss": 12.6588, + "loss": 61878.7875, "step": 37870 }, { "epoch": 0.0765199966062937, - "grad_norm": 362.2989196777344, + "grad_norm": 202406.703125, "learning_rate": 7.576000000000001e-06, - "loss": 21.2263, + "loss": 109110.05, "step": 37880 }, { "epoch": 0.07654019723897752, - "grad_norm": 1215.2640380859375, + "grad_norm": 356767.25, "learning_rate": 7.578000000000001e-06, - "loss": 30.4012, + "loss": 89702.9563, "step": 37890 }, { "epoch": 0.07656039787166134, - "grad_norm": 220.96456909179688, + "grad_norm": 101222.8828125, "learning_rate": 7.58e-06, - "loss": 25.9396, + "loss": 204696.6125, "step": 37900 }, { "epoch": 0.07658059850434516, - "grad_norm": 170.63856506347656, + "grad_norm": 8264.720703125, "learning_rate": 7.582e-06, - "loss": 24.1375, + "loss": 87415.1187, "step": 37910 }, { "epoch": 0.07660079913702897, - "grad_norm": 239.08563232421875, + "grad_norm": 334065.875, "learning_rate": 7.5840000000000006e-06, - "loss": 32.5399, + "loss": 171030.0625, "step": 37920 }, { "epoch": 0.07662099976971279, - "grad_norm": 218.6236572265625, + "grad_norm": 1760.197265625, "learning_rate": 7.586000000000001e-06, - "loss": 25.7304, + "loss": 119863.8875, "step": 37930 }, { "epoch": 0.07664120040239661, - "grad_norm": 143.8291473388672, + "grad_norm": 13289.7021484375, "learning_rate": 7.588000000000001e-06, - "loss": 17.6976, + "loss": 95149.1812, "step": 37940 }, { "epoch": 0.07666140103508041, - "grad_norm": 518.2284545898438, + "grad_norm": 718892.75, "learning_rate": 7.590000000000001e-06, - "loss": 32.8466, + "loss": 269641.35, "step": 37950 }, { "epoch": 0.07668160166776423, - "grad_norm": 638.3121337890625, + "grad_norm": 326569.65625, "learning_rate": 7.592e-06, - "loss": 28.6176, + "loss": 139990.6, "step": 37960 }, { "epoch": 0.07670180230044805, - "grad_norm": 358.8909606933594, + "grad_norm": 577237.3125, "learning_rate": 7.5940000000000005e-06, - "loss": 17.4063, + "loss": 109538.325, "step": 37970 }, { "epoch": 0.07672200293313186, - "grad_norm": 872.4479370117188, + "grad_norm": 1701408.0, "learning_rate": 7.5960000000000015e-06, - "loss": 41.7478, + "loss": 381076.625, "step": 37980 }, { "epoch": 0.07674220356581568, - "grad_norm": 452.6264953613281, + "grad_norm": 105713.515625, "learning_rate": 7.598000000000001e-06, - "loss": 51.4937, + "loss": 298618.95, "step": 37990 }, { "epoch": 0.0767624041984995, - "grad_norm": 527.2770385742188, + "grad_norm": 820447.875, "learning_rate": 7.600000000000001e-06, - "loss": 45.5283, + "loss": 215894.775, "step": 38000 }, { "epoch": 0.07678260483118331, - "grad_norm": 215.32249450683594, + "grad_norm": 15590.9287109375, "learning_rate": 7.602e-06, - "loss": 13.5002, + "loss": 42511.5281, "step": 38010 }, { "epoch": 0.07680280546386713, - "grad_norm": 340.856689453125, + "grad_norm": 63000.8828125, "learning_rate": 7.604e-06, - "loss": 23.8508, + "loss": 84444.475, "step": 38020 }, { "epoch": 0.07682300609655095, - "grad_norm": 341.5080261230469, + "grad_norm": 58258.27734375, "learning_rate": 7.606000000000001e-06, - "loss": 24.837, + "loss": 139435.1125, "step": 38030 }, { "epoch": 0.07684320672923475, - "grad_norm": 315.8214111328125, + "grad_norm": 50980.41015625, "learning_rate": 7.608000000000001e-06, - "loss": 26.5996, + "loss": 76231.9438, "step": 38040 }, { "epoch": 0.07686340736191857, - "grad_norm": 34.659786224365234, + "grad_norm": 13935.0283203125, "learning_rate": 7.610000000000001e-06, - "loss": 16.2948, + "loss": 58349.7188, "step": 38050 }, { "epoch": 0.0768836079946024, - "grad_norm": 385.8763732910156, + "grad_norm": 188745.015625, "learning_rate": 7.612e-06, - "loss": 30.8735, + "loss": 174621.2375, "step": 38060 }, { "epoch": 0.07690380862728621, - "grad_norm": 216.09523010253906, + "grad_norm": 109581.390625, "learning_rate": 7.614e-06, - "loss": 36.05, + "loss": 210955.65, "step": 38070 }, { "epoch": 0.07692400925997002, - "grad_norm": 476.50372314453125, + "grad_norm": 689185.375, "learning_rate": 7.616000000000001e-06, - "loss": 24.4925, + "loss": 145961.325, "step": 38080 }, { "epoch": 0.07694420989265384, - "grad_norm": 422.1429138183594, + "grad_norm": 186245.375, "learning_rate": 7.618000000000001e-06, - "loss": 38.6426, + "loss": 253562.825, "step": 38090 }, { "epoch": 0.07696441052533766, - "grad_norm": 453.0693664550781, + "grad_norm": 364914.96875, "learning_rate": 7.620000000000001e-06, - "loss": 29.0683, + "loss": 139239.075, "step": 38100 }, { "epoch": 0.07698461115802147, - "grad_norm": 96.81813049316406, + "grad_norm": 8197.4775390625, "learning_rate": 7.622000000000001e-06, - "loss": 55.061, + "loss": 284922.675, "step": 38110 }, { "epoch": 0.07700481179070529, - "grad_norm": 230.05735778808594, + "grad_norm": 58617.96875, "learning_rate": 7.624e-06, - "loss": 31.8604, + "loss": 214228.25, "step": 38120 }, { "epoch": 0.0770250124233891, - "grad_norm": 345.4097595214844, + "grad_norm": 160961.890625, "learning_rate": 7.626e-06, - "loss": 21.2156, + "loss": 102018.2312, "step": 38130 }, { "epoch": 0.07704521305607291, - "grad_norm": 425.7206115722656, + "grad_norm": 218007.171875, "learning_rate": 7.628000000000001e-06, - "loss": 26.9501, + "loss": 162826.425, "step": 38140 }, { "epoch": 0.07706541368875673, - "grad_norm": 570.3914794921875, + "grad_norm": 59576.14453125, "learning_rate": 7.630000000000001e-06, - "loss": 27.5759, + "loss": 82028.7375, "step": 38150 }, { "epoch": 0.07708561432144055, - "grad_norm": 61.713958740234375, + "grad_norm": 13867.0947265625, "learning_rate": 7.632e-06, - "loss": 23.2355, + "loss": 116687.1875, "step": 38160 }, { "epoch": 0.07710581495412436, - "grad_norm": 384.5368347167969, + "grad_norm": 10856.3427734375, "learning_rate": 7.634e-06, - "loss": 32.9063, + "loss": 97048.4875, "step": 38170 }, { "epoch": 0.07712601558680818, - "grad_norm": 433.4608459472656, + "grad_norm": 499524.71875, "learning_rate": 7.636e-06, - "loss": 24.4792, + "loss": 147528.8, "step": 38180 }, { "epoch": 0.077146216219492, - "grad_norm": 223.1848907470703, + "grad_norm": 89391.046875, "learning_rate": 7.638e-06, - "loss": 20.5868, + "loss": 68869.9812, "step": 38190 }, { "epoch": 0.0771664168521758, - "grad_norm": 408.47845458984375, + "grad_norm": 6776.5419921875, "learning_rate": 7.640000000000001e-06, - "loss": 34.8088, + "loss": 212098.7, "step": 38200 }, { "epoch": 0.07718661748485962, - "grad_norm": 336.5214538574219, + "grad_norm": 54849.28515625, "learning_rate": 7.642e-06, - "loss": 28.309, + "loss": 147694.45, "step": 38210 }, { "epoch": 0.07720681811754344, - "grad_norm": 384.3644104003906, + "grad_norm": 143308.96875, "learning_rate": 7.644e-06, - "loss": 21.7772, + "loss": 201754.0875, "step": 38220 }, { "epoch": 0.07722701875022726, - "grad_norm": 416.7187194824219, + "grad_norm": 14982.9677734375, "learning_rate": 7.646e-06, - "loss": 19.0977, + "loss": 117577.4625, "step": 38230 }, { "epoch": 0.07724721938291107, - "grad_norm": 214.350341796875, + "grad_norm": 26960.59765625, "learning_rate": 7.648e-06, - "loss": 18.651, + "loss": 71919.9688, "step": 38240 }, { "epoch": 0.07726742001559489, - "grad_norm": 730.2731323242188, + "grad_norm": 192825.265625, "learning_rate": 7.650000000000001e-06, - "loss": 20.8267, + "loss": 222091.975, "step": 38250 }, { "epoch": 0.07728762064827871, - "grad_norm": 273.54486083984375, + "grad_norm": 3607.896728515625, "learning_rate": 7.652e-06, - "loss": 43.3072, + "loss": 154120.175, "step": 38260 }, { "epoch": 0.07730782128096252, - "grad_norm": 216.2020263671875, + "grad_norm": 113552.421875, "learning_rate": 7.654e-06, - "loss": 18.4144, + "loss": 132107.6875, "step": 38270 }, { "epoch": 0.07732802191364634, - "grad_norm": 244.2738494873047, + "grad_norm": 25695.78125, "learning_rate": 7.656000000000001e-06, - "loss": 16.5595, + "loss": 81651.6812, "step": 38280 }, { "epoch": 0.07734822254633016, - "grad_norm": 315.9172668457031, + "grad_norm": 444118.21875, "learning_rate": 7.658e-06, - "loss": 14.2322, + "loss": 118909.225, "step": 38290 }, { "epoch": 0.07736842317901396, - "grad_norm": 353.1627197265625, + "grad_norm": 137053.75, "learning_rate": 7.660000000000001e-06, - "loss": 19.8115, + "loss": 243232.15, "step": 38300 }, { "epoch": 0.07738862381169778, - "grad_norm": 112.1511459350586, + "grad_norm": 48547.78125, "learning_rate": 7.662e-06, - "loss": 13.8515, + "loss": 61634.8125, "step": 38310 }, { "epoch": 0.0774088244443816, - "grad_norm": 296.2076416015625, + "grad_norm": 75521.140625, "learning_rate": 7.664e-06, - "loss": 21.1189, + "loss": 87961.7375, "step": 38320 }, { "epoch": 0.07742902507706541, - "grad_norm": 126.26333618164062, + "grad_norm": 171356.234375, "learning_rate": 7.666e-06, - "loss": 17.0716, + "loss": 154210.375, "step": 38330 }, { "epoch": 0.07744922570974923, - "grad_norm": 345.6390686035156, + "grad_norm": 464604.90625, "learning_rate": 7.668000000000002e-06, - "loss": 15.5117, + "loss": 91537.075, "step": 38340 }, { "epoch": 0.07746942634243305, - "grad_norm": 248.05418395996094, + "grad_norm": 75851.03125, "learning_rate": 7.670000000000001e-06, - "loss": 28.3819, + "loss": 85164.1, "step": 38350 }, { "epoch": 0.07748962697511685, - "grad_norm": 560.028076171875, + "grad_norm": 342781.8125, "learning_rate": 7.672e-06, - "loss": 24.5581, + "loss": 113865.7875, "step": 38360 }, { "epoch": 0.07750982760780067, - "grad_norm": 2880.149169921875, + "grad_norm": 490078.78125, "learning_rate": 7.674e-06, - "loss": 39.5029, + "loss": 232813.35, "step": 38370 }, { "epoch": 0.0775300282404845, - "grad_norm": 340.9888000488281, + "grad_norm": 107294.3984375, "learning_rate": 7.676e-06, - "loss": 37.1128, + "loss": 286824.925, "step": 38380 }, { "epoch": 0.07755022887316831, - "grad_norm": 820.3145751953125, + "grad_norm": 1168228.0, "learning_rate": 7.678000000000002e-06, - "loss": 23.1238, + "loss": 195335.7125, "step": 38390 }, { "epoch": 0.07757042950585212, - "grad_norm": 299.7330627441406, + "grad_norm": 24923.076171875, "learning_rate": 7.680000000000001e-06, - "loss": 21.5669, + "loss": 161939.825, "step": 38400 }, { "epoch": 0.07759063013853594, - "grad_norm": 537.774658203125, + "grad_norm": 138433.8125, "learning_rate": 7.682e-06, - "loss": 38.3354, + "loss": 149463.95, "step": 38410 }, { "epoch": 0.07761083077121976, - "grad_norm": 205.1475830078125, + "grad_norm": 93355.1015625, "learning_rate": 7.684e-06, - "loss": 26.5034, + "loss": 247103.8, "step": 38420 }, { "epoch": 0.07763103140390357, - "grad_norm": 396.5845947265625, + "grad_norm": 86373.078125, "learning_rate": 7.686e-06, - "loss": 42.9647, + "loss": 257579.075, "step": 38430 }, { "epoch": 0.07765123203658739, - "grad_norm": 157.94041442871094, + "grad_norm": 2898.438720703125, "learning_rate": 7.688000000000002e-06, - "loss": 34.7968, + "loss": 358946.675, "step": 38440 }, { "epoch": 0.0776714326692712, - "grad_norm": 305.363037109375, + "grad_norm": 134670.25, "learning_rate": 7.690000000000001e-06, - "loss": 30.2723, + "loss": 177784.425, "step": 38450 }, { "epoch": 0.07769163330195501, - "grad_norm": 287.2494812011719, + "grad_norm": 132771.421875, "learning_rate": 7.692e-06, - "loss": 21.682, + "loss": 105914.05, "step": 38460 }, { "epoch": 0.07771183393463883, - "grad_norm": 111.05783081054688, + "grad_norm": 5837.34619140625, "learning_rate": 7.694e-06, - "loss": 46.0671, + "loss": 284259.775, "step": 38470 }, { "epoch": 0.07773203456732265, - "grad_norm": 459.8643798828125, + "grad_norm": 503196.3125, "learning_rate": 7.696e-06, - "loss": 34.0809, + "loss": 217318.7, "step": 38480 }, { "epoch": 0.07775223520000646, - "grad_norm": 488.703857421875, + "grad_norm": 158212.71875, "learning_rate": 7.698000000000002e-06, - "loss": 29.491, + "loss": 223577.65, "step": 38490 }, { "epoch": 0.07777243583269028, - "grad_norm": 566.2803955078125, + "grad_norm": 10963.2568359375, "learning_rate": 7.7e-06, - "loss": 22.0141, + "loss": 78081.2188, "step": 38500 }, { "epoch": 0.0777926364653741, - "grad_norm": 668.6922607421875, + "grad_norm": 317745.84375, "learning_rate": 7.702e-06, - "loss": 29.4514, + "loss": 94592.8, "step": 38510 }, { "epoch": 0.0778128370980579, - "grad_norm": 255.03981018066406, + "grad_norm": 166512.703125, "learning_rate": 7.704000000000001e-06, - "loss": 37.2635, + "loss": 153212.2, "step": 38520 }, { "epoch": 0.07783303773074172, - "grad_norm": 656.078857421875, + "grad_norm": 894925.625, "learning_rate": 7.706e-06, - "loss": 24.1617, + "loss": 123226.725, "step": 38530 }, { "epoch": 0.07785323836342554, - "grad_norm": 506.50732421875, + "grad_norm": 87084.3671875, "learning_rate": 7.708000000000001e-06, - "loss": 28.9566, + "loss": 208159.4125, "step": 38540 }, { "epoch": 0.07787343899610936, - "grad_norm": 86.63217163085938, + "grad_norm": 19469.283203125, "learning_rate": 7.71e-06, - "loss": 29.1458, + "loss": 97628.4625, "step": 38550 }, { "epoch": 0.07789363962879317, - "grad_norm": 201.97003173828125, + "grad_norm": 56582.69140625, "learning_rate": 7.712e-06, - "loss": 10.9031, + "loss": 73745.2625, "step": 38560 }, { "epoch": 0.07791384026147699, - "grad_norm": 104.5938491821289, + "grad_norm": 22095.798828125, "learning_rate": 7.714000000000001e-06, - "loss": 17.4683, + "loss": 102497.0813, "step": 38570 }, { "epoch": 0.07793404089416081, - "grad_norm": 152.68423461914062, + "grad_norm": 31316.919921875, "learning_rate": 7.716e-06, - "loss": 12.3194, + "loss": 111419.7875, "step": 38580 }, { "epoch": 0.07795424152684462, - "grad_norm": 294.45306396484375, + "grad_norm": 68994.9921875, "learning_rate": 7.718000000000001e-06, - "loss": 39.5909, + "loss": 145910.875, "step": 38590 }, { "epoch": 0.07797444215952844, - "grad_norm": 63.02577590942383, + "grad_norm": 11130.275390625, "learning_rate": 7.72e-06, - "loss": 20.2548, + "loss": 106093.6625, "step": 38600 }, { "epoch": 0.07799464279221226, - "grad_norm": 229.97557067871094, + "grad_norm": 118929.296875, "learning_rate": 7.722e-06, - "loss": 21.1741, + "loss": 92183.85, "step": 38610 }, { "epoch": 0.07801484342489606, - "grad_norm": 218.60293579101562, + "grad_norm": 28621.45703125, "learning_rate": 7.724000000000001e-06, - "loss": 22.0307, + "loss": 103826.15, "step": 38620 }, { "epoch": 0.07803504405757988, - "grad_norm": 434.71160888671875, + "grad_norm": 1216161.375, "learning_rate": 7.726e-06, - "loss": 37.2633, + "loss": 315996.35, "step": 38630 }, { "epoch": 0.0780552446902637, - "grad_norm": 665.3933715820312, + "grad_norm": 145675.015625, "learning_rate": 7.728000000000001e-06, - "loss": 63.3594, + "loss": 309786.975, "step": 38640 }, { "epoch": 0.07807544532294751, - "grad_norm": 292.5657043457031, + "grad_norm": 9702.33203125, "learning_rate": 7.73e-06, - "loss": 28.5122, + "loss": 69810.4812, "step": 38650 }, { "epoch": 0.07809564595563133, - "grad_norm": 435.3643798828125, + "grad_norm": 338479.59375, "learning_rate": 7.732e-06, - "loss": 41.8224, + "loss": 285624.4, "step": 38660 }, { "epoch": 0.07811584658831515, - "grad_norm": 647.57861328125, + "grad_norm": 501389.84375, "learning_rate": 7.734e-06, - "loss": 30.0907, + "loss": 178115.925, "step": 38670 }, { "epoch": 0.07813604722099896, - "grad_norm": 548.1163330078125, + "grad_norm": 142535.65625, "learning_rate": 7.736e-06, - "loss": 36.9682, + "loss": 161319.275, "step": 38680 }, { "epoch": 0.07815624785368278, - "grad_norm": 214.9593505859375, + "grad_norm": 30547.517578125, "learning_rate": 7.738000000000001e-06, - "loss": 24.2031, + "loss": 103991.6438, "step": 38690 }, { "epoch": 0.0781764484863666, - "grad_norm": 192.67250061035156, + "grad_norm": 51233.20703125, "learning_rate": 7.74e-06, - "loss": 24.6837, + "loss": 149770.9125, "step": 38700 }, { "epoch": 0.07819664911905042, - "grad_norm": 467.13983154296875, + "grad_norm": 43281.265625, "learning_rate": 7.742000000000001e-06, - "loss": 30.5346, + "loss": 212669.1, "step": 38710 }, { "epoch": 0.07821684975173422, - "grad_norm": 390.44482421875, + "grad_norm": 91658.8828125, "learning_rate": 7.744e-06, - "loss": 43.1372, + "loss": 311277.65, "step": 38720 }, { "epoch": 0.07823705038441804, - "grad_norm": 137.72560119628906, + "grad_norm": 47752.2421875, "learning_rate": 7.746e-06, - "loss": 27.8986, + "loss": 319218.325, "step": 38730 }, { "epoch": 0.07825725101710186, - "grad_norm": 475.2204895019531, + "grad_norm": 366699.15625, "learning_rate": 7.748000000000001e-06, - "loss": 23.493, + "loss": 100342.5938, "step": 38740 }, { "epoch": 0.07827745164978567, - "grad_norm": 1002.3382568359375, + "grad_norm": 845503.6875, "learning_rate": 7.75e-06, - "loss": 38.5035, + "loss": 230580.125, "step": 38750 }, { "epoch": 0.07829765228246949, - "grad_norm": 866.871826171875, + "grad_norm": 519551.03125, "learning_rate": 7.752000000000001e-06, - "loss": 40.3604, + "loss": 143116.0875, "step": 38760 }, { "epoch": 0.07831785291515331, - "grad_norm": 209.6705780029297, + "grad_norm": 70365.3828125, "learning_rate": 7.754e-06, - "loss": 29.3788, + "loss": 190186.175, "step": 38770 }, { "epoch": 0.07833805354783711, - "grad_norm": 113.38577270507812, + "grad_norm": 11463.0712890625, "learning_rate": 7.756e-06, - "loss": 41.4995, + "loss": 204366.3875, "step": 38780 }, { "epoch": 0.07835825418052093, "grad_norm": 0.0, "learning_rate": 7.758000000000001e-06, - "loss": 23.5417, + "loss": 105073.025, "step": 38790 }, { "epoch": 0.07837845481320475, - "grad_norm": 854.087890625, + "grad_norm": 1283961.875, "learning_rate": 7.76e-06, - "loss": 48.5038, + "loss": 356115.275, "step": 38800 }, { "epoch": 0.07839865544588856, - "grad_norm": 98.32456970214844, + "grad_norm": 9914.6611328125, "learning_rate": 7.762000000000001e-06, - "loss": 28.5804, + "loss": 162252.8375, "step": 38810 }, { "epoch": 0.07841885607857238, - "grad_norm": 487.97796630859375, + "grad_norm": 387297.28125, "learning_rate": 7.764e-06, - "loss": 37.1735, + "loss": 177433.0625, "step": 38820 }, { "epoch": 0.0784390567112562, - "grad_norm": 874.7785034179688, + "grad_norm": 647245.125, "learning_rate": 7.766e-06, - "loss": 45.4984, + "loss": 233832.975, "step": 38830 }, { "epoch": 0.07845925734394, - "grad_norm": 306.357666015625, + "grad_norm": 48741.93359375, "learning_rate": 7.768e-06, - "loss": 19.5471, + "loss": 118657.8625, "step": 38840 }, { "epoch": 0.07847945797662383, - "grad_norm": 203.58636474609375, + "grad_norm": 80920.5703125, "learning_rate": 7.77e-06, - "loss": 19.9997, + "loss": 123488.025, "step": 38850 }, { "epoch": 0.07849965860930765, - "grad_norm": 331.6130065917969, + "grad_norm": 7965.44775390625, "learning_rate": 7.772000000000001e-06, - "loss": 43.4086, + "loss": 126334.1, "step": 38860 }, { "epoch": 0.07851985924199147, - "grad_norm": 602.2072143554688, + "grad_norm": 379851.84375, "learning_rate": 7.774e-06, - "loss": 32.2504, + "loss": 117290.6375, "step": 38870 }, { "epoch": 0.07854005987467527, - "grad_norm": 225.1083526611328, + "grad_norm": 4720.97705078125, "learning_rate": 7.776e-06, - "loss": 17.0911, + "loss": 108771.75, "step": 38880 }, { "epoch": 0.07856026050735909, - "grad_norm": 251.9822998046875, + "grad_norm": 119825.5546875, "learning_rate": 7.778e-06, - "loss": 20.4462, + "loss": 129812.45, "step": 38890 }, { "epoch": 0.07858046114004291, "grad_norm": 0.0, "learning_rate": 7.78e-06, - "loss": 26.7651, + "loss": 168888.25, "step": 38900 }, { "epoch": 0.07860066177272672, - "grad_norm": 525.1461791992188, + "grad_norm": 397752.5, "learning_rate": 7.782000000000001e-06, - "loss": 28.6109, + "loss": 131955.4625, "step": 38910 }, { "epoch": 0.07862086240541054, - "grad_norm": 77.64105987548828, + "grad_norm": 2101.947021484375, "learning_rate": 7.784e-06, - "loss": 20.1857, + "loss": 117488.35, "step": 38920 }, { "epoch": 0.07864106303809436, - "grad_norm": 324.5278015136719, + "grad_norm": 133245.03125, "learning_rate": 7.786e-06, - "loss": 22.1805, + "loss": 89574.2125, "step": 38930 }, { "epoch": 0.07866126367077816, - "grad_norm": 336.5326843261719, + "grad_norm": 5419.9892578125, "learning_rate": 7.788e-06, - "loss": 27.8223, + "loss": 141338.75, "step": 38940 }, { "epoch": 0.07868146430346198, - "grad_norm": 565.1139526367188, + "grad_norm": 1244621.0, "learning_rate": 7.790000000000002e-06, - "loss": 23.0212, + "loss": 244234.2, "step": 38950 }, { "epoch": 0.0787016649361458, - "grad_norm": 656.93408203125, + "grad_norm": 179824.140625, "learning_rate": 7.792000000000001e-06, - "loss": 15.9798, + "loss": 116327.3875, "step": 38960 }, { "epoch": 0.07872186556882961, - "grad_norm": 213.3914031982422, + "grad_norm": 19770.130859375, "learning_rate": 7.794e-06, - "loss": 41.1962, + "loss": 255371.825, "step": 38970 }, { "epoch": 0.07874206620151343, - "grad_norm": 195.66822814941406, + "grad_norm": 10084.791015625, "learning_rate": 7.796e-06, - "loss": 45.6488, + "loss": 299542.675, "step": 38980 }, { "epoch": 0.07876226683419725, - "grad_norm": 699.197021484375, + "grad_norm": 744948.3125, "learning_rate": 7.798e-06, - "loss": 32.3391, + "loss": 265098.925, "step": 38990 }, { "epoch": 0.07878246746688106, - "grad_norm": 186.45999145507812, + "grad_norm": 95679.9765625, "learning_rate": 7.800000000000002e-06, - "loss": 27.1626, + "loss": 266459.225, "step": 39000 }, { "epoch": 0.07880266809956488, - "grad_norm": 824.193359375, + "grad_norm": 1694229.25, "learning_rate": 7.802000000000001e-06, - "loss": 31.3804, + "loss": 264476.0, "step": 39010 }, { "epoch": 0.0788228687322487, - "grad_norm": 266.989013671875, + "grad_norm": 149921.8125, "learning_rate": 7.804e-06, - "loss": 14.4106, + "loss": 68783.625, "step": 39020 }, { "epoch": 0.07884306936493252, - "grad_norm": 406.9546813964844, + "grad_norm": 82944.046875, "learning_rate": 7.806e-06, - "loss": 25.4219, + "loss": 87809.575, "step": 39030 }, { "epoch": 0.07886326999761632, - "grad_norm": 454.76141357421875, + "grad_norm": 196528.015625, "learning_rate": 7.808e-06, - "loss": 24.3998, + "loss": 119354.65, "step": 39040 }, { "epoch": 0.07888347063030014, - "grad_norm": 474.2530517578125, + "grad_norm": 228442.3125, "learning_rate": 7.810000000000001e-06, - "loss": 23.5222, + "loss": 161718.8125, "step": 39050 }, { "epoch": 0.07890367126298396, - "grad_norm": 537.8236083984375, + "grad_norm": 260365.109375, "learning_rate": 7.812e-06, - "loss": 36.1553, + "loss": 228876.825, "step": 39060 }, { "epoch": 0.07892387189566777, - "grad_norm": 575.7525024414062, + "grad_norm": 234860.96875, "learning_rate": 7.814e-06, - "loss": 18.1287, + "loss": 101359.2875, "step": 39070 }, { "epoch": 0.07894407252835159, - "grad_norm": 583.9301147460938, + "grad_norm": 1358168.25, "learning_rate": 7.816000000000001e-06, - "loss": 37.095, + "loss": 266692.825, "step": 39080 }, { "epoch": 0.07896427316103541, - "grad_norm": 410.6824035644531, + "grad_norm": 21224.060546875, "learning_rate": 7.818e-06, - "loss": 25.8169, + "loss": 161025.4125, "step": 39090 }, { "epoch": 0.07898447379371921, - "grad_norm": 490.35687255859375, + "grad_norm": 439177.0, "learning_rate": 7.820000000000001e-06, - "loss": 28.4401, + "loss": 173377.2875, "step": 39100 }, { "epoch": 0.07900467442640303, - "grad_norm": 13.36561393737793, + "grad_norm": 678.4488525390625, "learning_rate": 7.822e-06, - "loss": 37.471, + "loss": 357500.8, "step": 39110 }, { "epoch": 0.07902487505908685, - "grad_norm": 153.46160888671875, + "grad_norm": 47885.24609375, "learning_rate": 7.824e-06, - "loss": 26.377, + "loss": 116690.225, "step": 39120 }, { "epoch": 0.07904507569177066, - "grad_norm": 327.67999267578125, + "grad_norm": 84919.6953125, "learning_rate": 7.826000000000001e-06, - "loss": 23.6518, + "loss": 146693.325, "step": 39130 }, { "epoch": 0.07906527632445448, - "grad_norm": 116.82891845703125, + "grad_norm": 12372.138671875, "learning_rate": 7.828000000000002e-06, - "loss": 18.7044, + "loss": 160048.0375, "step": 39140 }, { "epoch": 0.0790854769571383, - "grad_norm": 561.8224487304688, + "grad_norm": 81102.3828125, "learning_rate": 7.830000000000001e-06, - "loss": 24.7607, + "loss": 155806.9, "step": 39150 }, { "epoch": 0.07910567758982211, - "grad_norm": 250.5322723388672, + "grad_norm": 22832.923828125, "learning_rate": 7.832e-06, - "loss": 17.7831, + "loss": 41665.3063, "step": 39160 }, { "epoch": 0.07912587822250593, - "grad_norm": 516.6336669921875, + "grad_norm": 5709.001953125, "learning_rate": 7.834e-06, - "loss": 19.4361, + "loss": 79066.4688, "step": 39170 }, { "epoch": 0.07914607885518975, - "grad_norm": 644.5774536132812, + "grad_norm": 265723.34375, "learning_rate": 7.836000000000001e-06, - "loss": 17.2036, + "loss": 90277.2312, "step": 39180 }, { "epoch": 0.07916627948787357, - "grad_norm": 316.52069091796875, + "grad_norm": 56215.578125, "learning_rate": 7.838000000000002e-06, - "loss": 27.8679, + "loss": 110747.725, "step": 39190 }, { "epoch": 0.07918648012055737, - "grad_norm": 269.5502624511719, + "grad_norm": 126518.15625, "learning_rate": 7.840000000000001e-06, - "loss": 48.5693, + "loss": 270126.15, "step": 39200 }, { "epoch": 0.0792066807532412, - "grad_norm": 311.39788818359375, + "grad_norm": 5471.0283203125, "learning_rate": 7.842e-06, - "loss": 19.5268, + "loss": 70075.875, "step": 39210 }, { "epoch": 0.07922688138592501, - "grad_norm": 291.3498840332031, + "grad_norm": 138653.125, "learning_rate": 7.844e-06, - "loss": 34.8294, + "loss": 328149.75, "step": 39220 }, { "epoch": 0.07924708201860882, - "grad_norm": 137.64781188964844, + "grad_norm": 35626.5625, "learning_rate": 7.846e-06, - "loss": 17.3334, + "loss": 97900.6625, "step": 39230 }, { "epoch": 0.07926728265129264, - "grad_norm": 193.2944793701172, + "grad_norm": 52721.75, "learning_rate": 7.848000000000002e-06, - "loss": 23.836, + "loss": 156722.9, "step": 39240 }, { "epoch": 0.07928748328397646, - "grad_norm": 994.33984375, + "grad_norm": 263312.5625, "learning_rate": 7.850000000000001e-06, - "loss": 29.2834, + "loss": 70105.25, "step": 39250 }, { "epoch": 0.07930768391666027, - "grad_norm": 223.15631103515625, + "grad_norm": 109931.515625, "learning_rate": 7.852e-06, - "loss": 24.6491, + "loss": 130707.275, "step": 39260 }, { "epoch": 0.07932788454934409, - "grad_norm": 439.4519348144531, + "grad_norm": 61363.87109375, "learning_rate": 7.854e-06, - "loss": 28.0021, + "loss": 156385.9625, "step": 39270 }, { "epoch": 0.0793480851820279, - "grad_norm": 693.3170776367188, + "grad_norm": 176442.265625, "learning_rate": 7.856e-06, - "loss": 32.0823, + "loss": 233448.3, "step": 39280 }, { "epoch": 0.07936828581471171, - "grad_norm": 307.3612976074219, + "grad_norm": 241651.640625, "learning_rate": 7.858000000000002e-06, - "loss": 25.5835, + "loss": 116840.3875, "step": 39290 }, { "epoch": 0.07938848644739553, - "grad_norm": 332.3131408691406, + "grad_norm": 115721.4921875, "learning_rate": 7.860000000000001e-06, - "loss": 22.3718, + "loss": 224858.5, "step": 39300 }, { "epoch": 0.07940868708007935, - "grad_norm": 221.25428771972656, + "grad_norm": 55949.5625, "learning_rate": 7.862e-06, - "loss": 28.4674, + "loss": 219874.675, "step": 39310 }, { "epoch": 0.07942888771276316, - "grad_norm": 165.73020935058594, + "grad_norm": 8154.03173828125, "learning_rate": 7.864000000000001e-06, - "loss": 23.2906, + "loss": 98772.2375, "step": 39320 }, { "epoch": 0.07944908834544698, - "grad_norm": 364.01385498046875, + "grad_norm": 150919.140625, "learning_rate": 7.866e-06, - "loss": 29.6909, + "loss": 148866.7, "step": 39330 }, { "epoch": 0.0794692889781308, - "grad_norm": 484.53143310546875, + "grad_norm": 198255.953125, "learning_rate": 7.868000000000002e-06, - "loss": 28.3154, + "loss": 197723.4875, "step": 39340 }, { "epoch": 0.07948948961081462, - "grad_norm": 126.66886138916016, + "grad_norm": 19588.083984375, "learning_rate": 7.870000000000001e-06, - "loss": 18.5973, + "loss": 125131.3625, "step": 39350 }, { "epoch": 0.07950969024349842, - "grad_norm": 253.3115234375, + "grad_norm": 28943.767578125, "learning_rate": 7.872e-06, - "loss": 24.0437, + "loss": 167497.8875, "step": 39360 }, { "epoch": 0.07952989087618224, - "grad_norm": 1409.166748046875, + "grad_norm": 2313019.25, "learning_rate": 7.874000000000001e-06, - "loss": 49.3596, + "loss": 400745.75, "step": 39370 }, { "epoch": 0.07955009150886606, - "grad_norm": 170.87071228027344, + "grad_norm": 113874.3984375, "learning_rate": 7.876e-06, - "loss": 25.401, + "loss": 172632.725, "step": 39380 }, { "epoch": 0.07957029214154987, - "grad_norm": 80.8373031616211, + "grad_norm": 31830.673828125, "learning_rate": 7.878e-06, - "loss": 26.2307, + "loss": 142244.55, "step": 39390 }, { "epoch": 0.07959049277423369, - "grad_norm": 157.31773376464844, + "grad_norm": 10638.0615234375, "learning_rate": 7.88e-06, - "loss": 14.1911, + "loss": 99705.025, "step": 39400 }, { "epoch": 0.07961069340691751, - "grad_norm": 174.0682830810547, + "grad_norm": 13038.634765625, "learning_rate": 7.882e-06, - "loss": 20.9031, + "loss": 64053.4, "step": 39410 }, { "epoch": 0.07963089403960132, - "grad_norm": 192.36581420898438, + "grad_norm": 76474.796875, "learning_rate": 7.884000000000001e-06, - "loss": 23.4383, + "loss": 98538.6, "step": 39420 }, { "epoch": 0.07965109467228514, - "grad_norm": 355.9923095703125, + "grad_norm": 154882.5625, "learning_rate": 7.886e-06, - "loss": 27.7255, + "loss": 180362.45, "step": 39430 }, { "epoch": 0.07967129530496896, - "grad_norm": 444.078125, + "grad_norm": 299538.84375, "learning_rate": 7.888e-06, - "loss": 18.7206, + "loss": 115389.2625, "step": 39440 }, { "epoch": 0.07969149593765276, - "grad_norm": 421.5205078125, + "grad_norm": 38726.50390625, "learning_rate": 7.89e-06, - "loss": 20.7926, + "loss": 113778.875, "step": 39450 }, { "epoch": 0.07971169657033658, - "grad_norm": 483.5147399902344, + "grad_norm": 345676.0, "learning_rate": 7.892e-06, - "loss": 37.6867, + "loss": 294546.425, "step": 39460 }, { "epoch": 0.0797318972030204, - "grad_norm": 188.4382781982422, + "grad_norm": 36491.12890625, "learning_rate": 7.894000000000001e-06, - "loss": 34.2945, + "loss": 159613.1125, "step": 39470 }, { "epoch": 0.07975209783570421, - "grad_norm": 433.1778564453125, + "grad_norm": 30410.345703125, "learning_rate": 7.896e-06, - "loss": 12.7678, + "loss": 76359.0063, "step": 39480 }, { "epoch": 0.07977229846838803, - "grad_norm": 172.69244384765625, + "grad_norm": 27212.2265625, "learning_rate": 7.898e-06, - "loss": 18.4869, + "loss": 143551.475, "step": 39490 }, { "epoch": 0.07979249910107185, - "grad_norm": 365.7929992675781, + "grad_norm": 286866.09375, "learning_rate": 7.9e-06, - "loss": 20.3563, + "loss": 71964.4625, "step": 39500 }, { "epoch": 0.07981269973375565, - "grad_norm": 270.8347473144531, + "grad_norm": 94141.578125, "learning_rate": 7.902000000000002e-06, - "loss": 23.8935, + "loss": 149731.9875, "step": 39510 }, { "epoch": 0.07983290036643947, - "grad_norm": 178.36856079101562, + "grad_norm": 198540.875, "learning_rate": 7.904000000000001e-06, - "loss": 33.3036, + "loss": 244693.325, "step": 39520 }, { "epoch": 0.0798531009991233, - "grad_norm": 364.9798583984375, + "grad_norm": 10330.7470703125, "learning_rate": 7.906e-06, - "loss": 35.2624, + "loss": 177932.575, "step": 39530 }, { "epoch": 0.07987330163180711, - "grad_norm": 453.27178955078125, + "grad_norm": 211372.890625, "learning_rate": 7.908e-06, - "loss": 26.2489, + "loss": 181743.6375, "step": 39540 }, { "epoch": 0.07989350226449092, - "grad_norm": 220.4715576171875, + "grad_norm": 49324.671875, "learning_rate": 7.91e-06, - "loss": 19.8085, + "loss": 161778.575, "step": 39550 }, { "epoch": 0.07991370289717474, - "grad_norm": 264.56671142578125, + "grad_norm": 62783.1328125, "learning_rate": 7.912000000000001e-06, - "loss": 18.68, + "loss": 118563.75, "step": 39560 }, { "epoch": 0.07993390352985856, - "grad_norm": 223.5511932373047, + "grad_norm": 59906.484375, "learning_rate": 7.914e-06, - "loss": 18.9332, + "loss": 78266.7437, "step": 39570 }, { "epoch": 0.07995410416254237, - "grad_norm": 379.7696228027344, + "grad_norm": 132631.828125, "learning_rate": 7.916e-06, - "loss": 20.1655, + "loss": 105419.575, "step": 39580 }, { "epoch": 0.07997430479522619, - "grad_norm": 433.83953857421875, + "grad_norm": 41329.62109375, "learning_rate": 7.918e-06, - "loss": 18.233, + "loss": 152690.5875, "step": 39590 }, { "epoch": 0.07999450542791, - "grad_norm": 566.2931518554688, + "grad_norm": 26434.603515625, "learning_rate": 7.92e-06, - "loss": 30.7419, + "loss": 326833.625, "step": 39600 }, { "epoch": 0.08001470606059381, - "grad_norm": 138.607421875, + "grad_norm": 46785.44921875, "learning_rate": 7.922000000000001e-06, - "loss": 22.5985, + "loss": 225451.975, "step": 39610 }, { "epoch": 0.08003490669327763, - "grad_norm": 351.9343566894531, + "grad_norm": 91650.4140625, "learning_rate": 7.924e-06, - "loss": 44.8302, + "loss": 239328.925, "step": 39620 }, { "epoch": 0.08005510732596145, - "grad_norm": 298.3395080566406, + "grad_norm": 4927.634765625, "learning_rate": 7.926e-06, - "loss": 14.4203, + "loss": 58407.7937, "step": 39630 }, { "epoch": 0.08007530795864526, - "grad_norm": 175.9684295654297, + "grad_norm": 14103.59765625, "learning_rate": 7.928e-06, - "loss": 36.0825, + "loss": 343173.15, "step": 39640 }, { "epoch": 0.08009550859132908, - "grad_norm": 512.1127319335938, + "grad_norm": 446174.875, "learning_rate": 7.93e-06, - "loss": 30.4307, + "loss": 176777.625, "step": 39650 }, { "epoch": 0.0801157092240129, - "grad_norm": 1372.822998046875, + "grad_norm": 1205256.875, "learning_rate": 7.932000000000001e-06, - "loss": 69.6502, + "loss": 403562.075, "step": 39660 }, { "epoch": 0.0801359098566967, - "grad_norm": 298.2317810058594, + "grad_norm": 52897.61328125, "learning_rate": 7.934e-06, - "loss": 33.3147, + "loss": 175676.4625, "step": 39670 }, { "epoch": 0.08015611048938052, - "grad_norm": 310.0917053222656, + "grad_norm": 60915.81640625, "learning_rate": 7.936e-06, - "loss": 51.5298, + "loss": 258438.375, "step": 39680 }, { "epoch": 0.08017631112206434, - "grad_norm": 45.98925018310547, + "grad_norm": 6614.107421875, "learning_rate": 7.938000000000001e-06, - "loss": 34.7196, + "loss": 178339.725, "step": 39690 }, { "epoch": 0.08019651175474816, - "grad_norm": 322.8241882324219, + "grad_norm": 39283.125, "learning_rate": 7.94e-06, - "loss": 35.661, + "loss": 152025.9625, "step": 39700 }, { "epoch": 0.08021671238743197, - "grad_norm": 180.28321838378906, + "grad_norm": 36403.69140625, "learning_rate": 7.942000000000001e-06, - "loss": 9.7107, + "loss": 54309.0312, "step": 39710 }, { "epoch": 0.08023691302011579, - "grad_norm": 339.8468017578125, + "grad_norm": 70237.4765625, "learning_rate": 7.944e-06, - "loss": 22.9418, + "loss": 209042.2625, "step": 39720 }, { "epoch": 0.08025711365279961, - "grad_norm": 404.9114990234375, + "grad_norm": 502924.4375, "learning_rate": 7.946e-06, - "loss": 23.3855, + "loss": 188875.0875, "step": 39730 }, { "epoch": 0.08027731428548342, - "grad_norm": 406.2514953613281, + "grad_norm": 51645.90234375, "learning_rate": 7.948e-06, - "loss": 18.2815, + "loss": 51794.4563, "step": 39740 }, { "epoch": 0.08029751491816724, - "grad_norm": 206.7250518798828, + "grad_norm": 65120.48828125, "learning_rate": 7.950000000000002e-06, - "loss": 37.404, + "loss": 167996.05, "step": 39750 }, { "epoch": 0.08031771555085106, - "grad_norm": 159.21121215820312, + "grad_norm": 6095.98876953125, "learning_rate": 7.952000000000001e-06, - "loss": 28.1668, + "loss": 111943.3375, "step": 39760 }, { "epoch": 0.08033791618353486, - "grad_norm": 114.62528228759766, + "grad_norm": 19813.625, "learning_rate": 7.954e-06, - "loss": 26.7519, + "loss": 156028.6875, "step": 39770 }, { "epoch": 0.08035811681621868, - "grad_norm": 230.16160583496094, + "grad_norm": 28398.162109375, "learning_rate": 7.956e-06, - "loss": 37.9638, + "loss": 267007.975, "step": 39780 }, { "epoch": 0.0803783174489025, - "grad_norm": 213.52178955078125, + "grad_norm": 14365.162109375, "learning_rate": 7.958e-06, - "loss": 39.4954, + "loss": 190437.275, "step": 39790 }, { "epoch": 0.08039851808158631, - "grad_norm": 584.9029541015625, + "grad_norm": 682910.5, "learning_rate": 7.960000000000002e-06, - "loss": 13.7502, + "loss": 109096.8875, "step": 39800 }, { "epoch": 0.08041871871427013, - "grad_norm": 290.5389099121094, + "grad_norm": 38193.1015625, "learning_rate": 7.962000000000001e-06, - "loss": 39.4151, + "loss": 180863.9875, "step": 39810 }, { "epoch": 0.08043891934695395, - "grad_norm": 558.5338134765625, + "grad_norm": 157275.15625, "learning_rate": 7.964e-06, - "loss": 58.5718, + "loss": 241833.1, "step": 39820 }, { "epoch": 0.08045911997963776, - "grad_norm": 161.5792694091797, + "grad_norm": 9471.390625, "learning_rate": 7.966e-06, - "loss": 17.8117, + "loss": 50029.0844, "step": 39830 }, { "epoch": 0.08047932061232158, - "grad_norm": 362.5206604003906, + "grad_norm": 86033.21875, "learning_rate": 7.968e-06, - "loss": 20.2731, + "loss": 72638.7125, "step": 39840 }, { "epoch": 0.0804995212450054, - "grad_norm": 143.33853149414062, + "grad_norm": 34032.87890625, "learning_rate": 7.970000000000002e-06, - "loss": 20.3367, + "loss": 91883.8375, "step": 39850 }, { "epoch": 0.08051972187768922, - "grad_norm": 542.078369140625, + "grad_norm": 426382.71875, "learning_rate": 7.972000000000001e-06, - "loss": 19.1295, + "loss": 78338.9187, "step": 39860 }, { "epoch": 0.08053992251037302, - "grad_norm": 435.0280456542969, + "grad_norm": 41742.25390625, "learning_rate": 7.974e-06, - "loss": 23.3512, + "loss": 69527.9438, "step": 39870 }, { "epoch": 0.08056012314305684, - "grad_norm": 465.48614501953125, + "grad_norm": 184969.921875, "learning_rate": 7.976000000000001e-06, - "loss": 21.44, + "loss": 133475.4625, "step": 39880 }, { "epoch": 0.08058032377574066, - "grad_norm": 265.0390625, + "grad_norm": 57172.140625, "learning_rate": 7.978e-06, - "loss": 22.3309, + "loss": 241342.75, "step": 39890 }, { "epoch": 0.08060052440842447, - "grad_norm": 332.6612243652344, + "grad_norm": 135546.28125, "learning_rate": 7.980000000000002e-06, - "loss": 24.4405, + "loss": 182630.825, "step": 39900 }, { "epoch": 0.08062072504110829, - "grad_norm": 153.6615753173828, + "grad_norm": 19111.326171875, "learning_rate": 7.982e-06, - "loss": 44.8212, + "loss": 169484.1875, "step": 39910 }, { "epoch": 0.08064092567379211, - "grad_norm": 295.36700439453125, + "grad_norm": 531949.125, "learning_rate": 7.984e-06, - "loss": 16.5709, + "loss": 106207.4375, "step": 39920 }, { "epoch": 0.08066112630647591, - "grad_norm": 449.8714599609375, + "grad_norm": 120789.8671875, "learning_rate": 7.986000000000001e-06, - "loss": 42.9904, + "loss": 292753.575, "step": 39930 }, { "epoch": 0.08068132693915973, - "grad_norm": 280.0345153808594, + "grad_norm": 40963.6171875, "learning_rate": 7.988e-06, - "loss": 44.0653, + "loss": 179233.9625, "step": 39940 }, { "epoch": 0.08070152757184355, - "grad_norm": 90.7134017944336, + "grad_norm": 3614.74072265625, "learning_rate": 7.990000000000001e-06, - "loss": 19.1288, + "loss": 223563.975, "step": 39950 }, { "epoch": 0.08072172820452736, - "grad_norm": 610.0533447265625, + "grad_norm": 678605.4375, "learning_rate": 7.992e-06, - "loss": 33.2247, + "loss": 332635.4, "step": 39960 }, { "epoch": 0.08074192883721118, - "grad_norm": 709.1533203125, + "grad_norm": 617105.0, "learning_rate": 7.994e-06, - "loss": 19.8445, + "loss": 140068.9, "step": 39970 }, { "epoch": 0.080762129469895, - "grad_norm": 237.41796875, + "grad_norm": 23942.259765625, "learning_rate": 7.996000000000001e-06, - "loss": 21.7348, + "loss": 99126.3938, "step": 39980 }, { "epoch": 0.0807823301025788, - "grad_norm": 384.1241149902344, + "grad_norm": 781618.6875, "learning_rate": 7.998e-06, - "loss": 40.8574, + "loss": 194793.6625, "step": 39990 }, { "epoch": 0.08080253073526263, - "grad_norm": 416.15740966796875, + "grad_norm": 284140.59375, "learning_rate": 8.000000000000001e-06, - "loss": 13.9586, + "loss": 75937.7125, "step": 40000 + }, + { + "epoch": 0.08082273136794645, + "grad_norm": 1116037.5, + "learning_rate": 8.002e-06, + "loss": 312687.725, + "step": 40010 + }, + { + "epoch": 0.08084293200063027, + "grad_norm": 19052.2265625, + "learning_rate": 8.004e-06, + "loss": 270561.45, + "step": 40020 + }, + { + "epoch": 0.08086313263331407, + "grad_norm": 10770.837890625, + "learning_rate": 8.006000000000001e-06, + "loss": 78569.1875, + "step": 40030 + }, + { + "epoch": 0.08088333326599789, + "grad_norm": 37625.00390625, + "learning_rate": 8.008e-06, + "loss": 171767.725, + "step": 40040 + }, + { + "epoch": 0.08090353389868171, + "grad_norm": 148556.828125, + "learning_rate": 8.010000000000001e-06, + "loss": 72512.95, + "step": 40050 + }, + { + "epoch": 0.08092373453136552, + "grad_norm": 9366.8486328125, + "learning_rate": 8.012e-06, + "loss": 131048.4875, + "step": 40060 + }, + { + "epoch": 0.08094393516404934, + "grad_norm": 22191.455078125, + "learning_rate": 8.014e-06, + "loss": 224289.75, + "step": 40070 + }, + { + "epoch": 0.08096413579673316, + "grad_norm": 98419.15625, + "learning_rate": 8.016e-06, + "loss": 118444.4625, + "step": 40080 + }, + { + "epoch": 0.08098433642941696, + "grad_norm": 1250558.0, + "learning_rate": 8.018e-06, + "loss": 219622.4, + "step": 40090 + }, + { + "epoch": 0.08100453706210078, + "grad_norm": 1447.7890625, + "learning_rate": 8.020000000000001e-06, + "loss": 65957.0063, + "step": 40100 + }, + { + "epoch": 0.0810247376947846, + "grad_norm": 292105.34375, + "learning_rate": 8.022e-06, + "loss": 191834.875, + "step": 40110 + }, + { + "epoch": 0.08104493832746841, + "grad_norm": 79470.609375, + "learning_rate": 8.024000000000001e-06, + "loss": 147223.35, + "step": 40120 + }, + { + "epoch": 0.08106513896015223, + "grad_norm": 7481.99658203125, + "learning_rate": 8.026e-06, + "loss": 154626.7625, + "step": 40130 + }, + { + "epoch": 0.08108533959283605, + "grad_norm": 207224.8125, + "learning_rate": 8.028e-06, + "loss": 362053.9, + "step": 40140 + }, + { + "epoch": 0.08110554022551986, + "grad_norm": 64039.73046875, + "learning_rate": 8.030000000000001e-06, + "loss": 280863.15, + "step": 40150 + }, + { + "epoch": 0.08112574085820368, + "grad_norm": 272914.15625, + "learning_rate": 8.032e-06, + "loss": 183851.225, + "step": 40160 + }, + { + "epoch": 0.0811459414908875, + "grad_norm": 106858.984375, + "learning_rate": 8.034000000000001e-06, + "loss": 254025.925, + "step": 40170 + }, + { + "epoch": 0.08116614212357132, + "grad_norm": 3362.61865234375, + "learning_rate": 8.036e-06, + "loss": 144256.175, + "step": 40180 + }, + { + "epoch": 0.08118634275625512, + "grad_norm": 144216.28125, + "learning_rate": 8.038e-06, + "loss": 260774.375, + "step": 40190 + }, + { + "epoch": 0.08120654338893894, + "grad_norm": 71470.421875, + "learning_rate": 8.040000000000001e-06, + "loss": 100767.2688, + "step": 40200 + }, + { + "epoch": 0.08122674402162276, + "grad_norm": 143596.671875, + "learning_rate": 8.042e-06, + "loss": 303846.225, + "step": 40210 + }, + { + "epoch": 0.08124694465430657, + "grad_norm": 394106.4375, + "learning_rate": 8.044000000000001e-06, + "loss": 180598.775, + "step": 40220 + }, + { + "epoch": 0.08126714528699039, + "grad_norm": 927369.3125, + "learning_rate": 8.046e-06, + "loss": 290752.575, + "step": 40230 + }, + { + "epoch": 0.08128734591967421, + "grad_norm": 107564.9921875, + "learning_rate": 8.048e-06, + "loss": 354578.675, + "step": 40240 + }, + { + "epoch": 0.08130754655235801, + "grad_norm": 322061.625, + "learning_rate": 8.050000000000001e-06, + "loss": 384030.75, + "step": 40250 + }, + { + "epoch": 0.08132774718504183, + "grad_norm": 6970.4951171875, + "learning_rate": 8.052e-06, + "loss": 88366.3125, + "step": 40260 + }, + { + "epoch": 0.08134794781772565, + "grad_norm": 66683.2734375, + "learning_rate": 8.054000000000001e-06, + "loss": 189676.45, + "step": 40270 + }, + { + "epoch": 0.08136814845040946, + "grad_norm": 1556561.875, + "learning_rate": 8.056e-06, + "loss": 279709.175, + "step": 40280 + }, + { + "epoch": 0.08138834908309328, + "grad_norm": 345366.34375, + "learning_rate": 8.058e-06, + "loss": 256450.225, + "step": 40290 + }, + { + "epoch": 0.0814085497157771, + "grad_norm": 7572.31640625, + "learning_rate": 8.06e-06, + "loss": 125399.7625, + "step": 40300 + }, + { + "epoch": 0.08142875034846091, + "grad_norm": 34962.55078125, + "learning_rate": 8.062000000000002e-06, + "loss": 182003.4125, + "step": 40310 + }, + { + "epoch": 0.08144895098114473, + "grad_norm": 58664.265625, + "learning_rate": 8.064000000000001e-06, + "loss": 106875.7, + "step": 40320 + }, + { + "epoch": 0.08146915161382855, + "grad_norm": 11279.7919921875, + "learning_rate": 8.066e-06, + "loss": 146956.3125, + "step": 40330 + }, + { + "epoch": 0.08148935224651237, + "grad_norm": 244947.484375, + "learning_rate": 8.068e-06, + "loss": 94030.7, + "step": 40340 + }, + { + "epoch": 0.08150955287919617, + "grad_norm": 835446.125, + "learning_rate": 8.07e-06, + "loss": 303164.6, + "step": 40350 + }, + { + "epoch": 0.08152975351188, + "grad_norm": 26112.208984375, + "learning_rate": 8.072000000000002e-06, + "loss": 193257.15, + "step": 40360 + }, + { + "epoch": 0.08154995414456381, + "grad_norm": 6330.16064453125, + "learning_rate": 8.074000000000001e-06, + "loss": 191638.0625, + "step": 40370 + }, + { + "epoch": 0.08157015477724762, + "grad_norm": 564855.4375, + "learning_rate": 8.076e-06, + "loss": 244806.875, + "step": 40380 + }, + { + "epoch": 0.08159035540993144, + "grad_norm": 88348.5546875, + "learning_rate": 8.078e-06, + "loss": 117246.375, + "step": 40390 + }, + { + "epoch": 0.08161055604261526, + "grad_norm": 184449.515625, + "learning_rate": 8.08e-06, + "loss": 146311.3125, + "step": 40400 + }, + { + "epoch": 0.08163075667529907, + "grad_norm": 161243.875, + "learning_rate": 8.082000000000002e-06, + "loss": 120608.9, + "step": 40410 + }, + { + "epoch": 0.08165095730798289, + "grad_norm": 7781.037109375, + "learning_rate": 8.084000000000001e-06, + "loss": 205604.6625, + "step": 40420 + }, + { + "epoch": 0.0816711579406667, + "grad_norm": 7540.23974609375, + "learning_rate": 8.086e-06, + "loss": 38625.1625, + "step": 40430 + }, + { + "epoch": 0.08169135857335051, + "grad_norm": 117411.46875, + "learning_rate": 8.088e-06, + "loss": 97749.0375, + "step": 40440 + }, + { + "epoch": 0.08171155920603433, + "grad_norm": 12258.5478515625, + "learning_rate": 8.09e-06, + "loss": 120775.4625, + "step": 40450 + }, + { + "epoch": 0.08173175983871815, + "grad_norm": 0.0, + "learning_rate": 8.092000000000001e-06, + "loss": 83272.3938, + "step": 40460 + }, + { + "epoch": 0.08175196047140196, + "grad_norm": 0.0, + "learning_rate": 8.094e-06, + "loss": 75563.4625, + "step": 40470 + }, + { + "epoch": 0.08177216110408578, + "grad_norm": 242645.046875, + "learning_rate": 8.096e-06, + "loss": 268529.875, + "step": 40480 + }, + { + "epoch": 0.0817923617367696, + "grad_norm": 238784.5625, + "learning_rate": 8.098000000000001e-06, + "loss": 188407.125, + "step": 40490 + }, + { + "epoch": 0.08181256236945342, + "grad_norm": 176840.296875, + "learning_rate": 8.1e-06, + "loss": 207940.575, + "step": 40500 + }, + { + "epoch": 0.08183276300213722, + "grad_norm": 102627.453125, + "learning_rate": 8.102000000000001e-06, + "loss": 126196.0375, + "step": 40510 + }, + { + "epoch": 0.08185296363482104, + "grad_norm": 274988.125, + "learning_rate": 8.104e-06, + "loss": 159013.9875, + "step": 40520 + }, + { + "epoch": 0.08187316426750486, + "grad_norm": 33086.5, + "learning_rate": 8.106e-06, + "loss": 183981.85, + "step": 40530 + }, + { + "epoch": 0.08189336490018867, + "grad_norm": 26957.705078125, + "learning_rate": 8.108000000000001e-06, + "loss": 153530.3375, + "step": 40540 + }, + { + "epoch": 0.08191356553287249, + "grad_norm": 470043.71875, + "learning_rate": 8.110000000000002e-06, + "loss": 178281.85, + "step": 40550 + }, + { + "epoch": 0.08193376616555631, + "grad_norm": 15659.8291015625, + "learning_rate": 8.112000000000001e-06, + "loss": 233955.725, + "step": 40560 + }, + { + "epoch": 0.08195396679824012, + "grad_norm": 10979.7978515625, + "learning_rate": 8.114e-06, + "loss": 180527.275, + "step": 40570 + }, + { + "epoch": 0.08197416743092394, + "grad_norm": 18408.6640625, + "learning_rate": 8.116e-06, + "loss": 57928.575, + "step": 40580 + }, + { + "epoch": 0.08199436806360776, + "grad_norm": 578696.625, + "learning_rate": 8.118000000000001e-06, + "loss": 150501.625, + "step": 40590 + }, + { + "epoch": 0.08201456869629156, + "grad_norm": 36497.70703125, + "learning_rate": 8.120000000000002e-06, + "loss": 113494.2875, + "step": 40600 + }, + { + "epoch": 0.08203476932897538, + "grad_norm": 547786.75, + "learning_rate": 8.122000000000001e-06, + "loss": 256544.475, + "step": 40610 + }, + { + "epoch": 0.0820549699616592, + "grad_norm": 449659.96875, + "learning_rate": 8.124e-06, + "loss": 235036.9, + "step": 40620 + }, + { + "epoch": 0.08207517059434301, + "grad_norm": 24230.3125, + "learning_rate": 8.126e-06, + "loss": 81485.975, + "step": 40630 + }, + { + "epoch": 0.08209537122702683, + "grad_norm": 78508.34375, + "learning_rate": 8.128e-06, + "loss": 144096.1125, + "step": 40640 + }, + { + "epoch": 0.08211557185971065, + "grad_norm": 189980.3125, + "learning_rate": 8.13e-06, + "loss": 185729.875, + "step": 40650 + }, + { + "epoch": 0.08213577249239447, + "grad_norm": 183532.59375, + "learning_rate": 8.132000000000001e-06, + "loss": 134936.675, + "step": 40660 + }, + { + "epoch": 0.08215597312507827, + "grad_norm": 65921.5546875, + "learning_rate": 8.134e-06, + "loss": 95293.4563, + "step": 40670 + }, + { + "epoch": 0.0821761737577621, + "grad_norm": 166758.078125, + "learning_rate": 8.136000000000001e-06, + "loss": 168090.6875, + "step": 40680 + }, + { + "epoch": 0.08219637439044591, + "grad_norm": 81132.9453125, + "learning_rate": 8.138e-06, + "loss": 154676.5875, + "step": 40690 + }, + { + "epoch": 0.08221657502312972, + "grad_norm": 76604.6875, + "learning_rate": 8.14e-06, + "loss": 98091.3062, + "step": 40700 + }, + { + "epoch": 0.08223677565581354, + "grad_norm": 89228.2890625, + "learning_rate": 8.142000000000001e-06, + "loss": 119794.925, + "step": 40710 + }, + { + "epoch": 0.08225697628849736, + "grad_norm": 25862.984375, + "learning_rate": 8.144e-06, + "loss": 156541.725, + "step": 40720 + }, + { + "epoch": 0.08227717692118117, + "grad_norm": 8245.376953125, + "learning_rate": 8.146000000000001e-06, + "loss": 179704.075, + "step": 40730 + }, + { + "epoch": 0.08229737755386499, + "grad_norm": 18674.7890625, + "learning_rate": 8.148e-06, + "loss": 194911.35, + "step": 40740 + }, + { + "epoch": 0.0823175781865488, + "grad_norm": 319475.0625, + "learning_rate": 8.15e-06, + "loss": 216135.9, + "step": 40750 + }, + { + "epoch": 0.08233777881923261, + "grad_norm": 1728085.5, + "learning_rate": 8.152000000000001e-06, + "loss": 311322.925, + "step": 40760 + }, + { + "epoch": 0.08235797945191643, + "grad_norm": 12291.5517578125, + "learning_rate": 8.154e-06, + "loss": 165717.775, + "step": 40770 + }, + { + "epoch": 0.08237818008460025, + "grad_norm": 131324.03125, + "learning_rate": 8.156000000000001e-06, + "loss": 186033.95, + "step": 40780 + }, + { + "epoch": 0.08239838071728406, + "grad_norm": 429545.75, + "learning_rate": 8.158e-06, + "loss": 222079.3, + "step": 40790 + }, + { + "epoch": 0.08241858134996788, + "grad_norm": 11972.4921875, + "learning_rate": 8.16e-06, + "loss": 224468.225, + "step": 40800 + }, + { + "epoch": 0.0824387819826517, + "grad_norm": 85421.4140625, + "learning_rate": 8.162e-06, + "loss": 134903.675, + "step": 40810 + }, + { + "epoch": 0.08245898261533552, + "grad_norm": 150769.59375, + "learning_rate": 8.164e-06, + "loss": 96090.6875, + "step": 40820 + }, + { + "epoch": 0.08247918324801932, + "grad_norm": 1397811.75, + "learning_rate": 8.166000000000001e-06, + "loss": 485328.3, + "step": 40830 + }, + { + "epoch": 0.08249938388070314, + "grad_norm": 128796.6328125, + "learning_rate": 8.168e-06, + "loss": 105012.5625, + "step": 40840 + }, + { + "epoch": 0.08251958451338696, + "grad_norm": 618736.5, + "learning_rate": 8.17e-06, + "loss": 130936.2, + "step": 40850 + }, + { + "epoch": 0.08253978514607077, + "grad_norm": 30746.845703125, + "learning_rate": 8.172e-06, + "loss": 112398.35, + "step": 40860 + }, + { + "epoch": 0.08255998577875459, + "grad_norm": 43372.64453125, + "learning_rate": 8.174e-06, + "loss": 112423.475, + "step": 40870 + }, + { + "epoch": 0.08258018641143841, + "grad_norm": 141136.40625, + "learning_rate": 8.176000000000001e-06, + "loss": 199968.2625, + "step": 40880 + }, + { + "epoch": 0.08260038704412222, + "grad_norm": 383289.59375, + "learning_rate": 8.178e-06, + "loss": 218036.75, + "step": 40890 + }, + { + "epoch": 0.08262058767680604, + "grad_norm": 1181011.5, + "learning_rate": 8.18e-06, + "loss": 152162.95, + "step": 40900 + }, + { + "epoch": 0.08264078830948986, + "grad_norm": 709695.8125, + "learning_rate": 8.182e-06, + "loss": 138116.8375, + "step": 40910 + }, + { + "epoch": 0.08266098894217366, + "grad_norm": 10606.7392578125, + "learning_rate": 8.184000000000002e-06, + "loss": 115901.175, + "step": 40920 + }, + { + "epoch": 0.08268118957485748, + "grad_norm": 81603.7890625, + "learning_rate": 8.186000000000001e-06, + "loss": 306885.0, + "step": 40930 + }, + { + "epoch": 0.0827013902075413, + "grad_norm": 319972.15625, + "learning_rate": 8.188e-06, + "loss": 120625.1, + "step": 40940 + }, + { + "epoch": 0.08272159084022511, + "grad_norm": 154198.875, + "learning_rate": 8.19e-06, + "loss": 169847.65, + "step": 40950 + }, + { + "epoch": 0.08274179147290893, + "grad_norm": 40977.8203125, + "learning_rate": 8.192e-06, + "loss": 160510.25, + "step": 40960 + }, + { + "epoch": 0.08276199210559275, + "grad_norm": 575275.125, + "learning_rate": 8.194000000000002e-06, + "loss": 322182.125, + "step": 40970 + }, + { + "epoch": 0.08278219273827657, + "grad_norm": 922648.5, + "learning_rate": 8.196e-06, + "loss": 283848.1, + "step": 40980 + }, + { + "epoch": 0.08280239337096038, + "grad_norm": 33533.8984375, + "learning_rate": 8.198e-06, + "loss": 69930.6438, + "step": 40990 + }, + { + "epoch": 0.0828225940036442, + "grad_norm": 48277.7109375, + "learning_rate": 8.2e-06, + "loss": 298331.825, + "step": 41000 + }, + { + "epoch": 0.08284279463632802, + "grad_norm": 24796.5, + "learning_rate": 8.202e-06, + "loss": 294086.875, + "step": 41010 + }, + { + "epoch": 0.08286299526901182, + "grad_norm": 42129.89453125, + "learning_rate": 8.204000000000001e-06, + "loss": 39641.7844, + "step": 41020 + }, + { + "epoch": 0.08288319590169564, + "grad_norm": 281300.4375, + "learning_rate": 8.206e-06, + "loss": 169891.7, + "step": 41030 + }, + { + "epoch": 0.08290339653437946, + "grad_norm": 593753.75, + "learning_rate": 8.208e-06, + "loss": 106966.5375, + "step": 41040 + }, + { + "epoch": 0.08292359716706327, + "grad_norm": 219132.875, + "learning_rate": 8.210000000000001e-06, + "loss": 89211.1125, + "step": 41050 + }, + { + "epoch": 0.08294379779974709, + "grad_norm": 118648.421875, + "learning_rate": 8.212e-06, + "loss": 348828.275, + "step": 41060 + }, + { + "epoch": 0.08296399843243091, + "grad_norm": 116059.703125, + "learning_rate": 8.214000000000001e-06, + "loss": 155344.425, + "step": 41070 + }, + { + "epoch": 0.08298419906511471, + "grad_norm": 56421.3046875, + "learning_rate": 8.216e-06, + "loss": 43282.7625, + "step": 41080 + }, + { + "epoch": 0.08300439969779853, + "grad_norm": 13216.76171875, + "learning_rate": 8.218e-06, + "loss": 120179.1625, + "step": 41090 + }, + { + "epoch": 0.08302460033048235, + "grad_norm": 645928.75, + "learning_rate": 8.220000000000001e-06, + "loss": 208015.55, + "step": 41100 + }, + { + "epoch": 0.08304480096316616, + "grad_norm": 211440.453125, + "learning_rate": 8.222000000000002e-06, + "loss": 144570.3125, + "step": 41110 + }, + { + "epoch": 0.08306500159584998, + "grad_norm": 165411.34375, + "learning_rate": 8.224000000000001e-06, + "loss": 83207.375, + "step": 41120 + }, + { + "epoch": 0.0830852022285338, + "grad_norm": 435917.0625, + "learning_rate": 8.226e-06, + "loss": 117606.2375, + "step": 41130 + }, + { + "epoch": 0.08310540286121762, + "grad_norm": 41520.08203125, + "learning_rate": 8.228e-06, + "loss": 82686.9563, + "step": 41140 + }, + { + "epoch": 0.08312560349390143, + "grad_norm": 7059.73779296875, + "learning_rate": 8.23e-06, + "loss": 265902.775, + "step": 41150 + }, + { + "epoch": 0.08314580412658525, + "grad_norm": 4907.54443359375, + "learning_rate": 8.232000000000002e-06, + "loss": 230198.95, + "step": 41160 + }, + { + "epoch": 0.08316600475926907, + "grad_norm": 63284.62890625, + "learning_rate": 8.234000000000001e-06, + "loss": 72777.7, + "step": 41170 + }, + { + "epoch": 0.08318620539195287, + "grad_norm": 154450.09375, + "learning_rate": 8.236e-06, + "loss": 368125.4, + "step": 41180 + }, + { + "epoch": 0.08320640602463669, + "grad_norm": 32939.6484375, + "learning_rate": 8.238e-06, + "loss": 343547.575, + "step": 41190 + }, + { + "epoch": 0.08322660665732051, + "grad_norm": 88273.1640625, + "learning_rate": 8.24e-06, + "loss": 197283.4125, + "step": 41200 + }, + { + "epoch": 0.08324680729000432, + "grad_norm": 6613.77197265625, + "learning_rate": 8.242000000000002e-06, + "loss": 251455.875, + "step": 41210 + }, + { + "epoch": 0.08326700792268814, + "grad_norm": 25805.8515625, + "learning_rate": 8.244000000000001e-06, + "loss": 84469.2125, + "step": 41220 + }, + { + "epoch": 0.08328720855537196, + "grad_norm": 22237.171875, + "learning_rate": 8.246e-06, + "loss": 166250.5, + "step": 41230 + }, + { + "epoch": 0.08330740918805576, + "grad_norm": 680429.9375, + "learning_rate": 8.248e-06, + "loss": 129089.95, + "step": 41240 + }, + { + "epoch": 0.08332760982073958, + "grad_norm": 8040.21923828125, + "learning_rate": 8.25e-06, + "loss": 260085.875, + "step": 41250 + }, + { + "epoch": 0.0833478104534234, + "grad_norm": 34905.26953125, + "learning_rate": 8.252000000000002e-06, + "loss": 104267.55, + "step": 41260 + }, + { + "epoch": 0.08336801108610721, + "grad_norm": 30362.48046875, + "learning_rate": 8.254000000000001e-06, + "loss": 182800.0, + "step": 41270 + }, + { + "epoch": 0.08338821171879103, + "grad_norm": 97423.2109375, + "learning_rate": 8.256e-06, + "loss": 190063.4, + "step": 41280 + }, + { + "epoch": 0.08340841235147485, + "grad_norm": 28418.638671875, + "learning_rate": 8.258000000000001e-06, + "loss": 166216.9, + "step": 41290 + }, + { + "epoch": 0.08342861298415867, + "grad_norm": 103295.5234375, + "learning_rate": 8.26e-06, + "loss": 131423.025, + "step": 41300 + }, + { + "epoch": 0.08344881361684248, + "grad_norm": 83154.40625, + "learning_rate": 8.262000000000002e-06, + "loss": 184753.2, + "step": 41310 + }, + { + "epoch": 0.0834690142495263, + "grad_norm": 332631.25, + "learning_rate": 8.264e-06, + "loss": 418004.875, + "step": 41320 + }, + { + "epoch": 0.08348921488221012, + "grad_norm": 595163.5, + "learning_rate": 8.266e-06, + "loss": 294452.45, + "step": 41330 + }, + { + "epoch": 0.08350941551489392, + "grad_norm": 465247.96875, + "learning_rate": 8.268000000000001e-06, + "loss": 262920.325, + "step": 41340 + }, + { + "epoch": 0.08352961614757774, + "grad_norm": 236555.078125, + "learning_rate": 8.27e-06, + "loss": 325949.775, + "step": 41350 + }, + { + "epoch": 0.08354981678026156, + "grad_norm": 50044.33984375, + "learning_rate": 8.272000000000001e-06, + "loss": 280169.05, + "step": 41360 + }, + { + "epoch": 0.08357001741294537, + "grad_norm": 376975.65625, + "learning_rate": 8.274e-06, + "loss": 128220.7875, + "step": 41370 + }, + { + "epoch": 0.08359021804562919, + "grad_norm": 686409.875, + "learning_rate": 8.276e-06, + "loss": 239926.9, + "step": 41380 + }, + { + "epoch": 0.08361041867831301, + "grad_norm": 0.0, + "learning_rate": 8.278000000000001e-06, + "loss": 213427.85, + "step": 41390 + }, + { + "epoch": 0.08363061931099681, + "grad_norm": 7680.9150390625, + "learning_rate": 8.28e-06, + "loss": 236680.775, + "step": 41400 + }, + { + "epoch": 0.08365081994368063, + "grad_norm": 335495.1875, + "learning_rate": 8.282000000000001e-06, + "loss": 309547.25, + "step": 41410 + }, + { + "epoch": 0.08367102057636445, + "grad_norm": 2238.788330078125, + "learning_rate": 8.284e-06, + "loss": 77841.025, + "step": 41420 + }, + { + "epoch": 0.08369122120904826, + "grad_norm": 686734.375, + "learning_rate": 8.286e-06, + "loss": 160990.1, + "step": 41430 + }, + { + "epoch": 0.08371142184173208, + "grad_norm": 44876.49609375, + "learning_rate": 8.288000000000001e-06, + "loss": 134226.3875, + "step": 41440 + }, + { + "epoch": 0.0837316224744159, + "grad_norm": 121625.7109375, + "learning_rate": 8.29e-06, + "loss": 124170.9375, + "step": 41450 + }, + { + "epoch": 0.08375182310709972, + "grad_norm": 68947.859375, + "learning_rate": 8.292000000000001e-06, + "loss": 157359.3, + "step": 41460 + }, + { + "epoch": 0.08377202373978353, + "grad_norm": 132056.140625, + "learning_rate": 8.294e-06, + "loss": 213833.575, + "step": 41470 + }, + { + "epoch": 0.08379222437246735, + "grad_norm": 249806.71875, + "learning_rate": 8.296000000000002e-06, + "loss": 225232.025, + "step": 41480 + }, + { + "epoch": 0.08381242500515117, + "grad_norm": 56602.4921875, + "learning_rate": 8.298000000000001e-06, + "loss": 65073.75, + "step": 41490 + }, + { + "epoch": 0.08383262563783497, + "grad_norm": 1395085.25, + "learning_rate": 8.3e-06, + "loss": 172300.475, + "step": 41500 + }, + { + "epoch": 0.0838528262705188, + "grad_norm": 331877.90625, + "learning_rate": 8.302000000000001e-06, + "loss": 96719.6687, + "step": 41510 + }, + { + "epoch": 0.08387302690320261, + "grad_norm": 663754.6875, + "learning_rate": 8.304e-06, + "loss": 152759.1125, + "step": 41520 + }, + { + "epoch": 0.08389322753588642, + "grad_norm": 14888.568359375, + "learning_rate": 8.306000000000001e-06, + "loss": 123926.625, + "step": 41530 + }, + { + "epoch": 0.08391342816857024, + "grad_norm": 15214.8896484375, + "learning_rate": 8.308e-06, + "loss": 331934.575, + "step": 41540 + }, + { + "epoch": 0.08393362880125406, + "grad_norm": 1062265.625, + "learning_rate": 8.31e-06, + "loss": 129586.475, + "step": 41550 + }, + { + "epoch": 0.08395382943393787, + "grad_norm": 53707.58984375, + "learning_rate": 8.312000000000001e-06, + "loss": 60168.8438, + "step": 41560 + }, + { + "epoch": 0.08397403006662169, + "grad_norm": 536435.75, + "learning_rate": 8.314e-06, + "loss": 166807.725, + "step": 41570 + }, + { + "epoch": 0.0839942306993055, + "grad_norm": 174495.90625, + "learning_rate": 8.316000000000001e-06, + "loss": 148820.475, + "step": 41580 + }, + { + "epoch": 0.08401443133198931, + "grad_norm": 50139.4375, + "learning_rate": 8.318e-06, + "loss": 132927.275, + "step": 41590 + }, + { + "epoch": 0.08403463196467313, + "grad_norm": 5657.75244140625, + "learning_rate": 8.32e-06, + "loss": 142345.975, + "step": 41600 + }, + { + "epoch": 0.08405483259735695, + "grad_norm": 104852.953125, + "learning_rate": 8.322000000000001e-06, + "loss": 253618.25, + "step": 41610 + }, + { + "epoch": 0.08407503323004077, + "grad_norm": 648662.0625, + "learning_rate": 8.324e-06, + "loss": 200340.825, + "step": 41620 + }, + { + "epoch": 0.08409523386272458, + "grad_norm": 12946.556640625, + "learning_rate": 8.326000000000001e-06, + "loss": 71528.5437, + "step": 41630 + }, + { + "epoch": 0.0841154344954084, + "grad_norm": 48537.328125, + "learning_rate": 8.328e-06, + "loss": 126782.975, + "step": 41640 + }, + { + "epoch": 0.08413563512809222, + "grad_norm": 23945.296875, + "learning_rate": 8.33e-06, + "loss": 247878.425, + "step": 41650 + }, + { + "epoch": 0.08415583576077602, + "grad_norm": 20849.435546875, + "learning_rate": 8.332000000000001e-06, + "loss": 174364.3875, + "step": 41660 + }, + { + "epoch": 0.08417603639345984, + "grad_norm": 2892.1923828125, + "learning_rate": 8.334e-06, + "loss": 44065.5156, + "step": 41670 + }, + { + "epoch": 0.08419623702614366, + "grad_norm": 162269.78125, + "learning_rate": 8.336000000000001e-06, + "loss": 253684.025, + "step": 41680 + }, + { + "epoch": 0.08421643765882747, + "grad_norm": 521335.46875, + "learning_rate": 8.338e-06, + "loss": 101177.8188, + "step": 41690 + }, + { + "epoch": 0.08423663829151129, + "grad_norm": 18903.166015625, + "learning_rate": 8.34e-06, + "loss": 60200.9812, + "step": 41700 + }, + { + "epoch": 0.08425683892419511, + "grad_norm": 8681.1328125, + "learning_rate": 8.342e-06, + "loss": 149990.65, + "step": 41710 + }, + { + "epoch": 0.08427703955687892, + "grad_norm": 48888.890625, + "learning_rate": 8.344000000000002e-06, + "loss": 87902.3062, + "step": 41720 + }, + { + "epoch": 0.08429724018956274, + "grad_norm": 3504.574951171875, + "learning_rate": 8.346000000000001e-06, + "loss": 88933.1687, + "step": 41730 + }, + { + "epoch": 0.08431744082224656, + "grad_norm": 85407.6484375, + "learning_rate": 8.348e-06, + "loss": 90971.3, + "step": 41740 + }, + { + "epoch": 0.08433764145493036, + "grad_norm": 14753.11328125, + "learning_rate": 8.35e-06, + "loss": 355336.575, + "step": 41750 + }, + { + "epoch": 0.08435784208761418, + "grad_norm": 251690.71875, + "learning_rate": 8.352e-06, + "loss": 168149.5, + "step": 41760 + }, + { + "epoch": 0.084378042720298, + "grad_norm": 170850.796875, + "learning_rate": 8.354000000000002e-06, + "loss": 157961.5125, + "step": 41770 + }, + { + "epoch": 0.08439824335298182, + "grad_norm": 277812.90625, + "learning_rate": 8.356000000000001e-06, + "loss": 137611.35, + "step": 41780 + }, + { + "epoch": 0.08441844398566563, + "grad_norm": 320898.09375, + "learning_rate": 8.358e-06, + "loss": 155807.025, + "step": 41790 + }, + { + "epoch": 0.08443864461834945, + "grad_norm": 970592.8125, + "learning_rate": 8.36e-06, + "loss": 175217.9875, + "step": 41800 + }, + { + "epoch": 0.08445884525103327, + "grad_norm": 21039.91796875, + "learning_rate": 8.362e-06, + "loss": 156357.05, + "step": 41810 + }, + { + "epoch": 0.08447904588371707, + "grad_norm": 119380.765625, + "learning_rate": 8.364000000000002e-06, + "loss": 379557.425, + "step": 41820 + }, + { + "epoch": 0.0844992465164009, + "grad_norm": 12152.7421875, + "learning_rate": 8.366000000000001e-06, + "loss": 305793.45, + "step": 41830 + }, + { + "epoch": 0.08451944714908471, + "grad_norm": 56772.85546875, + "learning_rate": 8.368e-06, + "loss": 105926.45, + "step": 41840 + }, + { + "epoch": 0.08453964778176852, + "grad_norm": 27080.87890625, + "learning_rate": 8.370000000000001e-06, + "loss": 138001.55, + "step": 41850 + }, + { + "epoch": 0.08455984841445234, + "grad_norm": 63956.30078125, + "learning_rate": 8.372e-06, + "loss": 245295.525, + "step": 41860 + }, + { + "epoch": 0.08458004904713616, + "grad_norm": 129878.0625, + "learning_rate": 8.374000000000001e-06, + "loss": 230060.95, + "step": 41870 + }, + { + "epoch": 0.08460024967981997, + "grad_norm": 23498.6171875, + "learning_rate": 8.376e-06, + "loss": 181780.1625, + "step": 41880 + }, + { + "epoch": 0.08462045031250379, + "grad_norm": 72454.296875, + "learning_rate": 8.378e-06, + "loss": 203709.975, + "step": 41890 + }, + { + "epoch": 0.0846406509451876, + "grad_norm": 64626.91796875, + "learning_rate": 8.380000000000001e-06, + "loss": 138570.375, + "step": 41900 + }, + { + "epoch": 0.08466085157787141, + "grad_norm": 621499.25, + "learning_rate": 8.382e-06, + "loss": 224010.95, + "step": 41910 + }, + { + "epoch": 0.08468105221055523, + "grad_norm": 20323.56640625, + "learning_rate": 8.384000000000001e-06, + "loss": 70128.1, + "step": 41920 + }, + { + "epoch": 0.08470125284323905, + "grad_norm": 201114.796875, + "learning_rate": 8.386e-06, + "loss": 208090.1, + "step": 41930 + }, + { + "epoch": 0.08472145347592287, + "grad_norm": 46364.51171875, + "learning_rate": 8.388e-06, + "loss": 89300.1812, + "step": 41940 + }, + { + "epoch": 0.08474165410860668, + "grad_norm": 45065.95703125, + "learning_rate": 8.390000000000001e-06, + "loss": 72008.625, + "step": 41950 + }, + { + "epoch": 0.0847618547412905, + "grad_norm": 103853.046875, + "learning_rate": 8.392e-06, + "loss": 117894.825, + "step": 41960 + }, + { + "epoch": 0.08478205537397432, + "grad_norm": 6369.6396484375, + "learning_rate": 8.394000000000001e-06, + "loss": 138962.325, + "step": 41970 + }, + { + "epoch": 0.08480225600665812, + "grad_norm": 41652.875, + "learning_rate": 8.396e-06, + "loss": 228206.075, + "step": 41980 + }, + { + "epoch": 0.08482245663934194, + "grad_norm": 62723.640625, + "learning_rate": 8.398e-06, + "loss": 90218.3625, + "step": 41990 + }, + { + "epoch": 0.08484265727202576, + "grad_norm": 57825.87109375, + "learning_rate": 8.400000000000001e-06, + "loss": 98396.1062, + "step": 42000 + }, + { + "epoch": 0.08486285790470957, + "grad_norm": 520104.0625, + "learning_rate": 8.402e-06, + "loss": 108427.4875, + "step": 42010 + }, + { + "epoch": 0.08488305853739339, + "grad_norm": 12502.9951171875, + "learning_rate": 8.404000000000001e-06, + "loss": 324787.275, + "step": 42020 + }, + { + "epoch": 0.08490325917007721, + "grad_norm": 1123975.25, + "learning_rate": 8.406e-06, + "loss": 320904.05, + "step": 42030 + }, + { + "epoch": 0.08492345980276102, + "grad_norm": 946297.6875, + "learning_rate": 8.408e-06, + "loss": 517167.1, + "step": 42040 + }, + { + "epoch": 0.08494366043544484, + "grad_norm": 31211.1328125, + "learning_rate": 8.41e-06, + "loss": 138752.6375, + "step": 42050 + }, + { + "epoch": 0.08496386106812866, + "grad_norm": 3517.409912109375, + "learning_rate": 8.412e-06, + "loss": 84891.125, + "step": 42060 + }, + { + "epoch": 0.08498406170081246, + "grad_norm": 53607.4296875, + "learning_rate": 8.414000000000001e-06, + "loss": 67969.3188, + "step": 42070 + }, + { + "epoch": 0.08500426233349628, + "grad_norm": 94813.5546875, + "learning_rate": 8.416e-06, + "loss": 152149.2625, + "step": 42080 + }, + { + "epoch": 0.0850244629661801, + "grad_norm": 49041.15234375, + "learning_rate": 8.418000000000001e-06, + "loss": 116188.8375, + "step": 42090 + }, + { + "epoch": 0.08504466359886392, + "grad_norm": 3803.15673828125, + "learning_rate": 8.42e-06, + "loss": 264709.825, + "step": 42100 + }, + { + "epoch": 0.08506486423154773, + "grad_norm": 43493.4609375, + "learning_rate": 8.422e-06, + "loss": 142275.425, + "step": 42110 + }, + { + "epoch": 0.08508506486423155, + "grad_norm": 151082.21875, + "learning_rate": 8.424000000000001e-06, + "loss": 40543.25, + "step": 42120 + }, + { + "epoch": 0.08510526549691537, + "grad_norm": 4783.6669921875, + "learning_rate": 8.426e-06, + "loss": 284701.15, + "step": 42130 + }, + { + "epoch": 0.08512546612959918, + "grad_norm": 4641.31201171875, + "learning_rate": 8.428000000000001e-06, + "loss": 508644.15, + "step": 42140 + }, + { + "epoch": 0.085145666762283, + "grad_norm": 253817.703125, + "learning_rate": 8.43e-06, + "loss": 136556.925, + "step": 42150 + }, + { + "epoch": 0.08516586739496682, + "grad_norm": 100092.671875, + "learning_rate": 8.432e-06, + "loss": 251046.45, + "step": 42160 + }, + { + "epoch": 0.08518606802765062, + "grad_norm": 11456.8515625, + "learning_rate": 8.434000000000001e-06, + "loss": 189886.15, + "step": 42170 + }, + { + "epoch": 0.08520626866033444, + "grad_norm": 242100.125, + "learning_rate": 8.436e-06, + "loss": 119192.6125, + "step": 42180 + }, + { + "epoch": 0.08522646929301826, + "grad_norm": 8262.751953125, + "learning_rate": 8.438000000000001e-06, + "loss": 149750.1375, + "step": 42190 + }, + { + "epoch": 0.08524666992570207, + "grad_norm": 11211.748046875, + "learning_rate": 8.44e-06, + "loss": 97176.225, + "step": 42200 + }, + { + "epoch": 0.08526687055838589, + "grad_norm": 437180.84375, + "learning_rate": 8.442e-06, + "loss": 216617.725, + "step": 42210 + }, + { + "epoch": 0.08528707119106971, + "grad_norm": 77579.2265625, + "learning_rate": 8.444e-06, + "loss": 245992.1, + "step": 42220 + }, + { + "epoch": 0.08530727182375351, + "grad_norm": 1089194.375, + "learning_rate": 8.446e-06, + "loss": 259107.25, + "step": 42230 + }, + { + "epoch": 0.08532747245643733, + "grad_norm": 23570.326171875, + "learning_rate": 8.448000000000001e-06, + "loss": 45669.0156, + "step": 42240 + }, + { + "epoch": 0.08534767308912115, + "grad_norm": 445200.15625, + "learning_rate": 8.45e-06, + "loss": 273713.5, + "step": 42250 + }, + { + "epoch": 0.08536787372180497, + "grad_norm": 5853.46435546875, + "learning_rate": 8.452e-06, + "loss": 87057.0625, + "step": 42260 + }, + { + "epoch": 0.08538807435448878, + "grad_norm": 39950.17578125, + "learning_rate": 8.454e-06, + "loss": 208821.0125, + "step": 42270 + }, + { + "epoch": 0.0854082749871726, + "grad_norm": 94301.8359375, + "learning_rate": 8.456000000000002e-06, + "loss": 310161.975, + "step": 42280 + }, + { + "epoch": 0.08542847561985642, + "grad_norm": 15885.2685546875, + "learning_rate": 8.458000000000001e-06, + "loss": 145320.0875, + "step": 42290 + }, + { + "epoch": 0.08544867625254023, + "grad_norm": 272489.46875, + "learning_rate": 8.46e-06, + "loss": 124827.1125, + "step": 42300 + }, + { + "epoch": 0.08546887688522405, + "grad_norm": 10890.91796875, + "learning_rate": 8.462e-06, + "loss": 198459.6125, + "step": 42310 + }, + { + "epoch": 0.08548907751790787, + "grad_norm": 149363.53125, + "learning_rate": 8.464e-06, + "loss": 94640.425, + "step": 42320 + }, + { + "epoch": 0.08550927815059167, + "grad_norm": 3603.788818359375, + "learning_rate": 8.466000000000002e-06, + "loss": 172633.975, + "step": 42330 + }, + { + "epoch": 0.08552947878327549, + "grad_norm": 203113.375, + "learning_rate": 8.468000000000001e-06, + "loss": 81614.425, + "step": 42340 + }, + { + "epoch": 0.08554967941595931, + "grad_norm": 21790.220703125, + "learning_rate": 8.47e-06, + "loss": 137076.8375, + "step": 42350 + }, + { + "epoch": 0.08556988004864312, + "grad_norm": 4690.8056640625, + "learning_rate": 8.472e-06, + "loss": 46016.5938, + "step": 42360 + }, + { + "epoch": 0.08559008068132694, + "grad_norm": 96999.7578125, + "learning_rate": 8.474e-06, + "loss": 224116.775, + "step": 42370 + }, + { + "epoch": 0.08561028131401076, + "grad_norm": 179597.09375, + "learning_rate": 8.476000000000002e-06, + "loss": 152028.55, + "step": 42380 + }, + { + "epoch": 0.08563048194669456, + "grad_norm": 139642.46875, + "learning_rate": 8.478e-06, + "loss": 280752.825, + "step": 42390 + }, + { + "epoch": 0.08565068257937838, + "grad_norm": 697479.125, + "learning_rate": 8.48e-06, + "loss": 185088.9625, + "step": 42400 + }, + { + "epoch": 0.0856708832120622, + "grad_norm": 128513.8828125, + "learning_rate": 8.482e-06, + "loss": 240587.475, + "step": 42410 + }, + { + "epoch": 0.08569108384474602, + "grad_norm": 94420.5625, + "learning_rate": 8.484e-06, + "loss": 181572.675, + "step": 42420 + }, + { + "epoch": 0.08571128447742983, + "grad_norm": 17213.947265625, + "learning_rate": 8.486000000000001e-06, + "loss": 120635.5, + "step": 42430 + }, + { + "epoch": 0.08573148511011365, + "grad_norm": 1773062.375, + "learning_rate": 8.488e-06, + "loss": 333293.6, + "step": 42440 + }, + { + "epoch": 0.08575168574279747, + "grad_norm": 275433.5625, + "learning_rate": 8.49e-06, + "loss": 242711.75, + "step": 42450 + }, + { + "epoch": 0.08577188637548128, + "grad_norm": 459157.65625, + "learning_rate": 8.492000000000001e-06, + "loss": 161562.8, + "step": 42460 + }, + { + "epoch": 0.0857920870081651, + "grad_norm": 148252.3125, + "learning_rate": 8.494e-06, + "loss": 86060.525, + "step": 42470 + }, + { + "epoch": 0.08581228764084892, + "grad_norm": 2768.928955078125, + "learning_rate": 8.496000000000001e-06, + "loss": 165544.925, + "step": 42480 + }, + { + "epoch": 0.08583248827353272, + "grad_norm": 28708.0390625, + "learning_rate": 8.498e-06, + "loss": 106009.2, + "step": 42490 + }, + { + "epoch": 0.08585268890621654, + "grad_norm": 191583.53125, + "learning_rate": 8.5e-06, + "loss": 196739.8625, + "step": 42500 + }, + { + "epoch": 0.08587288953890036, + "grad_norm": 26091.478515625, + "learning_rate": 8.502000000000001e-06, + "loss": 181564.925, + "step": 42510 + }, + { + "epoch": 0.08589309017158417, + "grad_norm": 78750.8359375, + "learning_rate": 8.504000000000002e-06, + "loss": 161761.5125, + "step": 42520 + }, + { + "epoch": 0.08591329080426799, + "grad_norm": 45284.6875, + "learning_rate": 8.506000000000001e-06, + "loss": 77543.5188, + "step": 42530 + }, + { + "epoch": 0.08593349143695181, + "grad_norm": 913553.5, + "learning_rate": 8.508e-06, + "loss": 209413.6, + "step": 42540 + }, + { + "epoch": 0.08595369206963561, + "grad_norm": 57759.9296875, + "learning_rate": 8.51e-06, + "loss": 214606.85, + "step": 42550 + }, + { + "epoch": 0.08597389270231943, + "grad_norm": 553983.6875, + "learning_rate": 8.512e-06, + "loss": 157516.8, + "step": 42560 + }, + { + "epoch": 0.08599409333500325, + "grad_norm": 124097.828125, + "learning_rate": 8.514000000000002e-06, + "loss": 424932.55, + "step": 42570 + }, + { + "epoch": 0.08601429396768706, + "grad_norm": 33649.19921875, + "learning_rate": 8.516000000000001e-06, + "loss": 349332.325, + "step": 42580 + }, + { + "epoch": 0.08603449460037088, + "grad_norm": 0.0, + "learning_rate": 8.518e-06, + "loss": 128330.3625, + "step": 42590 + }, + { + "epoch": 0.0860546952330547, + "grad_norm": 28074.46484375, + "learning_rate": 8.52e-06, + "loss": 115228.0, + "step": 42600 + }, + { + "epoch": 0.08607489586573852, + "grad_norm": 46956.2890625, + "learning_rate": 8.522e-06, + "loss": 73478.2437, + "step": 42610 + }, + { + "epoch": 0.08609509649842233, + "grad_norm": 232473.5, + "learning_rate": 8.524000000000002e-06, + "loss": 186564.3, + "step": 42620 + }, + { + "epoch": 0.08611529713110615, + "grad_norm": 354663.5625, + "learning_rate": 8.526000000000001e-06, + "loss": 106127.15, + "step": 42630 + }, + { + "epoch": 0.08613549776378997, + "grad_norm": 86057.2109375, + "learning_rate": 8.528e-06, + "loss": 133719.7875, + "step": 42640 + }, + { + "epoch": 0.08615569839647377, + "grad_norm": 76530.7265625, + "learning_rate": 8.530000000000001e-06, + "loss": 115319.0875, + "step": 42650 + }, + { + "epoch": 0.0861758990291576, + "grad_norm": 140594.015625, + "learning_rate": 8.532e-06, + "loss": 182740.675, + "step": 42660 + }, + { + "epoch": 0.08619609966184141, + "grad_norm": 170850.21875, + "learning_rate": 8.534000000000002e-06, + "loss": 103786.2063, + "step": 42670 + }, + { + "epoch": 0.08621630029452522, + "grad_norm": 106443.8984375, + "learning_rate": 8.536000000000001e-06, + "loss": 250918.05, + "step": 42680 + }, + { + "epoch": 0.08623650092720904, + "grad_norm": 103829.984375, + "learning_rate": 8.538e-06, + "loss": 88497.2688, + "step": 42690 + }, + { + "epoch": 0.08625670155989286, + "grad_norm": 49956.5625, + "learning_rate": 8.540000000000001e-06, + "loss": 158740.3125, + "step": 42700 + }, + { + "epoch": 0.08627690219257667, + "grad_norm": 8709.796875, + "learning_rate": 8.542e-06, + "loss": 188464.6125, + "step": 42710 + }, + { + "epoch": 0.08629710282526049, + "grad_norm": 567882.4375, + "learning_rate": 8.544000000000002e-06, + "loss": 182399.325, + "step": 42720 + }, + { + "epoch": 0.0863173034579443, + "grad_norm": 109120.984375, + "learning_rate": 8.546000000000001e-06, + "loss": 100554.75, + "step": 42730 + }, + { + "epoch": 0.08633750409062811, + "grad_norm": 119009.6171875, + "learning_rate": 8.548e-06, + "loss": 71045.2937, + "step": 42740 + }, + { + "epoch": 0.08635770472331193, + "grad_norm": 59183.66015625, + "learning_rate": 8.550000000000001e-06, + "loss": 45389.5219, + "step": 42750 + }, + { + "epoch": 0.08637790535599575, + "grad_norm": 146083.28125, + "learning_rate": 8.552e-06, + "loss": 80405.1625, + "step": 42760 + }, + { + "epoch": 0.08639810598867957, + "grad_norm": 141413.484375, + "learning_rate": 8.554000000000001e-06, + "loss": 157622.0875, + "step": 42770 + }, + { + "epoch": 0.08641830662136338, + "grad_norm": 455057.4375, + "learning_rate": 8.556e-06, + "loss": 224516.975, + "step": 42780 + }, + { + "epoch": 0.0864385072540472, + "grad_norm": 40610.24609375, + "learning_rate": 8.558e-06, + "loss": 62346.2312, + "step": 42790 + }, + { + "epoch": 0.08645870788673102, + "grad_norm": 481423.65625, + "learning_rate": 8.560000000000001e-06, + "loss": 221731.7, + "step": 42800 + }, + { + "epoch": 0.08647890851941482, + "grad_norm": 90770.09375, + "learning_rate": 8.562e-06, + "loss": 148746.6, + "step": 42810 + }, + { + "epoch": 0.08649910915209864, + "grad_norm": 20233.673828125, + "learning_rate": 8.564000000000001e-06, + "loss": 77635.5312, + "step": 42820 + }, + { + "epoch": 0.08651930978478246, + "grad_norm": 103954.3828125, + "learning_rate": 8.566e-06, + "loss": 299431.8, + "step": 42830 + }, + { + "epoch": 0.08653951041746627, + "grad_norm": 37999.61328125, + "learning_rate": 8.568e-06, + "loss": 159116.775, + "step": 42840 + }, + { + "epoch": 0.08655971105015009, + "grad_norm": 357951.0, + "learning_rate": 8.570000000000001e-06, + "loss": 208884.85, + "step": 42850 + }, + { + "epoch": 0.08657991168283391, + "grad_norm": 215850.65625, + "learning_rate": 8.572e-06, + "loss": 78474.625, + "step": 42860 + }, + { + "epoch": 0.08660011231551772, + "grad_norm": 223757.8125, + "learning_rate": 8.574000000000001e-06, + "loss": 202084.275, + "step": 42870 + }, + { + "epoch": 0.08662031294820154, + "grad_norm": 716058.3125, + "learning_rate": 8.576e-06, + "loss": 266100.4, + "step": 42880 + }, + { + "epoch": 0.08664051358088536, + "grad_norm": 28140.568359375, + "learning_rate": 8.578000000000002e-06, + "loss": 122455.55, + "step": 42890 + }, + { + "epoch": 0.08666071421356916, + "grad_norm": 49042.74609375, + "learning_rate": 8.580000000000001e-06, + "loss": 301027.55, + "step": 42900 + }, + { + "epoch": 0.08668091484625298, + "grad_norm": 30291.162109375, + "learning_rate": 8.582e-06, + "loss": 80691.6062, + "step": 42910 + }, + { + "epoch": 0.0867011154789368, + "grad_norm": 56629.68359375, + "learning_rate": 8.584000000000001e-06, + "loss": 165791.9625, + "step": 42920 + }, + { + "epoch": 0.08672131611162062, + "grad_norm": 314737.625, + "learning_rate": 8.586e-06, + "loss": 113553.025, + "step": 42930 + }, + { + "epoch": 0.08674151674430443, + "grad_norm": 790156.5625, + "learning_rate": 8.588000000000001e-06, + "loss": 244807.75, + "step": 42940 + }, + { + "epoch": 0.08676171737698825, + "grad_norm": 217319.203125, + "learning_rate": 8.59e-06, + "loss": 77536.225, + "step": 42950 + }, + { + "epoch": 0.08678191800967207, + "grad_norm": 4426.3076171875, + "learning_rate": 8.592e-06, + "loss": 55616.75, + "step": 42960 + }, + { + "epoch": 0.08680211864235587, + "grad_norm": 6592.40869140625, + "learning_rate": 8.594000000000001e-06, + "loss": 176201.7375, + "step": 42970 + }, + { + "epoch": 0.0868223192750397, + "grad_norm": 18283.97265625, + "learning_rate": 8.596e-06, + "loss": 273803.65, + "step": 42980 + }, + { + "epoch": 0.08684251990772351, + "grad_norm": 148878.78125, + "learning_rate": 8.598000000000001e-06, + "loss": 79581.925, + "step": 42990 + }, + { + "epoch": 0.08686272054040732, + "grad_norm": 21906.869140625, + "learning_rate": 8.6e-06, + "loss": 188059.525, + "step": 43000 + }, + { + "epoch": 0.08688292117309114, + "grad_norm": 13347.212890625, + "learning_rate": 8.602e-06, + "loss": 67666.6062, + "step": 43010 + }, + { + "epoch": 0.08690312180577496, + "grad_norm": 8322.4736328125, + "learning_rate": 8.604000000000001e-06, + "loss": 202827.175, + "step": 43020 + }, + { + "epoch": 0.08692332243845877, + "grad_norm": 11868.388671875, + "learning_rate": 8.606e-06, + "loss": 307715.675, + "step": 43030 + }, + { + "epoch": 0.08694352307114259, + "grad_norm": 15326.095703125, + "learning_rate": 8.608000000000001e-06, + "loss": 179379.2125, + "step": 43040 + }, + { + "epoch": 0.0869637237038264, + "grad_norm": 188514.90625, + "learning_rate": 8.61e-06, + "loss": 166349.225, + "step": 43050 + }, + { + "epoch": 0.08698392433651021, + "grad_norm": 6181.10107421875, + "learning_rate": 8.612e-06, + "loss": 289272.975, + "step": 43060 + }, + { + "epoch": 0.08700412496919403, + "grad_norm": 14502.6015625, + "learning_rate": 8.614000000000001e-06, + "loss": 79772.1125, + "step": 43070 + }, + { + "epoch": 0.08702432560187785, + "grad_norm": 764729.5625, + "learning_rate": 8.616000000000002e-06, + "loss": 142998.175, + "step": 43080 + }, + { + "epoch": 0.08704452623456167, + "grad_norm": 77820.0078125, + "learning_rate": 8.618000000000001e-06, + "loss": 168257.675, + "step": 43090 + }, + { + "epoch": 0.08706472686724548, + "grad_norm": 35942.71875, + "learning_rate": 8.62e-06, + "loss": 233049.575, + "step": 43100 + }, + { + "epoch": 0.0870849274999293, + "grad_norm": 18713.017578125, + "learning_rate": 8.622e-06, + "loss": 230853.5, + "step": 43110 + }, + { + "epoch": 0.08710512813261312, + "grad_norm": 609409.1875, + "learning_rate": 8.624e-06, + "loss": 130916.1625, + "step": 43120 + }, + { + "epoch": 0.08712532876529692, + "grad_norm": 3072.981201171875, + "learning_rate": 8.626000000000002e-06, + "loss": 98637.575, + "step": 43130 + }, + { + "epoch": 0.08714552939798074, + "grad_norm": 5730.93310546875, + "learning_rate": 8.628000000000001e-06, + "loss": 109204.5125, + "step": 43140 + }, + { + "epoch": 0.08716573003066456, + "grad_norm": 385227.1875, + "learning_rate": 8.63e-06, + "loss": 187699.9, + "step": 43150 + }, + { + "epoch": 0.08718593066334837, + "grad_norm": 392.2604064941406, + "learning_rate": 8.632e-06, + "loss": 83697.3438, + "step": 43160 + }, + { + "epoch": 0.08720613129603219, + "grad_norm": 327264.0, + "learning_rate": 8.634e-06, + "loss": 104081.3438, + "step": 43170 + }, + { + "epoch": 0.08722633192871601, + "grad_norm": 11551.8857421875, + "learning_rate": 8.636000000000002e-06, + "loss": 208855.75, + "step": 43180 + }, + { + "epoch": 0.08724653256139982, + "grad_norm": 128026.875, + "learning_rate": 8.638000000000001e-06, + "loss": 192608.275, + "step": 43190 + }, + { + "epoch": 0.08726673319408364, + "grad_norm": 151795.421875, + "learning_rate": 8.64e-06, + "loss": 145717.3125, + "step": 43200 + }, + { + "epoch": 0.08728693382676746, + "grad_norm": 728680.9375, + "learning_rate": 8.642e-06, + "loss": 154031.9375, + "step": 43210 + }, + { + "epoch": 0.08730713445945126, + "grad_norm": 37628.41796875, + "learning_rate": 8.644e-06, + "loss": 139283.25, + "step": 43220 + }, + { + "epoch": 0.08732733509213508, + "grad_norm": 99069.3203125, + "learning_rate": 8.646000000000002e-06, + "loss": 92072.8313, + "step": 43230 + }, + { + "epoch": 0.0873475357248189, + "grad_norm": 149195.46875, + "learning_rate": 8.648000000000001e-06, + "loss": 151376.0, + "step": 43240 + }, + { + "epoch": 0.08736773635750272, + "grad_norm": 99039.2578125, + "learning_rate": 8.65e-06, + "loss": 80985.2312, + "step": 43250 + }, + { + "epoch": 0.08738793699018653, + "grad_norm": 13131.80078125, + "learning_rate": 8.652000000000001e-06, + "loss": 316986.425, + "step": 43260 + }, + { + "epoch": 0.08740813762287035, + "grad_norm": 61747.85546875, + "learning_rate": 8.654e-06, + "loss": 140412.6375, + "step": 43270 + }, + { + "epoch": 0.08742833825555417, + "grad_norm": 85864.7734375, + "learning_rate": 8.656000000000001e-06, + "loss": 130576.5375, + "step": 43280 + }, + { + "epoch": 0.08744853888823798, + "grad_norm": 8174.59619140625, + "learning_rate": 8.658e-06, + "loss": 266348.975, + "step": 43290 + }, + { + "epoch": 0.0874687395209218, + "grad_norm": 55818.66015625, + "learning_rate": 8.66e-06, + "loss": 313990.8, + "step": 43300 + }, + { + "epoch": 0.08748894015360562, + "grad_norm": 367297.375, + "learning_rate": 8.662000000000001e-06, + "loss": 140453.075, + "step": 43310 + }, + { + "epoch": 0.08750914078628942, + "grad_norm": 168458.546875, + "learning_rate": 8.664e-06, + "loss": 87272.2875, + "step": 43320 + }, + { + "epoch": 0.08752934141897324, + "grad_norm": 6858.38720703125, + "learning_rate": 8.666000000000001e-06, + "loss": 70151.9688, + "step": 43330 + }, + { + "epoch": 0.08754954205165706, + "grad_norm": 185097.125, + "learning_rate": 8.668e-06, + "loss": 185952.55, + "step": 43340 + }, + { + "epoch": 0.08756974268434087, + "grad_norm": 34221.51953125, + "learning_rate": 8.67e-06, + "loss": 126423.975, + "step": 43350 + }, + { + "epoch": 0.08758994331702469, + "grad_norm": 4629.17529296875, + "learning_rate": 8.672000000000001e-06, + "loss": 81385.7625, + "step": 43360 + }, + { + "epoch": 0.08761014394970851, + "grad_norm": 136810.109375, + "learning_rate": 8.674e-06, + "loss": 175485.45, + "step": 43370 + }, + { + "epoch": 0.08763034458239231, + "grad_norm": 66792.8515625, + "learning_rate": 8.676000000000001e-06, + "loss": 196494.175, + "step": 43380 + }, + { + "epoch": 0.08765054521507613, + "grad_norm": 26290.314453125, + "learning_rate": 8.678e-06, + "loss": 280816.725, + "step": 43390 + }, + { + "epoch": 0.08767074584775995, + "grad_norm": 17944.140625, + "learning_rate": 8.68e-06, + "loss": 146447.9375, + "step": 43400 + }, + { + "epoch": 0.08769094648044377, + "grad_norm": 124965.7578125, + "learning_rate": 8.682000000000001e-06, + "loss": 118160.6, + "step": 43410 + }, + { + "epoch": 0.08771114711312758, + "grad_norm": 547455.5625, + "learning_rate": 8.684e-06, + "loss": 299009.4, + "step": 43420 + }, + { + "epoch": 0.0877313477458114, + "grad_norm": 34717.12890625, + "learning_rate": 8.686000000000001e-06, + "loss": 94589.0437, + "step": 43430 + }, + { + "epoch": 0.08775154837849522, + "grad_norm": 170661.484375, + "learning_rate": 8.688e-06, + "loss": 143215.0375, + "step": 43440 + }, + { + "epoch": 0.08777174901117903, + "grad_norm": 71431.8828125, + "learning_rate": 8.690000000000002e-06, + "loss": 322744.525, + "step": 43450 + }, + { + "epoch": 0.08779194964386285, + "grad_norm": 5912.609375, + "learning_rate": 8.692e-06, + "loss": 111545.9125, + "step": 43460 + }, + { + "epoch": 0.08781215027654667, + "grad_norm": 169360.5, + "learning_rate": 8.694e-06, + "loss": 41089.9938, + "step": 43470 + }, + { + "epoch": 0.08783235090923047, + "grad_norm": 534504.3125, + "learning_rate": 8.696000000000001e-06, + "loss": 138510.1, + "step": 43480 + }, + { + "epoch": 0.08785255154191429, + "grad_norm": 107176.9921875, + "learning_rate": 8.698e-06, + "loss": 105497.3125, + "step": 43490 + }, + { + "epoch": 0.08787275217459811, + "grad_norm": 217060.484375, + "learning_rate": 8.700000000000001e-06, + "loss": 81957.4625, + "step": 43500 + }, + { + "epoch": 0.08789295280728192, + "grad_norm": 105210.8984375, + "learning_rate": 8.702e-06, + "loss": 131869.2375, + "step": 43510 + }, + { + "epoch": 0.08791315343996574, + "grad_norm": 211744.140625, + "learning_rate": 8.704e-06, + "loss": 168912.4625, + "step": 43520 + }, + { + "epoch": 0.08793335407264956, + "grad_norm": 19119.44140625, + "learning_rate": 8.706000000000001e-06, + "loss": 229787.975, + "step": 43530 + }, + { + "epoch": 0.08795355470533336, + "grad_norm": 4076.060302734375, + "learning_rate": 8.708e-06, + "loss": 106030.65, + "step": 43540 + }, + { + "epoch": 0.08797375533801718, + "grad_norm": 107903.40625, + "learning_rate": 8.710000000000001e-06, + "loss": 382987.275, + "step": 43550 + }, + { + "epoch": 0.087993955970701, + "grad_norm": 32943.484375, + "learning_rate": 8.712e-06, + "loss": 129558.875, + "step": 43560 + }, + { + "epoch": 0.08801415660338482, + "grad_norm": 81097.5546875, + "learning_rate": 8.714e-06, + "loss": 254264.325, + "step": 43570 + }, + { + "epoch": 0.08803435723606863, + "grad_norm": 3559.42236328125, + "learning_rate": 8.716000000000001e-06, + "loss": 153125.3, + "step": 43580 + }, + { + "epoch": 0.08805455786875245, + "grad_norm": 757444.625, + "learning_rate": 8.718e-06, + "loss": 364967.575, + "step": 43590 + }, + { + "epoch": 0.08807475850143627, + "grad_norm": 160915.015625, + "learning_rate": 8.720000000000001e-06, + "loss": 316472.4, + "step": 43600 + }, + { + "epoch": 0.08809495913412008, + "grad_norm": 2076599.5, + "learning_rate": 8.722e-06, + "loss": 299072.75, + "step": 43610 + }, + { + "epoch": 0.0881151597668039, + "grad_norm": 52987.46875, + "learning_rate": 8.724e-06, + "loss": 294093.0, + "step": 43620 + }, + { + "epoch": 0.08813536039948772, + "grad_norm": 1128602.25, + "learning_rate": 8.726e-06, + "loss": 251870.55, + "step": 43630 + }, + { + "epoch": 0.08815556103217152, + "grad_norm": 3209.09765625, + "learning_rate": 8.728e-06, + "loss": 175069.6875, + "step": 43640 + }, + { + "epoch": 0.08817576166485534, + "grad_norm": 529964.875, + "learning_rate": 8.730000000000001e-06, + "loss": 118368.9125, + "step": 43650 + }, + { + "epoch": 0.08819596229753916, + "grad_norm": 532679.5, + "learning_rate": 8.732e-06, + "loss": 263694.6, + "step": 43660 + }, + { + "epoch": 0.08821616293022297, + "grad_norm": 253125.875, + "learning_rate": 8.734e-06, + "loss": 133801.925, + "step": 43670 + }, + { + "epoch": 0.08823636356290679, + "grad_norm": 16567.189453125, + "learning_rate": 8.736e-06, + "loss": 180604.2, + "step": 43680 + }, + { + "epoch": 0.08825656419559061, + "grad_norm": 250531.53125, + "learning_rate": 8.738000000000002e-06, + "loss": 58957.925, + "step": 43690 + }, + { + "epoch": 0.08827676482827441, + "grad_norm": 43087.703125, + "learning_rate": 8.740000000000001e-06, + "loss": 76188.1875, + "step": 43700 + }, + { + "epoch": 0.08829696546095823, + "grad_norm": 263834.3125, + "learning_rate": 8.742e-06, + "loss": 168263.15, + "step": 43710 + }, + { + "epoch": 0.08831716609364205, + "grad_norm": 75237.921875, + "learning_rate": 8.744e-06, + "loss": 51281.9906, + "step": 43720 + }, + { + "epoch": 0.08833736672632587, + "grad_norm": 1213.0709228515625, + "learning_rate": 8.746e-06, + "loss": 148462.525, + "step": 43730 + }, + { + "epoch": 0.08835756735900968, + "grad_norm": 21714.701171875, + "learning_rate": 8.748000000000002e-06, + "loss": 232276.475, + "step": 43740 + }, + { + "epoch": 0.0883777679916935, + "grad_norm": 7464.02783203125, + "learning_rate": 8.750000000000001e-06, + "loss": 275010.225, + "step": 43750 + }, + { + "epoch": 0.08839796862437732, + "grad_norm": 60013.97265625, + "learning_rate": 8.752e-06, + "loss": 176379.9625, + "step": 43760 + }, + { + "epoch": 0.08841816925706113, + "grad_norm": 6596.6484375, + "learning_rate": 8.754e-06, + "loss": 90849.4688, + "step": 43770 + }, + { + "epoch": 0.08843836988974495, + "grad_norm": 114075.09375, + "learning_rate": 8.756e-06, + "loss": 113582.4125, + "step": 43780 + }, + { + "epoch": 0.08845857052242877, + "grad_norm": 218882.703125, + "learning_rate": 8.758000000000002e-06, + "loss": 107363.05, + "step": 43790 + }, + { + "epoch": 0.08847877115511257, + "grad_norm": 20439.849609375, + "learning_rate": 8.76e-06, + "loss": 265003.65, + "step": 43800 + }, + { + "epoch": 0.0884989717877964, + "grad_norm": 69680.515625, + "learning_rate": 8.762e-06, + "loss": 252661.45, + "step": 43810 + }, + { + "epoch": 0.08851917242048021, + "grad_norm": 161883.265625, + "learning_rate": 8.764e-06, + "loss": 151616.8875, + "step": 43820 + }, + { + "epoch": 0.08853937305316402, + "grad_norm": 3299.712890625, + "learning_rate": 8.766e-06, + "loss": 101429.6375, + "step": 43830 + }, + { + "epoch": 0.08855957368584784, + "grad_norm": 229593.3125, + "learning_rate": 8.768000000000001e-06, + "loss": 240104.675, + "step": 43840 + }, + { + "epoch": 0.08857977431853166, + "grad_norm": 321192.8125, + "learning_rate": 8.77e-06, + "loss": 224547.175, + "step": 43850 + }, + { + "epoch": 0.08859997495121547, + "grad_norm": 671641.875, + "learning_rate": 8.772e-06, + "loss": 154220.3, + "step": 43860 + }, + { + "epoch": 0.08862017558389929, + "grad_norm": 384957.15625, + "learning_rate": 8.774000000000001e-06, + "loss": 217583.65, + "step": 43870 + }, + { + "epoch": 0.0886403762165831, + "grad_norm": 11702.458984375, + "learning_rate": 8.776e-06, + "loss": 224651.525, + "step": 43880 + }, + { + "epoch": 0.08866057684926693, + "grad_norm": 306709.875, + "learning_rate": 8.778000000000001e-06, + "loss": 64379.9313, + "step": 43890 + }, + { + "epoch": 0.08868077748195073, + "grad_norm": 17183.322265625, + "learning_rate": 8.78e-06, + "loss": 176895.425, + "step": 43900 + }, + { + "epoch": 0.08870097811463455, + "grad_norm": 215134.078125, + "learning_rate": 8.782e-06, + "loss": 248202.075, + "step": 43910 + }, + { + "epoch": 0.08872117874731837, + "grad_norm": 106246.953125, + "learning_rate": 8.784000000000001e-06, + "loss": 163154.4125, + "step": 43920 + }, + { + "epoch": 0.08874137938000218, + "grad_norm": 22083.943359375, + "learning_rate": 8.786000000000002e-06, + "loss": 141695.05, + "step": 43930 + }, + { + "epoch": 0.088761580012686, + "grad_norm": 24097.634765625, + "learning_rate": 8.788000000000001e-06, + "loss": 208170.575, + "step": 43940 + }, + { + "epoch": 0.08878178064536982, + "grad_norm": 340077.21875, + "learning_rate": 8.79e-06, + "loss": 274420.125, + "step": 43950 + }, + { + "epoch": 0.08880198127805362, + "grad_norm": 49524.80859375, + "learning_rate": 8.792e-06, + "loss": 129774.9375, + "step": 43960 + }, + { + "epoch": 0.08882218191073744, + "grad_norm": 379995.6875, + "learning_rate": 8.794e-06, + "loss": 147115.4375, + "step": 43970 + }, + { + "epoch": 0.08884238254342126, + "grad_norm": 1349864.375, + "learning_rate": 8.796000000000002e-06, + "loss": 265522.95, + "step": 43980 + }, + { + "epoch": 0.08886258317610507, + "grad_norm": 18853.814453125, + "learning_rate": 8.798000000000001e-06, + "loss": 118606.175, + "step": 43990 + }, + { + "epoch": 0.08888278380878889, + "grad_norm": 20354.423828125, + "learning_rate": 8.8e-06, + "loss": 215314.15, + "step": 44000 + }, + { + "epoch": 0.08890298444147271, + "grad_norm": 48513.890625, + "learning_rate": 8.802e-06, + "loss": 108670.075, + "step": 44010 + }, + { + "epoch": 0.08892318507415652, + "grad_norm": 30581.103515625, + "learning_rate": 8.804e-06, + "loss": 127432.35, + "step": 44020 + }, + { + "epoch": 0.08894338570684034, + "grad_norm": 60489.671875, + "learning_rate": 8.806000000000002e-06, + "loss": 51465.1937, + "step": 44030 + }, + { + "epoch": 0.08896358633952416, + "grad_norm": 174142.828125, + "learning_rate": 8.808000000000001e-06, + "loss": 349783.175, + "step": 44040 + }, + { + "epoch": 0.08898378697220798, + "grad_norm": 51520.7734375, + "learning_rate": 8.81e-06, + "loss": 142285.8375, + "step": 44050 + }, + { + "epoch": 0.08900398760489178, + "grad_norm": 37896.03515625, + "learning_rate": 8.812000000000001e-06, + "loss": 358085.0, + "step": 44060 + }, + { + "epoch": 0.0890241882375756, + "grad_norm": 37899.36328125, + "learning_rate": 8.814e-06, + "loss": 164210.6, + "step": 44070 + }, + { + "epoch": 0.08904438887025942, + "grad_norm": 24106.59765625, + "learning_rate": 8.816000000000002e-06, + "loss": 99470.3438, + "step": 44080 + }, + { + "epoch": 0.08906458950294323, + "grad_norm": 18216.697265625, + "learning_rate": 8.818000000000001e-06, + "loss": 74094.2188, + "step": 44090 + }, + { + "epoch": 0.08908479013562705, + "grad_norm": 304592.1875, + "learning_rate": 8.82e-06, + "loss": 106480.125, + "step": 44100 + }, + { + "epoch": 0.08910499076831087, + "grad_norm": 248727.859375, + "learning_rate": 8.822000000000001e-06, + "loss": 228864.5, + "step": 44110 + }, + { + "epoch": 0.08912519140099467, + "grad_norm": 99766.53125, + "learning_rate": 8.824e-06, + "loss": 185317.675, + "step": 44120 + }, + { + "epoch": 0.0891453920336785, + "grad_norm": 17547.482421875, + "learning_rate": 8.826000000000002e-06, + "loss": 130834.3875, + "step": 44130 + }, + { + "epoch": 0.08916559266636231, + "grad_norm": 149051.96875, + "learning_rate": 8.828000000000001e-06, + "loss": 152576.525, + "step": 44140 + }, + { + "epoch": 0.08918579329904612, + "grad_norm": 11563.728515625, + "learning_rate": 8.83e-06, + "loss": 120170.5875, + "step": 44150 + }, + { + "epoch": 0.08920599393172994, + "grad_norm": 2054572.375, + "learning_rate": 8.832000000000001e-06, + "loss": 459115.1, + "step": 44160 + }, + { + "epoch": 0.08922619456441376, + "grad_norm": 511010.34375, + "learning_rate": 8.834e-06, + "loss": 266784.3, + "step": 44170 + }, + { + "epoch": 0.08924639519709757, + "grad_norm": 1692975.0, + "learning_rate": 8.836000000000001e-06, + "loss": 248525.65, + "step": 44180 + }, + { + "epoch": 0.08926659582978139, + "grad_norm": 519141.53125, + "learning_rate": 8.838e-06, + "loss": 325842.4, + "step": 44190 + }, + { + "epoch": 0.0892867964624652, + "grad_norm": 18059.302734375, + "learning_rate": 8.84e-06, + "loss": 166167.375, + "step": 44200 + }, + { + "epoch": 0.08930699709514903, + "grad_norm": 129143.4921875, + "learning_rate": 8.842000000000001e-06, + "loss": 139514.8625, + "step": 44210 + }, + { + "epoch": 0.08932719772783283, + "grad_norm": 117828.84375, + "learning_rate": 8.844e-06, + "loss": 104329.7875, + "step": 44220 + }, + { + "epoch": 0.08934739836051665, + "grad_norm": 63602.73046875, + "learning_rate": 8.846000000000001e-06, + "loss": 77642.8938, + "step": 44230 + }, + { + "epoch": 0.08936759899320047, + "grad_norm": 69924.7421875, + "learning_rate": 8.848e-06, + "loss": 171672.9375, + "step": 44240 + }, + { + "epoch": 0.08938779962588428, + "grad_norm": 27802.658203125, + "learning_rate": 8.85e-06, + "loss": 286395.05, + "step": 44250 + }, + { + "epoch": 0.0894080002585681, + "grad_norm": 404906.0625, + "learning_rate": 8.852000000000001e-06, + "loss": 140470.6, + "step": 44260 + }, + { + "epoch": 0.08942820089125192, + "grad_norm": 245143.25, + "learning_rate": 8.854e-06, + "loss": 223797.425, + "step": 44270 + }, + { + "epoch": 0.08944840152393572, + "grad_norm": 225159.625, + "learning_rate": 8.856000000000001e-06, + "loss": 63743.225, + "step": 44280 + }, + { + "epoch": 0.08946860215661954, + "grad_norm": 650148.625, + "learning_rate": 8.858e-06, + "loss": 148739.2, + "step": 44290 + }, + { + "epoch": 0.08948880278930336, + "grad_norm": 133915.078125, + "learning_rate": 8.860000000000002e-06, + "loss": 195467.3, + "step": 44300 + }, + { + "epoch": 0.08950900342198717, + "grad_norm": 8957.40234375, + "learning_rate": 8.862000000000001e-06, + "loss": 55974.9875, + "step": 44310 + }, + { + "epoch": 0.08952920405467099, + "grad_norm": 25993.4453125, + "learning_rate": 8.864e-06, + "loss": 116647.925, + "step": 44320 + }, + { + "epoch": 0.08954940468735481, + "grad_norm": 100630.5390625, + "learning_rate": 8.866000000000001e-06, + "loss": 131935.95, + "step": 44330 + }, + { + "epoch": 0.08956960532003862, + "grad_norm": 101326.7421875, + "learning_rate": 8.868e-06, + "loss": 106368.3, + "step": 44340 + }, + { + "epoch": 0.08958980595272244, + "grad_norm": 14248.1005859375, + "learning_rate": 8.870000000000001e-06, + "loss": 200625.1375, + "step": 44350 + }, + { + "epoch": 0.08961000658540626, + "grad_norm": 118004.171875, + "learning_rate": 8.872e-06, + "loss": 80358.2125, + "step": 44360 + }, + { + "epoch": 0.08963020721809008, + "grad_norm": 108674.90625, + "learning_rate": 8.874e-06, + "loss": 73033.7375, + "step": 44370 + }, + { + "epoch": 0.08965040785077388, + "grad_norm": 129342.6015625, + "learning_rate": 8.876e-06, + "loss": 340908.525, + "step": 44380 + }, + { + "epoch": 0.0896706084834577, + "grad_norm": 51677.390625, + "learning_rate": 8.878e-06, + "loss": 181908.1, + "step": 44390 + }, + { + "epoch": 0.08969080911614152, + "grad_norm": 72358.3125, + "learning_rate": 8.880000000000001e-06, + "loss": 127481.9875, + "step": 44400 + }, + { + "epoch": 0.08971100974882533, + "grad_norm": 114049.625, + "learning_rate": 8.882e-06, + "loss": 117809.7625, + "step": 44410 + }, + { + "epoch": 0.08973121038150915, + "grad_norm": 94732.3359375, + "learning_rate": 8.884e-06, + "loss": 238827.0, + "step": 44420 + }, + { + "epoch": 0.08975141101419297, + "grad_norm": 220088.09375, + "learning_rate": 8.886000000000001e-06, + "loss": 159669.7875, + "step": 44430 + }, + { + "epoch": 0.08977161164687678, + "grad_norm": 11599.44921875, + "learning_rate": 8.888e-06, + "loss": 250880.275, + "step": 44440 + }, + { + "epoch": 0.0897918122795606, + "grad_norm": 85075.4609375, + "learning_rate": 8.890000000000001e-06, + "loss": 191590.9, + "step": 44450 + }, + { + "epoch": 0.08981201291224442, + "grad_norm": 43727.0390625, + "learning_rate": 8.892e-06, + "loss": 78882.875, + "step": 44460 + }, + { + "epoch": 0.08983221354492822, + "grad_norm": 6057.24658203125, + "learning_rate": 8.894e-06, + "loss": 226639.05, + "step": 44470 + }, + { + "epoch": 0.08985241417761204, + "grad_norm": 5800.24609375, + "learning_rate": 8.896000000000001e-06, + "loss": 200304.125, + "step": 44480 + }, + { + "epoch": 0.08987261481029586, + "grad_norm": 976956.0, + "learning_rate": 8.898000000000002e-06, + "loss": 226518.55, + "step": 44490 + }, + { + "epoch": 0.08989281544297967, + "grad_norm": 45734.23828125, + "learning_rate": 8.900000000000001e-06, + "loss": 130144.1, + "step": 44500 + }, + { + "epoch": 0.08991301607566349, + "grad_norm": 9471.9208984375, + "learning_rate": 8.902e-06, + "loss": 116121.1125, + "step": 44510 + }, + { + "epoch": 0.08993321670834731, + "grad_norm": 76104.4296875, + "learning_rate": 8.904e-06, + "loss": 135214.4, + "step": 44520 + }, + { + "epoch": 0.08995341734103113, + "grad_norm": 592741.3125, + "learning_rate": 8.906e-06, + "loss": 111755.1375, + "step": 44530 + }, + { + "epoch": 0.08997361797371493, + "grad_norm": 3589.349609375, + "learning_rate": 8.908000000000002e-06, + "loss": 285193.025, + "step": 44540 + }, + { + "epoch": 0.08999381860639875, + "grad_norm": 308442.875, + "learning_rate": 8.910000000000001e-06, + "loss": 138859.55, + "step": 44550 + }, + { + "epoch": 0.09001401923908257, + "grad_norm": 184345.453125, + "learning_rate": 8.912e-06, + "loss": 48390.9094, + "step": 44560 + }, + { + "epoch": 0.09003421987176638, + "grad_norm": 4071.212646484375, + "learning_rate": 8.914e-06, + "loss": 104030.0813, + "step": 44570 + }, + { + "epoch": 0.0900544205044502, + "grad_norm": 13201.0703125, + "learning_rate": 8.916e-06, + "loss": 166452.8625, + "step": 44580 + }, + { + "epoch": 0.09007462113713402, + "grad_norm": 46102.12109375, + "learning_rate": 8.918000000000002e-06, + "loss": 308293.425, + "step": 44590 + }, + { + "epoch": 0.09009482176981783, + "grad_norm": 43089.04296875, + "learning_rate": 8.920000000000001e-06, + "loss": 233410.875, + "step": 44600 + }, + { + "epoch": 0.09011502240250165, + "grad_norm": 1068750.125, + "learning_rate": 8.922e-06, + "loss": 189655.675, + "step": 44610 + }, + { + "epoch": 0.09013522303518547, + "grad_norm": 271306.03125, + "learning_rate": 8.924e-06, + "loss": 79662.0125, + "step": 44620 + }, + { + "epoch": 0.09015542366786927, + "grad_norm": 52782.2734375, + "learning_rate": 8.926e-06, + "loss": 181962.2, + "step": 44630 + }, + { + "epoch": 0.09017562430055309, + "grad_norm": 15377.552734375, + "learning_rate": 8.928000000000002e-06, + "loss": 171862.45, + "step": 44640 + }, + { + "epoch": 0.09019582493323691, + "grad_norm": 785791.625, + "learning_rate": 8.930000000000001e-06, + "loss": 204644.3125, + "step": 44650 + }, + { + "epoch": 0.09021602556592072, + "grad_norm": 15292.1845703125, + "learning_rate": 8.932e-06, + "loss": 136566.2, + "step": 44660 + }, + { + "epoch": 0.09023622619860454, + "grad_norm": 254018.015625, + "learning_rate": 8.934000000000001e-06, + "loss": 110981.6, + "step": 44670 + }, + { + "epoch": 0.09025642683128836, + "grad_norm": 791506.9375, + "learning_rate": 8.936e-06, + "loss": 217085.85, + "step": 44680 + }, + { + "epoch": 0.09027662746397218, + "grad_norm": 126336.703125, + "learning_rate": 8.938000000000001e-06, + "loss": 158559.35, + "step": 44690 + }, + { + "epoch": 0.09029682809665598, + "grad_norm": 33440.0078125, + "learning_rate": 8.94e-06, + "loss": 88841.0, + "step": 44700 + }, + { + "epoch": 0.0903170287293398, + "grad_norm": 11282.3955078125, + "learning_rate": 8.942e-06, + "loss": 76681.5188, + "step": 44710 + }, + { + "epoch": 0.09033722936202362, + "grad_norm": 115027.2890625, + "learning_rate": 8.944000000000001e-06, + "loss": 69828.975, + "step": 44720 + }, + { + "epoch": 0.09035742999470743, + "grad_norm": 331672.28125, + "learning_rate": 8.946e-06, + "loss": 247084.55, + "step": 44730 + }, + { + "epoch": 0.09037763062739125, + "grad_norm": 339772.65625, + "learning_rate": 8.948000000000001e-06, + "loss": 127411.925, + "step": 44740 + }, + { + "epoch": 0.09039783126007507, + "grad_norm": 37624.25, + "learning_rate": 8.95e-06, + "loss": 185821.5, + "step": 44750 + }, + { + "epoch": 0.09041803189275888, + "grad_norm": 285074.90625, + "learning_rate": 8.952e-06, + "loss": 112028.2, + "step": 44760 + }, + { + "epoch": 0.0904382325254427, + "grad_norm": 88330.234375, + "learning_rate": 8.954000000000001e-06, + "loss": 160742.375, + "step": 44770 + }, + { + "epoch": 0.09045843315812652, + "grad_norm": 274308.84375, + "learning_rate": 8.956e-06, + "loss": 221628.45, + "step": 44780 + }, + { + "epoch": 0.09047863379081032, + "grad_norm": 11806.0361328125, + "learning_rate": 8.958000000000001e-06, + "loss": 113640.075, + "step": 44790 + }, + { + "epoch": 0.09049883442349414, + "grad_norm": 159274.09375, + "learning_rate": 8.96e-06, + "loss": 174757.475, + "step": 44800 + }, + { + "epoch": 0.09051903505617796, + "grad_norm": 111773.21875, + "learning_rate": 8.962e-06, + "loss": 174010.025, + "step": 44810 + }, + { + "epoch": 0.09053923568886177, + "grad_norm": 35169.79296875, + "learning_rate": 8.964000000000001e-06, + "loss": 189624.5, + "step": 44820 + }, + { + "epoch": 0.09055943632154559, + "grad_norm": 111119.5234375, + "learning_rate": 8.966e-06, + "loss": 416389.4, + "step": 44830 + }, + { + "epoch": 0.09057963695422941, + "grad_norm": 2159.327880859375, + "learning_rate": 8.968000000000001e-06, + "loss": 165701.675, + "step": 44840 + }, + { + "epoch": 0.09059983758691323, + "grad_norm": 192859.671875, + "learning_rate": 8.97e-06, + "loss": 177952.875, + "step": 44850 + }, + { + "epoch": 0.09062003821959703, + "grad_norm": 78014.7109375, + "learning_rate": 8.972000000000002e-06, + "loss": 100008.8875, + "step": 44860 + }, + { + "epoch": 0.09064023885228085, + "grad_norm": 103817.1484375, + "learning_rate": 8.974e-06, + "loss": 95353.4, + "step": 44870 + }, + { + "epoch": 0.09066043948496467, + "grad_norm": 0.0, + "learning_rate": 8.976e-06, + "loss": 79356.3875, + "step": 44880 + }, + { + "epoch": 0.09068064011764848, + "grad_norm": 948155.875, + "learning_rate": 8.978000000000001e-06, + "loss": 152248.125, + "step": 44890 + }, + { + "epoch": 0.0907008407503323, + "grad_norm": 39514.4140625, + "learning_rate": 8.98e-06, + "loss": 275459.175, + "step": 44900 + }, + { + "epoch": 0.09072104138301612, + "grad_norm": 101258.0625, + "learning_rate": 8.982000000000001e-06, + "loss": 219010.9, + "step": 44910 + }, + { + "epoch": 0.09074124201569993, + "grad_norm": 868214.375, + "learning_rate": 8.984e-06, + "loss": 343480.65, + "step": 44920 + }, + { + "epoch": 0.09076144264838375, + "grad_norm": 13819.3369140625, + "learning_rate": 8.986e-06, + "loss": 235142.475, + "step": 44930 + }, + { + "epoch": 0.09078164328106757, + "grad_norm": 73367.390625, + "learning_rate": 8.988000000000001e-06, + "loss": 154016.1, + "step": 44940 + }, + { + "epoch": 0.09080184391375137, + "grad_norm": 239854.796875, + "learning_rate": 8.99e-06, + "loss": 138163.5875, + "step": 44950 + }, + { + "epoch": 0.0908220445464352, + "grad_norm": 10979.7060546875, + "learning_rate": 8.992000000000001e-06, + "loss": 107587.95, + "step": 44960 + }, + { + "epoch": 0.09084224517911901, + "grad_norm": 282774.875, + "learning_rate": 8.994e-06, + "loss": 218321.525, + "step": 44970 + }, + { + "epoch": 0.09086244581180282, + "grad_norm": 65674.0625, + "learning_rate": 8.996e-06, + "loss": 192657.8375, + "step": 44980 + }, + { + "epoch": 0.09088264644448664, + "grad_norm": 49642.73046875, + "learning_rate": 8.998000000000001e-06, + "loss": 159314.2875, + "step": 44990 + }, + { + "epoch": 0.09090284707717046, + "grad_norm": 15008.2958984375, + "learning_rate": 9e-06, + "loss": 66767.9688, + "step": 45000 + }, + { + "epoch": 0.09092304770985428, + "grad_norm": 297661.03125, + "learning_rate": 9.002000000000001e-06, + "loss": 94643.3875, + "step": 45010 + }, + { + "epoch": 0.09094324834253809, + "grad_norm": 159569.078125, + "learning_rate": 9.004e-06, + "loss": 183879.225, + "step": 45020 + }, + { + "epoch": 0.0909634489752219, + "grad_norm": 20624.26171875, + "learning_rate": 9.006e-06, + "loss": 72094.8938, + "step": 45030 + }, + { + "epoch": 0.09098364960790573, + "grad_norm": 16437.052734375, + "learning_rate": 9.008e-06, + "loss": 104846.875, + "step": 45040 + }, + { + "epoch": 0.09100385024058953, + "grad_norm": 55331.53125, + "learning_rate": 9.01e-06, + "loss": 236971.45, + "step": 45050 + }, + { + "epoch": 0.09102405087327335, + "grad_norm": 2118.048828125, + "learning_rate": 9.012000000000001e-06, + "loss": 81251.25, + "step": 45060 + }, + { + "epoch": 0.09104425150595717, + "grad_norm": 450750.03125, + "learning_rate": 9.014e-06, + "loss": 240994.45, + "step": 45070 + }, + { + "epoch": 0.09106445213864098, + "grad_norm": 1255750.375, + "learning_rate": 9.016e-06, + "loss": 239111.95, + "step": 45080 + }, + { + "epoch": 0.0910846527713248, + "grad_norm": 67774.4296875, + "learning_rate": 9.018e-06, + "loss": 119251.275, + "step": 45090 + }, + { + "epoch": 0.09110485340400862, + "grad_norm": 153664.046875, + "learning_rate": 9.020000000000002e-06, + "loss": 85216.7188, + "step": 45100 + }, + { + "epoch": 0.09112505403669242, + "grad_norm": 0.0, + "learning_rate": 9.022000000000001e-06, + "loss": 86479.5312, + "step": 45110 + }, + { + "epoch": 0.09114525466937624, + "grad_norm": 248387.96875, + "learning_rate": 9.024e-06, + "loss": 142956.3875, + "step": 45120 + }, + { + "epoch": 0.09116545530206006, + "grad_norm": 77185.640625, + "learning_rate": 9.026e-06, + "loss": 224031.9, + "step": 45130 + }, + { + "epoch": 0.09118565593474387, + "grad_norm": 81252.3125, + "learning_rate": 9.028e-06, + "loss": 123655.3625, + "step": 45140 + }, + { + "epoch": 0.09120585656742769, + "grad_norm": 9736.4814453125, + "learning_rate": 9.030000000000002e-06, + "loss": 214861.475, + "step": 45150 + }, + { + "epoch": 0.09122605720011151, + "grad_norm": 8266.5263671875, + "learning_rate": 9.032000000000001e-06, + "loss": 133288.925, + "step": 45160 + }, + { + "epoch": 0.09124625783279533, + "grad_norm": 157808.546875, + "learning_rate": 9.034e-06, + "loss": 150951.7125, + "step": 45170 + }, + { + "epoch": 0.09126645846547914, + "grad_norm": 13263.2490234375, + "learning_rate": 9.036e-06, + "loss": 78836.525, + "step": 45180 + }, + { + "epoch": 0.09128665909816296, + "grad_norm": 0.0, + "learning_rate": 9.038e-06, + "loss": 184555.1625, + "step": 45190 + }, + { + "epoch": 0.09130685973084678, + "grad_norm": 34621.28125, + "learning_rate": 9.040000000000002e-06, + "loss": 75773.2312, + "step": 45200 + }, + { + "epoch": 0.09132706036353058, + "grad_norm": 526573.25, + "learning_rate": 9.042e-06, + "loss": 96503.225, + "step": 45210 + }, + { + "epoch": 0.0913472609962144, + "grad_norm": 20155.779296875, + "learning_rate": 9.044e-06, + "loss": 159321.275, + "step": 45220 + }, + { + "epoch": 0.09136746162889822, + "grad_norm": 13605.376953125, + "learning_rate": 9.046000000000001e-06, + "loss": 94373.8125, + "step": 45230 + }, + { + "epoch": 0.09138766226158203, + "grad_norm": 147111.921875, + "learning_rate": 9.048e-06, + "loss": 195789.1875, + "step": 45240 + }, + { + "epoch": 0.09140786289426585, + "grad_norm": 2656.450439453125, + "learning_rate": 9.050000000000001e-06, + "loss": 364946.175, + "step": 45250 + }, + { + "epoch": 0.09142806352694967, + "grad_norm": 8573.123046875, + "learning_rate": 9.052e-06, + "loss": 103947.225, + "step": 45260 + }, + { + "epoch": 0.09144826415963347, + "grad_norm": 93008.6875, + "learning_rate": 9.054e-06, + "loss": 153103.375, + "step": 45270 + }, + { + "epoch": 0.0914684647923173, + "grad_norm": 1317.8355712890625, + "learning_rate": 9.056000000000001e-06, + "loss": 47931.7812, + "step": 45280 + }, + { + "epoch": 0.09148866542500111, + "grad_norm": 18408.482421875, + "learning_rate": 9.058000000000002e-06, + "loss": 196473.4375, + "step": 45290 + }, + { + "epoch": 0.09150886605768492, + "grad_norm": 81403.03125, + "learning_rate": 9.060000000000001e-06, + "loss": 69616.4438, + "step": 45300 + }, + { + "epoch": 0.09152906669036874, + "grad_norm": 438253.5625, + "learning_rate": 9.062e-06, + "loss": 261474.2, + "step": 45310 + }, + { + "epoch": 0.09154926732305256, + "grad_norm": 1731461.5, + "learning_rate": 9.064e-06, + "loss": 233512.85, + "step": 45320 + }, + { + "epoch": 0.09156946795573638, + "grad_norm": 26219.876953125, + "learning_rate": 9.066000000000001e-06, + "loss": 179109.225, + "step": 45330 + }, + { + "epoch": 0.09158966858842019, + "grad_norm": 14543.8369140625, + "learning_rate": 9.068000000000002e-06, + "loss": 174662.0125, + "step": 45340 + }, + { + "epoch": 0.091609869221104, + "grad_norm": 341617.0, + "learning_rate": 9.070000000000001e-06, + "loss": 229274.575, + "step": 45350 + }, + { + "epoch": 0.09163006985378783, + "grad_norm": 150647.265625, + "learning_rate": 9.072e-06, + "loss": 133259.675, + "step": 45360 + }, + { + "epoch": 0.09165027048647163, + "grad_norm": 7317.4921875, + "learning_rate": 9.074e-06, + "loss": 130675.5625, + "step": 45370 + }, + { + "epoch": 0.09167047111915545, + "grad_norm": 35025.2890625, + "learning_rate": 9.076000000000001e-06, + "loss": 131102.95, + "step": 45380 + }, + { + "epoch": 0.09169067175183927, + "grad_norm": 10259.4140625, + "learning_rate": 9.078000000000002e-06, + "loss": 301130.625, + "step": 45390 + }, + { + "epoch": 0.09171087238452308, + "grad_norm": 363301.6875, + "learning_rate": 9.080000000000001e-06, + "loss": 176540.2625, + "step": 45400 + }, + { + "epoch": 0.0917310730172069, + "grad_norm": 239540.96875, + "learning_rate": 9.082e-06, + "loss": 187266.875, + "step": 45410 + }, + { + "epoch": 0.09175127364989072, + "grad_norm": 764361.4375, + "learning_rate": 9.084e-06, + "loss": 188942.675, + "step": 45420 + }, + { + "epoch": 0.09177147428257452, + "grad_norm": 13796.2109375, + "learning_rate": 9.086e-06, + "loss": 48724.3656, + "step": 45430 + }, + { + "epoch": 0.09179167491525834, + "grad_norm": 22774.23828125, + "learning_rate": 9.088000000000002e-06, + "loss": 162976.95, + "step": 45440 + }, + { + "epoch": 0.09181187554794216, + "grad_norm": 164262.5625, + "learning_rate": 9.090000000000001e-06, + "loss": 290161.05, + "step": 45450 + }, + { + "epoch": 0.09183207618062597, + "grad_norm": 0.0, + "learning_rate": 9.092e-06, + "loss": 141458.3875, + "step": 45460 + }, + { + "epoch": 0.09185227681330979, + "grad_norm": 100999.625, + "learning_rate": 9.094000000000001e-06, + "loss": 152469.5625, + "step": 45470 + }, + { + "epoch": 0.09187247744599361, + "grad_norm": 14140.7578125, + "learning_rate": 9.096e-06, + "loss": 98838.85, + "step": 45480 + }, + { + "epoch": 0.09189267807867743, + "grad_norm": 3118.44580078125, + "learning_rate": 9.098000000000002e-06, + "loss": 68241.8313, + "step": 45490 + }, + { + "epoch": 0.09191287871136124, + "grad_norm": 20405.943359375, + "learning_rate": 9.100000000000001e-06, + "loss": 116350.2375, + "step": 45500 + }, + { + "epoch": 0.09193307934404506, + "grad_norm": 533291.0625, + "learning_rate": 9.102e-06, + "loss": 175475.825, + "step": 45510 + }, + { + "epoch": 0.09195327997672888, + "grad_norm": 587328.5625, + "learning_rate": 9.104000000000001e-06, + "loss": 194994.9125, + "step": 45520 + }, + { + "epoch": 0.09197348060941268, + "grad_norm": 154911.890625, + "learning_rate": 9.106e-06, + "loss": 154769.525, + "step": 45530 + }, + { + "epoch": 0.0919936812420965, + "grad_norm": 13142.076171875, + "learning_rate": 9.108000000000002e-06, + "loss": 130738.1875, + "step": 45540 + }, + { + "epoch": 0.09201388187478032, + "grad_norm": 110480.5390625, + "learning_rate": 9.110000000000001e-06, + "loss": 85560.675, + "step": 45550 + }, + { + "epoch": 0.09203408250746413, + "grad_norm": 1282531.25, + "learning_rate": 9.112e-06, + "loss": 234606.05, + "step": 45560 + }, + { + "epoch": 0.09205428314014795, + "grad_norm": 27132.806640625, + "learning_rate": 9.114000000000001e-06, + "loss": 60330.125, + "step": 45570 + }, + { + "epoch": 0.09207448377283177, + "grad_norm": 167140.78125, + "learning_rate": 9.116e-06, + "loss": 76269.75, + "step": 45580 + }, + { + "epoch": 0.09209468440551558, + "grad_norm": 374677.65625, + "learning_rate": 9.118000000000001e-06, + "loss": 166556.475, + "step": 45590 + }, + { + "epoch": 0.0921148850381994, + "grad_norm": 101942.984375, + "learning_rate": 9.12e-06, + "loss": 156016.4, + "step": 45600 + }, + { + "epoch": 0.09213508567088322, + "grad_norm": 34878.72265625, + "learning_rate": 9.122e-06, + "loss": 330413.2, + "step": 45610 + }, + { + "epoch": 0.09215528630356702, + "grad_norm": 226481.84375, + "learning_rate": 9.124000000000001e-06, + "loss": 216239.55, + "step": 45620 + }, + { + "epoch": 0.09217548693625084, + "grad_norm": 14747.6826171875, + "learning_rate": 9.126e-06, + "loss": 95204.5188, + "step": 45630 + }, + { + "epoch": 0.09219568756893466, + "grad_norm": 68096.3984375, + "learning_rate": 9.128e-06, + "loss": 130620.4375, + "step": 45640 + }, + { + "epoch": 0.09221588820161847, + "grad_norm": 114615.25, + "learning_rate": 9.13e-06, + "loss": 159235.45, + "step": 45650 + }, + { + "epoch": 0.09223608883430229, + "grad_norm": 182181.21875, + "learning_rate": 9.132000000000002e-06, + "loss": 128907.85, + "step": 45660 + }, + { + "epoch": 0.09225628946698611, + "grad_norm": 2233251.5, + "learning_rate": 9.134000000000001e-06, + "loss": 267253.6, + "step": 45670 + }, + { + "epoch": 0.09227649009966993, + "grad_norm": 80980.3671875, + "learning_rate": 9.136e-06, + "loss": 76570.5875, + "step": 45680 + }, + { + "epoch": 0.09229669073235373, + "grad_norm": 295579.15625, + "learning_rate": 9.138e-06, + "loss": 129057.425, + "step": 45690 + }, + { + "epoch": 0.09231689136503755, + "grad_norm": 7817.7041015625, + "learning_rate": 9.14e-06, + "loss": 182383.4875, + "step": 45700 + }, + { + "epoch": 0.09233709199772137, + "grad_norm": 104559.3984375, + "learning_rate": 9.142000000000002e-06, + "loss": 120875.15, + "step": 45710 + }, + { + "epoch": 0.09235729263040518, + "grad_norm": 159901.375, + "learning_rate": 9.144000000000001e-06, + "loss": 190581.5375, + "step": 45720 + }, + { + "epoch": 0.092377493263089, + "grad_norm": 267349.78125, + "learning_rate": 9.146e-06, + "loss": 67683.1187, + "step": 45730 + }, + { + "epoch": 0.09239769389577282, + "grad_norm": 14473.642578125, + "learning_rate": 9.148e-06, + "loss": 235890.2, + "step": 45740 + }, + { + "epoch": 0.09241789452845663, + "grad_norm": 880532.25, + "learning_rate": 9.15e-06, + "loss": 200743.45, + "step": 45750 + }, + { + "epoch": 0.09243809516114045, + "grad_norm": 79313.3125, + "learning_rate": 9.152000000000001e-06, + "loss": 52620.475, + "step": 45760 + }, + { + "epoch": 0.09245829579382427, + "grad_norm": 55369.60546875, + "learning_rate": 9.154e-06, + "loss": 126897.575, + "step": 45770 + }, + { + "epoch": 0.09247849642650807, + "grad_norm": 10552.587890625, + "learning_rate": 9.156e-06, + "loss": 217104.2, + "step": 45780 + }, + { + "epoch": 0.09249869705919189, + "grad_norm": 19164.693359375, + "learning_rate": 9.158e-06, + "loss": 218393.05, + "step": 45790 + }, + { + "epoch": 0.09251889769187571, + "grad_norm": 7641.90673828125, + "learning_rate": 9.16e-06, + "loss": 95434.2125, + "step": 45800 + }, + { + "epoch": 0.09253909832455952, + "grad_norm": 164097.3125, + "learning_rate": 9.162000000000001e-06, + "loss": 217943.45, + "step": 45810 + }, + { + "epoch": 0.09255929895724334, + "grad_norm": 22128.044921875, + "learning_rate": 9.164e-06, + "loss": 99981.9312, + "step": 45820 + }, + { + "epoch": 0.09257949958992716, + "grad_norm": 4627.263671875, + "learning_rate": 9.166e-06, + "loss": 239651.975, + "step": 45830 + }, + { + "epoch": 0.09259970022261098, + "grad_norm": 33619.8984375, + "learning_rate": 9.168000000000001e-06, + "loss": 520678.0, + "step": 45840 + }, + { + "epoch": 0.09261990085529478, + "grad_norm": 443065.6875, + "learning_rate": 9.17e-06, + "loss": 244454.275, + "step": 45850 + }, + { + "epoch": 0.0926401014879786, + "grad_norm": 28876.22265625, + "learning_rate": 9.172000000000001e-06, + "loss": 144534.4875, + "step": 45860 + }, + { + "epoch": 0.09266030212066242, + "grad_norm": 154037.21875, + "learning_rate": 9.174e-06, + "loss": 119901.675, + "step": 45870 + }, + { + "epoch": 0.09268050275334623, + "grad_norm": 9757.849609375, + "learning_rate": 9.176e-06, + "loss": 60288.9938, + "step": 45880 + }, + { + "epoch": 0.09270070338603005, + "grad_norm": 141977.125, + "learning_rate": 9.178000000000001e-06, + "loss": 81692.225, + "step": 45890 + }, + { + "epoch": 0.09272090401871387, + "grad_norm": 37593.3671875, + "learning_rate": 9.180000000000002e-06, + "loss": 109969.5875, + "step": 45900 + }, + { + "epoch": 0.09274110465139768, + "grad_norm": 179778.0625, + "learning_rate": 9.182000000000001e-06, + "loss": 99637.0437, + "step": 45910 + }, + { + "epoch": 0.0927613052840815, + "grad_norm": 7618.548828125, + "learning_rate": 9.184e-06, + "loss": 257158.125, + "step": 45920 + }, + { + "epoch": 0.09278150591676532, + "grad_norm": 100727.6484375, + "learning_rate": 9.186e-06, + "loss": 166794.05, + "step": 45930 + }, + { + "epoch": 0.09280170654944912, + "grad_norm": 267865.09375, + "learning_rate": 9.188e-06, + "loss": 226811.025, + "step": 45940 + }, + { + "epoch": 0.09282190718213294, + "grad_norm": 168495.765625, + "learning_rate": 9.190000000000002e-06, + "loss": 91962.4375, + "step": 45950 + }, + { + "epoch": 0.09284210781481676, + "grad_norm": 730383.6875, + "learning_rate": 9.192000000000001e-06, + "loss": 128965.475, + "step": 45960 + }, + { + "epoch": 0.09286230844750057, + "grad_norm": 13105.236328125, + "learning_rate": 9.194e-06, + "loss": 62877.1625, + "step": 45970 + }, + { + "epoch": 0.09288250908018439, + "grad_norm": 188052.53125, + "learning_rate": 9.196e-06, + "loss": 122780.775, + "step": 45980 + }, + { + "epoch": 0.09290270971286821, + "grad_norm": 152901.921875, + "learning_rate": 9.198e-06, + "loss": 234247.9, + "step": 45990 + }, + { + "epoch": 0.09292291034555203, + "grad_norm": 289551.875, + "learning_rate": 9.200000000000002e-06, + "loss": 94951.4125, + "step": 46000 + }, + { + "epoch": 0.09294311097823584, + "grad_norm": 19792.6328125, + "learning_rate": 9.202000000000001e-06, + "loss": 100499.15, + "step": 46010 + }, + { + "epoch": 0.09296331161091966, + "grad_norm": 435793.5, + "learning_rate": 9.204e-06, + "loss": 81436.6, + "step": 46020 + }, + { + "epoch": 0.09298351224360348, + "grad_norm": 262796.78125, + "learning_rate": 9.206000000000001e-06, + "loss": 116217.375, + "step": 46030 + }, + { + "epoch": 0.09300371287628728, + "grad_norm": 100176.0, + "learning_rate": 9.208e-06, + "loss": 68155.9937, + "step": 46040 + }, + { + "epoch": 0.0930239135089711, + "grad_norm": 14314.7529296875, + "learning_rate": 9.210000000000002e-06, + "loss": 96540.7312, + "step": 46050 + }, + { + "epoch": 0.09304411414165492, + "grad_norm": 7914.08251953125, + "learning_rate": 9.212000000000001e-06, + "loss": 184402.0625, + "step": 46060 + }, + { + "epoch": 0.09306431477433873, + "grad_norm": 67720.609375, + "learning_rate": 9.214e-06, + "loss": 230941.35, + "step": 46070 + }, + { + "epoch": 0.09308451540702255, + "grad_norm": 127987.734375, + "learning_rate": 9.216000000000001e-06, + "loss": 159377.225, + "step": 46080 + }, + { + "epoch": 0.09310471603970637, + "grad_norm": 67706.515625, + "learning_rate": 9.218e-06, + "loss": 337769.175, + "step": 46090 + }, + { + "epoch": 0.09312491667239017, + "grad_norm": 28200.7890625, + "learning_rate": 9.220000000000002e-06, + "loss": 234763.325, + "step": 46100 + }, + { + "epoch": 0.093145117305074, + "grad_norm": 77539.6484375, + "learning_rate": 9.222e-06, + "loss": 59200.525, + "step": 46110 + }, + { + "epoch": 0.09316531793775781, + "grad_norm": 28950.923828125, + "learning_rate": 9.224e-06, + "loss": 159616.2625, + "step": 46120 + }, + { + "epoch": 0.09318551857044162, + "grad_norm": 55541.46875, + "learning_rate": 9.226000000000001e-06, + "loss": 128365.35, + "step": 46130 + }, + { + "epoch": 0.09320571920312544, + "grad_norm": 457927.96875, + "learning_rate": 9.228e-06, + "loss": 96575.175, + "step": 46140 + }, + { + "epoch": 0.09322591983580926, + "grad_norm": 13552.2890625, + "learning_rate": 9.230000000000001e-06, + "loss": 105978.2875, + "step": 46150 + }, + { + "epoch": 0.09324612046849308, + "grad_norm": 89470.7890625, + "learning_rate": 9.232e-06, + "loss": 188632.9875, + "step": 46160 + }, + { + "epoch": 0.09326632110117689, + "grad_norm": 235723.9375, + "learning_rate": 9.234e-06, + "loss": 295995.25, + "step": 46170 + }, + { + "epoch": 0.0932865217338607, + "grad_norm": 264200.03125, + "learning_rate": 9.236000000000001e-06, + "loss": 203850.6, + "step": 46180 + }, + { + "epoch": 0.09330672236654453, + "grad_norm": 566543.9375, + "learning_rate": 9.238e-06, + "loss": 438249.95, + "step": 46190 + }, + { + "epoch": 0.09332692299922833, + "grad_norm": 8454.111328125, + "learning_rate": 9.240000000000001e-06, + "loss": 96150.325, + "step": 46200 + }, + { + "epoch": 0.09334712363191215, + "grad_norm": 58054.48828125, + "learning_rate": 9.242e-06, + "loss": 246499.55, + "step": 46210 + }, + { + "epoch": 0.09336732426459597, + "grad_norm": 21365.705078125, + "learning_rate": 9.244e-06, + "loss": 147466.9375, + "step": 46220 + }, + { + "epoch": 0.09338752489727978, + "grad_norm": 265855.53125, + "learning_rate": 9.246000000000001e-06, + "loss": 81254.4875, + "step": 46230 + }, + { + "epoch": 0.0934077255299636, + "grad_norm": 6529.91357421875, + "learning_rate": 9.248e-06, + "loss": 68262.85, + "step": 46240 + }, + { + "epoch": 0.09342792616264742, + "grad_norm": 640.9103393554688, + "learning_rate": 9.250000000000001e-06, + "loss": 78641.0625, + "step": 46250 + }, + { + "epoch": 0.09344812679533122, + "grad_norm": 512495.03125, + "learning_rate": 9.252e-06, + "loss": 261661.575, + "step": 46260 + }, + { + "epoch": 0.09346832742801504, + "grad_norm": 27801.998046875, + "learning_rate": 9.254000000000002e-06, + "loss": 168044.8875, + "step": 46270 + }, + { + "epoch": 0.09348852806069886, + "grad_norm": 62726.86328125, + "learning_rate": 9.256e-06, + "loss": 93235.05, + "step": 46280 + }, + { + "epoch": 0.09350872869338267, + "grad_norm": 337731.375, + "learning_rate": 9.258e-06, + "loss": 214332.475, + "step": 46290 + }, + { + "epoch": 0.09352892932606649, + "grad_norm": 1112114.75, + "learning_rate": 9.260000000000001e-06, + "loss": 327058.9, + "step": 46300 + }, + { + "epoch": 0.09354912995875031, + "grad_norm": 3600.560546875, + "learning_rate": 9.262e-06, + "loss": 265759.55, + "step": 46310 + }, + { + "epoch": 0.09356933059143413, + "grad_norm": 248267.6875, + "learning_rate": 9.264000000000001e-06, + "loss": 100511.875, + "step": 46320 + }, + { + "epoch": 0.09358953122411794, + "grad_norm": 33960.76171875, + "learning_rate": 9.266e-06, + "loss": 134375.425, + "step": 46330 + }, + { + "epoch": 0.09360973185680176, + "grad_norm": 41593.5, + "learning_rate": 9.268e-06, + "loss": 90048.15, + "step": 46340 + }, + { + "epoch": 0.09362993248948558, + "grad_norm": 65261.92578125, + "learning_rate": 9.270000000000001e-06, + "loss": 168625.6875, + "step": 46350 + }, + { + "epoch": 0.09365013312216938, + "grad_norm": 8911.318359375, + "learning_rate": 9.272e-06, + "loss": 277486.9, + "step": 46360 + }, + { + "epoch": 0.0936703337548532, + "grad_norm": 1973.4063720703125, + "learning_rate": 9.274000000000001e-06, + "loss": 119492.4875, + "step": 46370 + }, + { + "epoch": 0.09369053438753702, + "grad_norm": 99769.453125, + "learning_rate": 9.276e-06, + "loss": 240966.825, + "step": 46380 + }, + { + "epoch": 0.09371073502022083, + "grad_norm": 173120.65625, + "learning_rate": 9.278e-06, + "loss": 137719.3375, + "step": 46390 + }, + { + "epoch": 0.09373093565290465, + "grad_norm": 236249.40625, + "learning_rate": 9.280000000000001e-06, + "loss": 250921.725, + "step": 46400 + }, + { + "epoch": 0.09375113628558847, + "grad_norm": 83204.1328125, + "learning_rate": 9.282e-06, + "loss": 171686.9875, + "step": 46410 + }, + { + "epoch": 0.09377133691827227, + "grad_norm": 706518.5625, + "learning_rate": 9.284000000000001e-06, + "loss": 266337.55, + "step": 46420 + }, + { + "epoch": 0.0937915375509561, + "grad_norm": 1477394.125, + "learning_rate": 9.286e-06, + "loss": 263652.65, + "step": 46430 + }, + { + "epoch": 0.09381173818363991, + "grad_norm": 175332.625, + "learning_rate": 9.288e-06, + "loss": 159930.65, + "step": 46440 + }, + { + "epoch": 0.09383193881632372, + "grad_norm": 469963.78125, + "learning_rate": 9.29e-06, + "loss": 241612.45, + "step": 46450 + }, + { + "epoch": 0.09385213944900754, + "grad_norm": 72433.8515625, + "learning_rate": 9.292000000000002e-06, + "loss": 214700.35, + "step": 46460 + }, + { + "epoch": 0.09387234008169136, + "grad_norm": 655.57080078125, + "learning_rate": 9.294000000000001e-06, + "loss": 115784.125, + "step": 46470 + }, + { + "epoch": 0.09389254071437518, + "grad_norm": 18602.376953125, + "learning_rate": 9.296e-06, + "loss": 27272.8187, + "step": 46480 + }, + { + "epoch": 0.09391274134705899, + "grad_norm": 4461.48291015625, + "learning_rate": 9.298e-06, + "loss": 54071.0437, + "step": 46490 + }, + { + "epoch": 0.0939329419797428, + "grad_norm": 159699.3125, + "learning_rate": 9.3e-06, + "loss": 114474.725, + "step": 46500 + }, + { + "epoch": 0.09395314261242663, + "grad_norm": 6248.7861328125, + "learning_rate": 9.302000000000002e-06, + "loss": 139648.4125, + "step": 46510 + }, + { + "epoch": 0.09397334324511043, + "grad_norm": 40442.05078125, + "learning_rate": 9.304000000000001e-06, + "loss": 147081.3875, + "step": 46520 + }, + { + "epoch": 0.09399354387779425, + "grad_norm": 30958.638671875, + "learning_rate": 9.306e-06, + "loss": 30066.8, + "step": 46530 + }, + { + "epoch": 0.09401374451047807, + "grad_norm": 1870.718994140625, + "learning_rate": 9.308e-06, + "loss": 60211.075, + "step": 46540 + }, + { + "epoch": 0.09403394514316188, + "grad_norm": 437759.5625, + "learning_rate": 9.31e-06, + "loss": 376378.125, + "step": 46550 + }, + { + "epoch": 0.0940541457758457, + "grad_norm": 35207.93359375, + "learning_rate": 9.312000000000002e-06, + "loss": 102845.5375, + "step": 46560 + }, + { + "epoch": 0.09407434640852952, + "grad_norm": 54097.26953125, + "learning_rate": 9.314000000000001e-06, + "loss": 67254.8188, + "step": 46570 + }, + { + "epoch": 0.09409454704121333, + "grad_norm": 8544.751953125, + "learning_rate": 9.316e-06, + "loss": 45838.6531, + "step": 46580 + }, + { + "epoch": 0.09411474767389715, + "grad_norm": 96588.9375, + "learning_rate": 9.318e-06, + "loss": 235132.325, + "step": 46590 + }, + { + "epoch": 0.09413494830658097, + "grad_norm": 120378.375, + "learning_rate": 9.32e-06, + "loss": 51236.9, + "step": 46600 + }, + { + "epoch": 0.09415514893926477, + "grad_norm": 82699.671875, + "learning_rate": 9.322000000000002e-06, + "loss": 84128.425, + "step": 46610 + }, + { + "epoch": 0.09417534957194859, + "grad_norm": 65079.8828125, + "learning_rate": 9.324000000000001e-06, + "loss": 123643.45, + "step": 46620 + }, + { + "epoch": 0.09419555020463241, + "grad_norm": 136771.78125, + "learning_rate": 9.326e-06, + "loss": 93164.4625, + "step": 46630 + }, + { + "epoch": 0.09421575083731623, + "grad_norm": 327917.96875, + "learning_rate": 9.328000000000001e-06, + "loss": 127304.1375, + "step": 46640 + }, + { + "epoch": 0.09423595147000004, + "grad_norm": 220615.625, + "learning_rate": 9.33e-06, + "loss": 380145.825, + "step": 46650 + }, + { + "epoch": 0.09425615210268386, + "grad_norm": 27910.52734375, + "learning_rate": 9.332000000000001e-06, + "loss": 91322.775, + "step": 46660 + }, + { + "epoch": 0.09427635273536768, + "grad_norm": 667036.625, + "learning_rate": 9.334e-06, + "loss": 233361.1, + "step": 46670 + }, + { + "epoch": 0.09429655336805148, + "grad_norm": 119362.9140625, + "learning_rate": 9.336e-06, + "loss": 93811.7312, + "step": 46680 + }, + { + "epoch": 0.0943167540007353, + "grad_norm": 50742.92578125, + "learning_rate": 9.338000000000001e-06, + "loss": 77935.3, + "step": 46690 + }, + { + "epoch": 0.09433695463341912, + "grad_norm": 183844.1875, + "learning_rate": 9.340000000000002e-06, + "loss": 63460.4062, + "step": 46700 + }, + { + "epoch": 0.09435715526610293, + "grad_norm": 0.0, + "learning_rate": 9.342000000000001e-06, + "loss": 109776.1375, + "step": 46710 + }, + { + "epoch": 0.09437735589878675, + "grad_norm": 373116.8125, + "learning_rate": 9.344e-06, + "loss": 85444.05, + "step": 46720 + }, + { + "epoch": 0.09439755653147057, + "grad_norm": 3049.95751953125, + "learning_rate": 9.346e-06, + "loss": 89262.1687, + "step": 46730 + }, + { + "epoch": 0.09441775716415438, + "grad_norm": 13289.505859375, + "learning_rate": 9.348000000000001e-06, + "loss": 40492.6781, + "step": 46740 + }, + { + "epoch": 0.0944379577968382, + "grad_norm": 2314.4150390625, + "learning_rate": 9.350000000000002e-06, + "loss": 152365.4, + "step": 46750 + }, + { + "epoch": 0.09445815842952202, + "grad_norm": 115130.0625, + "learning_rate": 9.352000000000001e-06, + "loss": 178593.9625, + "step": 46760 + }, + { + "epoch": 0.09447835906220582, + "grad_norm": 72967.265625, + "learning_rate": 9.354e-06, + "loss": 96656.5437, + "step": 46770 + }, + { + "epoch": 0.09449855969488964, + "grad_norm": 5484.22509765625, + "learning_rate": 9.356e-06, + "loss": 75987.1562, + "step": 46780 + }, + { + "epoch": 0.09451876032757346, + "grad_norm": 63021.53515625, + "learning_rate": 9.358000000000001e-06, + "loss": 126269.3375, + "step": 46790 + }, + { + "epoch": 0.09453896096025728, + "grad_norm": 468822.5, + "learning_rate": 9.360000000000002e-06, + "loss": 232314.875, + "step": 46800 + }, + { + "epoch": 0.09455916159294109, + "grad_norm": 71572.953125, + "learning_rate": 9.362000000000001e-06, + "loss": 163725.8, + "step": 46810 + }, + { + "epoch": 0.09457936222562491, + "grad_norm": 106374.1015625, + "learning_rate": 9.364e-06, + "loss": 141176.75, + "step": 46820 + }, + { + "epoch": 0.09459956285830873, + "grad_norm": 18479.30859375, + "learning_rate": 9.366000000000001e-06, + "loss": 42128.3969, + "step": 46830 + }, + { + "epoch": 0.09461976349099253, + "grad_norm": 41433.46875, + "learning_rate": 9.368e-06, + "loss": 105280.5125, + "step": 46840 + }, + { + "epoch": 0.09463996412367635, + "grad_norm": 91382.3984375, + "learning_rate": 9.370000000000002e-06, + "loss": 98374.7937, + "step": 46850 + }, + { + "epoch": 0.09466016475636017, + "grad_norm": 9517.353515625, + "learning_rate": 9.372000000000001e-06, + "loss": 122243.7875, + "step": 46860 + }, + { + "epoch": 0.09468036538904398, + "grad_norm": 0.0, + "learning_rate": 9.374e-06, + "loss": 50266.85, + "step": 46870 + }, + { + "epoch": 0.0947005660217278, + "grad_norm": 103658.6796875, + "learning_rate": 9.376000000000001e-06, + "loss": 141457.3125, + "step": 46880 + }, + { + "epoch": 0.09472076665441162, + "grad_norm": 347900.84375, + "learning_rate": 9.378e-06, + "loss": 90579.2937, + "step": 46890 + }, + { + "epoch": 0.09474096728709543, + "grad_norm": 15006.875, + "learning_rate": 9.38e-06, + "loss": 352899.125, + "step": 46900 + }, + { + "epoch": 0.09476116791977925, + "grad_norm": 111963.734375, + "learning_rate": 9.382000000000001e-06, + "loss": 56960.025, + "step": 46910 + }, + { + "epoch": 0.09478136855246307, + "grad_norm": 53803.28125, + "learning_rate": 9.384e-06, + "loss": 38664.7, + "step": 46920 + }, + { + "epoch": 0.09480156918514687, + "grad_norm": 115450.2734375, + "learning_rate": 9.386000000000001e-06, + "loss": 141985.1125, + "step": 46930 + }, + { + "epoch": 0.09482176981783069, + "grad_norm": 116046.0546875, + "learning_rate": 9.388e-06, + "loss": 222044.5, + "step": 46940 + }, + { + "epoch": 0.09484197045051451, + "grad_norm": 12411.228515625, + "learning_rate": 9.39e-06, + "loss": 52508.6, + "step": 46950 + }, + { + "epoch": 0.09486217108319833, + "grad_norm": 17579.1875, + "learning_rate": 9.392000000000001e-06, + "loss": 90923.125, + "step": 46960 + }, + { + "epoch": 0.09488237171588214, + "grad_norm": 1430355.75, + "learning_rate": 9.394e-06, + "loss": 187589.1, + "step": 46970 + }, + { + "epoch": 0.09490257234856596, + "grad_norm": 1419422.5, + "learning_rate": 9.396000000000001e-06, + "loss": 358610.975, + "step": 46980 + }, + { + "epoch": 0.09492277298124978, + "grad_norm": 176275.34375, + "learning_rate": 9.398e-06, + "loss": 174626.5625, + "step": 46990 + }, + { + "epoch": 0.09494297361393358, + "grad_norm": 691264.0625, + "learning_rate": 9.4e-06, + "loss": 304611.05, + "step": 47000 + }, + { + "epoch": 0.0949631742466174, + "grad_norm": 229383.328125, + "learning_rate": 9.402e-06, + "loss": 83970.2937, + "step": 47010 + }, + { + "epoch": 0.09498337487930122, + "grad_norm": 42668.19140625, + "learning_rate": 9.404e-06, + "loss": 58459.3938, + "step": 47020 + }, + { + "epoch": 0.09500357551198503, + "grad_norm": 540789.625, + "learning_rate": 9.406000000000001e-06, + "loss": 212511.675, + "step": 47030 + }, + { + "epoch": 0.09502377614466885, + "grad_norm": 15111.4306640625, + "learning_rate": 9.408e-06, + "loss": 153621.475, + "step": 47040 + }, + { + "epoch": 0.09504397677735267, + "grad_norm": 210458.234375, + "learning_rate": 9.41e-06, + "loss": 170857.4125, + "step": 47050 + }, + { + "epoch": 0.09506417741003648, + "grad_norm": 2192.550537109375, + "learning_rate": 9.412e-06, + "loss": 104305.0188, + "step": 47060 + }, + { + "epoch": 0.0950843780427203, + "grad_norm": 281448.25, + "learning_rate": 9.414000000000002e-06, + "loss": 182641.775, + "step": 47070 + }, + { + "epoch": 0.09510457867540412, + "grad_norm": 27010.73046875, + "learning_rate": 9.416000000000001e-06, + "loss": 63038.55, + "step": 47080 + }, + { + "epoch": 0.09512477930808792, + "grad_norm": 26143.119140625, + "learning_rate": 9.418e-06, + "loss": 110448.2125, + "step": 47090 + }, + { + "epoch": 0.09514497994077174, + "grad_norm": 232298.875, + "learning_rate": 9.42e-06, + "loss": 63009.3875, + "step": 47100 + }, + { + "epoch": 0.09516518057345556, + "grad_norm": 478376.34375, + "learning_rate": 9.422e-06, + "loss": 296112.075, + "step": 47110 + }, + { + "epoch": 0.09518538120613938, + "grad_norm": 165395.109375, + "learning_rate": 9.424000000000002e-06, + "loss": 94544.3625, + "step": 47120 + }, + { + "epoch": 0.09520558183882319, + "grad_norm": 31792.802734375, + "learning_rate": 9.426000000000001e-06, + "loss": 123300.0625, + "step": 47130 + }, + { + "epoch": 0.09522578247150701, + "grad_norm": 39666.1640625, + "learning_rate": 9.428e-06, + "loss": 149912.775, + "step": 47140 + }, + { + "epoch": 0.09524598310419083, + "grad_norm": 33672.796875, + "learning_rate": 9.43e-06, + "loss": 229072.225, + "step": 47150 + }, + { + "epoch": 0.09526618373687464, + "grad_norm": 14491.4287109375, + "learning_rate": 9.432e-06, + "loss": 55513.1937, + "step": 47160 + }, + { + "epoch": 0.09528638436955846, + "grad_norm": 140248.4375, + "learning_rate": 9.434000000000001e-06, + "loss": 30409.4531, + "step": 47170 + }, + { + "epoch": 0.09530658500224228, + "grad_norm": 6779.146484375, + "learning_rate": 9.436e-06, + "loss": 74605.5688, + "step": 47180 + }, + { + "epoch": 0.09532678563492608, + "grad_norm": 22752.58203125, + "learning_rate": 9.438e-06, + "loss": 219691.95, + "step": 47190 + }, + { + "epoch": 0.0953469862676099, + "grad_norm": 20991.408203125, + "learning_rate": 9.440000000000001e-06, + "loss": 123538.6125, + "step": 47200 + }, + { + "epoch": 0.09536718690029372, + "grad_norm": 57877.171875, + "learning_rate": 9.442e-06, + "loss": 38174.2937, + "step": 47210 + }, + { + "epoch": 0.09538738753297753, + "grad_norm": 66024.2734375, + "learning_rate": 9.444000000000001e-06, + "loss": 84112.85, + "step": 47220 + }, + { + "epoch": 0.09540758816566135, + "grad_norm": 326956.5625, + "learning_rate": 9.446e-06, + "loss": 158872.4, + "step": 47230 + }, + { + "epoch": 0.09542778879834517, + "grad_norm": 310584.3125, + "learning_rate": 9.448e-06, + "loss": 323963.075, + "step": 47240 + }, + { + "epoch": 0.09544798943102897, + "grad_norm": 168513.859375, + "learning_rate": 9.450000000000001e-06, + "loss": 341210.975, + "step": 47250 + }, + { + "epoch": 0.0954681900637128, + "grad_norm": 61648.453125, + "learning_rate": 9.452000000000002e-06, + "loss": 191189.925, + "step": 47260 + }, + { + "epoch": 0.09548839069639661, + "grad_norm": 182129.328125, + "learning_rate": 9.454000000000001e-06, + "loss": 179232.1625, + "step": 47270 + }, + { + "epoch": 0.09550859132908043, + "grad_norm": 46566.81640625, + "learning_rate": 9.456e-06, + "loss": 170560.3125, + "step": 47280 + }, + { + "epoch": 0.09552879196176424, + "grad_norm": 514769.0625, + "learning_rate": 9.458e-06, + "loss": 356242.6, + "step": 47290 + }, + { + "epoch": 0.09554899259444806, + "grad_norm": 71900.7578125, + "learning_rate": 9.460000000000001e-06, + "loss": 312655.975, + "step": 47300 + }, + { + "epoch": 0.09556919322713188, + "grad_norm": 8757.615234375, + "learning_rate": 9.462000000000002e-06, + "loss": 175948.425, + "step": 47310 + }, + { + "epoch": 0.09558939385981569, + "grad_norm": 19175.158203125, + "learning_rate": 9.464000000000001e-06, + "loss": 124183.8375, + "step": 47320 + }, + { + "epoch": 0.0956095944924995, + "grad_norm": 117808.34375, + "learning_rate": 9.466e-06, + "loss": 138727.0125, + "step": 47330 + }, + { + "epoch": 0.09562979512518333, + "grad_norm": 251998.609375, + "learning_rate": 9.468e-06, + "loss": 251223.475, + "step": 47340 + }, + { + "epoch": 0.09564999575786713, + "grad_norm": 73980.6796875, + "learning_rate": 9.47e-06, + "loss": 273819.975, + "step": 47350 + }, + { + "epoch": 0.09567019639055095, + "grad_norm": 4803.04833984375, + "learning_rate": 9.472000000000002e-06, + "loss": 196638.8, + "step": 47360 + }, + { + "epoch": 0.09569039702323477, + "grad_norm": 432875.375, + "learning_rate": 9.474000000000001e-06, + "loss": 182028.8375, + "step": 47370 + }, + { + "epoch": 0.09571059765591858, + "grad_norm": 353477.3125, + "learning_rate": 9.476e-06, + "loss": 156578.2, + "step": 47380 + }, + { + "epoch": 0.0957307982886024, + "grad_norm": 44556.44921875, + "learning_rate": 9.478e-06, + "loss": 74611.4187, + "step": 47390 + }, + { + "epoch": 0.09575099892128622, + "grad_norm": 58005.3203125, + "learning_rate": 9.48e-06, + "loss": 200448.0625, + "step": 47400 + }, + { + "epoch": 0.09577119955397002, + "grad_norm": 140529.6875, + "learning_rate": 9.482000000000002e-06, + "loss": 205065.0, + "step": 47410 + }, + { + "epoch": 0.09579140018665384, + "grad_norm": 40280.79296875, + "learning_rate": 9.484000000000001e-06, + "loss": 73589.45, + "step": 47420 + }, + { + "epoch": 0.09581160081933766, + "grad_norm": 31893.158203125, + "learning_rate": 9.486e-06, + "loss": 182745.15, + "step": 47430 + }, + { + "epoch": 0.09583180145202148, + "grad_norm": 353724.96875, + "learning_rate": 9.488000000000001e-06, + "loss": 191168.775, + "step": 47440 + }, + { + "epoch": 0.09585200208470529, + "grad_norm": 365116.3125, + "learning_rate": 9.49e-06, + "loss": 230753.35, + "step": 47450 + }, + { + "epoch": 0.09587220271738911, + "grad_norm": 3043.432373046875, + "learning_rate": 9.492000000000002e-06, + "loss": 119435.2875, + "step": 47460 + }, + { + "epoch": 0.09589240335007293, + "grad_norm": 119322.8046875, + "learning_rate": 9.494000000000001e-06, + "loss": 129422.925, + "step": 47470 + }, + { + "epoch": 0.09591260398275674, + "grad_norm": 699840.5625, + "learning_rate": 9.496e-06, + "loss": 191857.45, + "step": 47480 + }, + { + "epoch": 0.09593280461544056, + "grad_norm": 707805.8125, + "learning_rate": 9.498000000000001e-06, + "loss": 203291.6125, + "step": 47490 + }, + { + "epoch": 0.09595300524812438, + "grad_norm": 18952.669921875, + "learning_rate": 9.5e-06, + "loss": 120078.275, + "step": 47500 + }, + { + "epoch": 0.09597320588080818, + "grad_norm": 69820.1640625, + "learning_rate": 9.502000000000002e-06, + "loss": 97512.5625, + "step": 47510 + }, + { + "epoch": 0.095993406513492, + "grad_norm": 64618.1171875, + "learning_rate": 9.504e-06, + "loss": 131178.5125, + "step": 47520 + }, + { + "epoch": 0.09601360714617582, + "grad_norm": 25405.07421875, + "learning_rate": 9.506e-06, + "loss": 350013.35, + "step": 47530 + }, + { + "epoch": 0.09603380777885963, + "grad_norm": 48404.1640625, + "learning_rate": 9.508000000000001e-06, + "loss": 174240.4625, + "step": 47540 + }, + { + "epoch": 0.09605400841154345, + "grad_norm": 116845.3984375, + "learning_rate": 9.51e-06, + "loss": 112633.8375, + "step": 47550 + }, + { + "epoch": 0.09607420904422727, + "grad_norm": 185281.859375, + "learning_rate": 9.512000000000001e-06, + "loss": 195389.375, + "step": 47560 + }, + { + "epoch": 0.09609440967691107, + "grad_norm": 499725.71875, + "learning_rate": 9.514e-06, + "loss": 193901.8625, + "step": 47570 + }, + { + "epoch": 0.0961146103095949, + "grad_norm": 175661.03125, + "learning_rate": 9.516e-06, + "loss": 131472.425, + "step": 47580 + }, + { + "epoch": 0.09613481094227871, + "grad_norm": 33852.640625, + "learning_rate": 9.518000000000001e-06, + "loss": 134070.25, + "step": 47590 + }, + { + "epoch": 0.09615501157496253, + "grad_norm": 91415.4140625, + "learning_rate": 9.52e-06, + "loss": 148481.0, + "step": 47600 + }, + { + "epoch": 0.09617521220764634, + "grad_norm": 516224.75, + "learning_rate": 9.522000000000001e-06, + "loss": 101447.2563, + "step": 47610 + }, + { + "epoch": 0.09619541284033016, + "grad_norm": 486444.28125, + "learning_rate": 9.524e-06, + "loss": 124220.925, + "step": 47620 + }, + { + "epoch": 0.09621561347301398, + "grad_norm": 38447.0390625, + "learning_rate": 9.526000000000002e-06, + "loss": 180603.525, + "step": 47630 + }, + { + "epoch": 0.09623581410569779, + "grad_norm": 10114.1884765625, + "learning_rate": 9.528000000000001e-06, + "loss": 244464.675, + "step": 47640 + }, + { + "epoch": 0.0962560147383816, + "grad_norm": 505567.15625, + "learning_rate": 9.53e-06, + "loss": 209475.425, + "step": 47650 + }, + { + "epoch": 0.09627621537106543, + "grad_norm": 60830.31640625, + "learning_rate": 9.532000000000001e-06, + "loss": 175128.6625, + "step": 47660 + }, + { + "epoch": 0.09629641600374923, + "grad_norm": 2501.8798828125, + "learning_rate": 9.534e-06, + "loss": 153576.45, + "step": 47670 + }, + { + "epoch": 0.09631661663643305, + "grad_norm": 25311.921875, + "learning_rate": 9.536000000000002e-06, + "loss": 218019.0, + "step": 47680 + }, + { + "epoch": 0.09633681726911687, + "grad_norm": 11789.580078125, + "learning_rate": 9.538e-06, + "loss": 41635.9281, + "step": 47690 + }, + { + "epoch": 0.09635701790180068, + "grad_norm": 461897.25, + "learning_rate": 9.54e-06, + "loss": 151856.8625, + "step": 47700 + }, + { + "epoch": 0.0963772185344845, + "grad_norm": 60147.3671875, + "learning_rate": 9.542000000000001e-06, + "loss": 115345.075, + "step": 47710 + }, + { + "epoch": 0.09639741916716832, + "grad_norm": 123717.75, + "learning_rate": 9.544e-06, + "loss": 160591.2375, + "step": 47720 + }, + { + "epoch": 0.09641761979985213, + "grad_norm": 12393.509765625, + "learning_rate": 9.546000000000001e-06, + "loss": 86808.25, + "step": 47730 + }, + { + "epoch": 0.09643782043253595, + "grad_norm": 48144.546875, + "learning_rate": 9.548e-06, + "loss": 94420.625, + "step": 47740 + }, + { + "epoch": 0.09645802106521977, + "grad_norm": 1359.3773193359375, + "learning_rate": 9.55e-06, + "loss": 147550.725, + "step": 47750 + }, + { + "epoch": 0.09647822169790359, + "grad_norm": 6491.76513671875, + "learning_rate": 9.552000000000001e-06, + "loss": 59524.2937, + "step": 47760 + }, + { + "epoch": 0.09649842233058739, + "grad_norm": 514249.3125, + "learning_rate": 9.554e-06, + "loss": 227538.6, + "step": 47770 + }, + { + "epoch": 0.09651862296327121, + "grad_norm": 61787.02734375, + "learning_rate": 9.556000000000001e-06, + "loss": 113564.0125, + "step": 47780 + }, + { + "epoch": 0.09653882359595503, + "grad_norm": 1039623.75, + "learning_rate": 9.558e-06, + "loss": 314163.475, + "step": 47790 + }, + { + "epoch": 0.09655902422863884, + "grad_norm": 38904.28515625, + "learning_rate": 9.56e-06, + "loss": 318370.275, + "step": 47800 + }, + { + "epoch": 0.09657922486132266, + "grad_norm": 29678.046875, + "learning_rate": 9.562000000000001e-06, + "loss": 304996.225, + "step": 47810 + }, + { + "epoch": 0.09659942549400648, + "grad_norm": 56844.44140625, + "learning_rate": 9.564e-06, + "loss": 240709.575, + "step": 47820 + }, + { + "epoch": 0.09661962612669028, + "grad_norm": 96349.25, + "learning_rate": 9.566000000000001e-06, + "loss": 110469.4875, + "step": 47830 + }, + { + "epoch": 0.0966398267593741, + "grad_norm": 227052.578125, + "learning_rate": 9.568e-06, + "loss": 108176.775, + "step": 47840 + }, + { + "epoch": 0.09666002739205792, + "grad_norm": 1018689.25, + "learning_rate": 9.57e-06, + "loss": 238474.5, + "step": 47850 + }, + { + "epoch": 0.09668022802474173, + "grad_norm": 81988.796875, + "learning_rate": 9.572000000000001e-06, + "loss": 264346.45, + "step": 47860 + }, + { + "epoch": 0.09670042865742555, + "grad_norm": 206497.703125, + "learning_rate": 9.574000000000002e-06, + "loss": 131403.7875, + "step": 47870 + }, + { + "epoch": 0.09672062929010937, + "grad_norm": 190005.625, + "learning_rate": 9.576000000000001e-06, + "loss": 309928.9, + "step": 47880 + }, + { + "epoch": 0.09674082992279318, + "grad_norm": 79654.578125, + "learning_rate": 9.578e-06, + "loss": 93532.8813, + "step": 47890 + }, + { + "epoch": 0.096761030555477, + "grad_norm": 39583.3203125, + "learning_rate": 9.58e-06, + "loss": 86935.3, + "step": 47900 + }, + { + "epoch": 0.09678123118816082, + "grad_norm": 470970.375, + "learning_rate": 9.582e-06, + "loss": 99181.125, + "step": 47910 + }, + { + "epoch": 0.09680143182084464, + "grad_norm": 72635.8125, + "learning_rate": 9.584000000000002e-06, + "loss": 57812.4625, + "step": 47920 + }, + { + "epoch": 0.09682163245352844, + "grad_norm": 75479.734375, + "learning_rate": 9.586000000000001e-06, + "loss": 134370.975, + "step": 47930 + }, + { + "epoch": 0.09684183308621226, + "grad_norm": 46360.79296875, + "learning_rate": 9.588e-06, + "loss": 146906.3625, + "step": 47940 + }, + { + "epoch": 0.09686203371889608, + "grad_norm": 138441.125, + "learning_rate": 9.59e-06, + "loss": 152085.3, + "step": 47950 + }, + { + "epoch": 0.09688223435157989, + "grad_norm": 18015.462890625, + "learning_rate": 9.592e-06, + "loss": 37407.8531, + "step": 47960 + }, + { + "epoch": 0.09690243498426371, + "grad_norm": 21201.64453125, + "learning_rate": 9.594000000000002e-06, + "loss": 147313.1125, + "step": 47970 + }, + { + "epoch": 0.09692263561694753, + "grad_norm": 242734.671875, + "learning_rate": 9.596000000000001e-06, + "loss": 109305.7875, + "step": 47980 + }, + { + "epoch": 0.09694283624963133, + "grad_norm": 18331.232421875, + "learning_rate": 9.598e-06, + "loss": 191153.4375, + "step": 47990 + }, + { + "epoch": 0.09696303688231515, + "grad_norm": 166724.296875, + "learning_rate": 9.600000000000001e-06, + "loss": 200958.275, + "step": 48000 + }, + { + "epoch": 0.09698323751499897, + "grad_norm": 7195.322265625, + "learning_rate": 9.602e-06, + "loss": 239762.225, + "step": 48010 + }, + { + "epoch": 0.09700343814768278, + "grad_norm": 16512.119140625, + "learning_rate": 9.604000000000002e-06, + "loss": 149839.575, + "step": 48020 + }, + { + "epoch": 0.0970236387803666, + "grad_norm": 52264.6484375, + "learning_rate": 9.606000000000001e-06, + "loss": 249982.225, + "step": 48030 + }, + { + "epoch": 0.09704383941305042, + "grad_norm": 11157.94921875, + "learning_rate": 9.608e-06, + "loss": 77195.05, + "step": 48040 + }, + { + "epoch": 0.09706404004573423, + "grad_norm": 66870.21875, + "learning_rate": 9.610000000000001e-06, + "loss": 180107.575, + "step": 48050 + }, + { + "epoch": 0.09708424067841805, + "grad_norm": 127686.1796875, + "learning_rate": 9.612000000000002e-06, + "loss": 106123.4875, + "step": 48060 + }, + { + "epoch": 0.09710444131110187, + "grad_norm": 878.9031982421875, + "learning_rate": 9.614000000000001e-06, + "loss": 256863.975, + "step": 48070 + }, + { + "epoch": 0.09712464194378569, + "grad_norm": 90390.8515625, + "learning_rate": 9.616e-06, + "loss": 217518.175, + "step": 48080 + }, + { + "epoch": 0.09714484257646949, + "grad_norm": 17801.66015625, + "learning_rate": 9.618e-06, + "loss": 163474.975, + "step": 48090 + }, + { + "epoch": 0.09716504320915331, + "grad_norm": 51258.91796875, + "learning_rate": 9.620000000000001e-06, + "loss": 139357.05, + "step": 48100 + }, + { + "epoch": 0.09718524384183713, + "grad_norm": 152551.40625, + "learning_rate": 9.622000000000002e-06, + "loss": 201376.2, + "step": 48110 + }, + { + "epoch": 0.09720544447452094, + "grad_norm": 66223.3359375, + "learning_rate": 9.624000000000001e-06, + "loss": 80143.1187, + "step": 48120 + }, + { + "epoch": 0.09722564510720476, + "grad_norm": 135943.578125, + "learning_rate": 9.626e-06, + "loss": 54066.6625, + "step": 48130 + }, + { + "epoch": 0.09724584573988858, + "grad_norm": 280255.84375, + "learning_rate": 9.628e-06, + "loss": 65159.3875, + "step": 48140 + }, + { + "epoch": 0.09726604637257238, + "grad_norm": 0.0, + "learning_rate": 9.630000000000001e-06, + "loss": 264226.8, + "step": 48150 + }, + { + "epoch": 0.0972862470052562, + "grad_norm": 409086.625, + "learning_rate": 9.632e-06, + "loss": 201657.125, + "step": 48160 + }, + { + "epoch": 0.09730644763794002, + "grad_norm": 67073.0, + "learning_rate": 9.634000000000001e-06, + "loss": 80711.4375, + "step": 48170 + }, + { + "epoch": 0.09732664827062383, + "grad_norm": 38320.95703125, + "learning_rate": 9.636e-06, + "loss": 99645.4438, + "step": 48180 + }, + { + "epoch": 0.09734684890330765, + "grad_norm": 23835.734375, + "learning_rate": 9.638e-06, + "loss": 169911.0, + "step": 48190 + }, + { + "epoch": 0.09736704953599147, + "grad_norm": 19157.427734375, + "learning_rate": 9.640000000000001e-06, + "loss": 154046.85, + "step": 48200 + }, + { + "epoch": 0.09738725016867528, + "grad_norm": 8821.78125, + "learning_rate": 9.642e-06, + "loss": 167414.9875, + "step": 48210 + }, + { + "epoch": 0.0974074508013591, + "grad_norm": 9302.7685546875, + "learning_rate": 9.644000000000001e-06, + "loss": 75773.15, + "step": 48220 + }, + { + "epoch": 0.09742765143404292, + "grad_norm": 400779.375, + "learning_rate": 9.646e-06, + "loss": 105847.5875, + "step": 48230 + }, + { + "epoch": 0.09744785206672674, + "grad_norm": 61116.828125, + "learning_rate": 9.648000000000001e-06, + "loss": 221167.925, + "step": 48240 + }, + { + "epoch": 0.09746805269941054, + "grad_norm": 62530.45703125, + "learning_rate": 9.65e-06, + "loss": 110602.925, + "step": 48250 + }, + { + "epoch": 0.09748825333209436, + "grad_norm": 286093.875, + "learning_rate": 9.652e-06, + "loss": 116125.9875, + "step": 48260 + }, + { + "epoch": 0.09750845396477818, + "grad_norm": 341355.9375, + "learning_rate": 9.654000000000001e-06, + "loss": 144220.55, + "step": 48270 + }, + { + "epoch": 0.09752865459746199, + "grad_norm": 44566.953125, + "learning_rate": 9.656e-06, + "loss": 242710.05, + "step": 48280 + }, + { + "epoch": 0.09754885523014581, + "grad_norm": 1348522.375, + "learning_rate": 9.658000000000001e-06, + "loss": 286123.275, + "step": 48290 + }, + { + "epoch": 0.09756905586282963, + "grad_norm": 342709.84375, + "learning_rate": 9.66e-06, + "loss": 119717.575, + "step": 48300 + }, + { + "epoch": 0.09758925649551344, + "grad_norm": 5769.72998046875, + "learning_rate": 9.662e-06, + "loss": 104517.65, + "step": 48310 + }, + { + "epoch": 0.09760945712819726, + "grad_norm": 40212.0, + "learning_rate": 9.664000000000001e-06, + "loss": 87659.0813, + "step": 48320 + }, + { + "epoch": 0.09762965776088108, + "grad_norm": 4927.33740234375, + "learning_rate": 9.666e-06, + "loss": 76892.0875, + "step": 48330 + }, + { + "epoch": 0.09764985839356488, + "grad_norm": 353991.1875, + "learning_rate": 9.668000000000001e-06, + "loss": 151741.475, + "step": 48340 + }, + { + "epoch": 0.0976700590262487, + "grad_norm": 145936.6875, + "learning_rate": 9.67e-06, + "loss": 127014.8375, + "step": 48350 + }, + { + "epoch": 0.09769025965893252, + "grad_norm": 406620.8125, + "learning_rate": 9.672e-06, + "loss": 219247.55, + "step": 48360 + }, + { + "epoch": 0.09771046029161633, + "grad_norm": 45291.28515625, + "learning_rate": 9.674000000000001e-06, + "loss": 64127.9625, + "step": 48370 + }, + { + "epoch": 0.09773066092430015, + "grad_norm": 34062.484375, + "learning_rate": 9.676e-06, + "loss": 152086.0125, + "step": 48380 + }, + { + "epoch": 0.09775086155698397, + "grad_norm": 3312.592529296875, + "learning_rate": 9.678000000000001e-06, + "loss": 166738.725, + "step": 48390 + }, + { + "epoch": 0.09777106218966779, + "grad_norm": 459432.15625, + "learning_rate": 9.68e-06, + "loss": 139597.925, + "step": 48400 + }, + { + "epoch": 0.0977912628223516, + "grad_norm": 44969.10546875, + "learning_rate": 9.682e-06, + "loss": 295618.1, + "step": 48410 + }, + { + "epoch": 0.09781146345503541, + "grad_norm": 62634.1484375, + "learning_rate": 9.684e-06, + "loss": 116815.8375, + "step": 48420 + }, + { + "epoch": 0.09783166408771923, + "grad_norm": 1035006.3125, + "learning_rate": 9.686000000000002e-06, + "loss": 237203.05, + "step": 48430 + }, + { + "epoch": 0.09785186472040304, + "grad_norm": 44298.828125, + "learning_rate": 9.688000000000001e-06, + "loss": 179152.1625, + "step": 48440 + }, + { + "epoch": 0.09787206535308686, + "grad_norm": 235299.921875, + "learning_rate": 9.69e-06, + "loss": 273700.775, + "step": 48450 + }, + { + "epoch": 0.09789226598577068, + "grad_norm": 160474.0625, + "learning_rate": 9.692e-06, + "loss": 231529.375, + "step": 48460 + }, + { + "epoch": 0.09791246661845449, + "grad_norm": 16202.5751953125, + "learning_rate": 9.694e-06, + "loss": 76561.9563, + "step": 48470 + }, + { + "epoch": 0.0979326672511383, + "grad_norm": 160021.6875, + "learning_rate": 9.696000000000002e-06, + "loss": 79471.0312, + "step": 48480 + }, + { + "epoch": 0.09795286788382213, + "grad_norm": 14364.4296875, + "learning_rate": 9.698000000000001e-06, + "loss": 119522.925, + "step": 48490 + }, + { + "epoch": 0.09797306851650593, + "grad_norm": 36869.19921875, + "learning_rate": 9.7e-06, + "loss": 295457.125, + "step": 48500 + }, + { + "epoch": 0.09799326914918975, + "grad_norm": 465426.25, + "learning_rate": 9.702e-06, + "loss": 331330.55, + "step": 48510 + }, + { + "epoch": 0.09801346978187357, + "grad_norm": 65821.5625, + "learning_rate": 9.704e-06, + "loss": 213389.975, + "step": 48520 + }, + { + "epoch": 0.09803367041455738, + "grad_norm": 246624.359375, + "learning_rate": 9.706000000000002e-06, + "loss": 112062.875, + "step": 48530 + }, + { + "epoch": 0.0980538710472412, + "grad_norm": 21761.8125, + "learning_rate": 9.708000000000001e-06, + "loss": 103762.9312, + "step": 48540 + }, + { + "epoch": 0.09807407167992502, + "grad_norm": 50231.37890625, + "learning_rate": 9.71e-06, + "loss": 211297.675, + "step": 48550 + }, + { + "epoch": 0.09809427231260884, + "grad_norm": 0.0, + "learning_rate": 9.712e-06, + "loss": 69922.1938, + "step": 48560 + }, + { + "epoch": 0.09811447294529264, + "grad_norm": 180373.546875, + "learning_rate": 9.714e-06, + "loss": 108258.5125, + "step": 48570 + }, + { + "epoch": 0.09813467357797646, + "grad_norm": 238608.3125, + "learning_rate": 9.716000000000002e-06, + "loss": 81328.5375, + "step": 48580 + }, + { + "epoch": 0.09815487421066028, + "grad_norm": 17597.009765625, + "learning_rate": 9.718e-06, + "loss": 28861.4844, + "step": 48590 + }, + { + "epoch": 0.09817507484334409, + "grad_norm": 34800.85546875, + "learning_rate": 9.72e-06, + "loss": 76492.7188, + "step": 48600 + }, + { + "epoch": 0.09819527547602791, + "grad_norm": 116594.984375, + "learning_rate": 9.722000000000001e-06, + "loss": 111076.2875, + "step": 48610 + }, + { + "epoch": 0.09821547610871173, + "grad_norm": 8457.3974609375, + "learning_rate": 9.724e-06, + "loss": 230027.85, + "step": 48620 + }, + { + "epoch": 0.09823567674139554, + "grad_norm": 94357.9765625, + "learning_rate": 9.726000000000001e-06, + "loss": 105082.15, + "step": 48630 + }, + { + "epoch": 0.09825587737407936, + "grad_norm": 2369.99755859375, + "learning_rate": 9.728e-06, + "loss": 144242.5375, + "step": 48640 + }, + { + "epoch": 0.09827607800676318, + "grad_norm": 212267.203125, + "learning_rate": 9.73e-06, + "loss": 62732.6125, + "step": 48650 + }, + { + "epoch": 0.09829627863944698, + "grad_norm": 131741.671875, + "learning_rate": 9.732000000000001e-06, + "loss": 119610.125, + "step": 48660 + }, + { + "epoch": 0.0983164792721308, + "grad_norm": 271834.84375, + "learning_rate": 9.734000000000002e-06, + "loss": 72496.975, + "step": 48670 + }, + { + "epoch": 0.09833667990481462, + "grad_norm": 14078.43359375, + "learning_rate": 9.736000000000001e-06, + "loss": 69344.6062, + "step": 48680 + }, + { + "epoch": 0.09835688053749843, + "grad_norm": 132570.1875, + "learning_rate": 9.738e-06, + "loss": 112294.4125, + "step": 48690 + }, + { + "epoch": 0.09837708117018225, + "grad_norm": 59331.921875, + "learning_rate": 9.74e-06, + "loss": 101669.3375, + "step": 48700 + }, + { + "epoch": 0.09839728180286607, + "grad_norm": 259610.859375, + "learning_rate": 9.742000000000001e-06, + "loss": 310746.75, + "step": 48710 + }, + { + "epoch": 0.09841748243554987, + "grad_norm": 36424.44140625, + "learning_rate": 9.744000000000002e-06, + "loss": 101567.9187, + "step": 48720 + }, + { + "epoch": 0.0984376830682337, + "grad_norm": 133667.046875, + "learning_rate": 9.746000000000001e-06, + "loss": 90589.9937, + "step": 48730 + }, + { + "epoch": 0.09845788370091751, + "grad_norm": 15896.0830078125, + "learning_rate": 9.748e-06, + "loss": 213279.775, + "step": 48740 + }, + { + "epoch": 0.09847808433360133, + "grad_norm": 66427.3046875, + "learning_rate": 9.75e-06, + "loss": 178260.8375, + "step": 48750 + }, + { + "epoch": 0.09849828496628514, + "grad_norm": 41470.01953125, + "learning_rate": 9.752e-06, + "loss": 239947.425, + "step": 48760 + }, + { + "epoch": 0.09851848559896896, + "grad_norm": 262503.28125, + "learning_rate": 9.754000000000002e-06, + "loss": 167878.275, + "step": 48770 + }, + { + "epoch": 0.09853868623165278, + "grad_norm": 855613.5, + "learning_rate": 9.756000000000001e-06, + "loss": 236510.325, + "step": 48780 + }, + { + "epoch": 0.09855888686433659, + "grad_norm": 154802.53125, + "learning_rate": 9.758e-06, + "loss": 193831.75, + "step": 48790 + }, + { + "epoch": 0.0985790874970204, + "grad_norm": 628548.0625, + "learning_rate": 9.760000000000001e-06, + "loss": 224359.65, + "step": 48800 + }, + { + "epoch": 0.09859928812970423, + "grad_norm": 8727.052734375, + "learning_rate": 9.762e-06, + "loss": 119954.8125, + "step": 48810 + }, + { + "epoch": 0.09861948876238803, + "grad_norm": 1166671.375, + "learning_rate": 9.764000000000002e-06, + "loss": 427897.3, + "step": 48820 + }, + { + "epoch": 0.09863968939507185, + "grad_norm": 35682.14453125, + "learning_rate": 9.766000000000001e-06, + "loss": 91982.975, + "step": 48830 + }, + { + "epoch": 0.09865989002775567, + "grad_norm": 93132.046875, + "learning_rate": 9.768e-06, + "loss": 505733.7, + "step": 48840 + }, + { + "epoch": 0.09868009066043948, + "grad_norm": 13880.31640625, + "learning_rate": 9.770000000000001e-06, + "loss": 69753.5562, + "step": 48850 + }, + { + "epoch": 0.0987002912931233, + "grad_norm": 125278.0234375, + "learning_rate": 9.772e-06, + "loss": 132324.6375, + "step": 48860 + }, + { + "epoch": 0.09872049192580712, + "grad_norm": 684590.1875, + "learning_rate": 9.774000000000002e-06, + "loss": 214495.275, + "step": 48870 + }, + { + "epoch": 0.09874069255849093, + "grad_norm": 59173.75390625, + "learning_rate": 9.776000000000001e-06, + "loss": 155711.2, + "step": 48880 + }, + { + "epoch": 0.09876089319117475, + "grad_norm": 706499.875, + "learning_rate": 9.778e-06, + "loss": 169028.4, + "step": 48890 + }, + { + "epoch": 0.09878109382385857, + "grad_norm": 456589.5625, + "learning_rate": 9.780000000000001e-06, + "loss": 183057.7875, + "step": 48900 + }, + { + "epoch": 0.09880129445654239, + "grad_norm": 80368.8125, + "learning_rate": 9.782e-06, + "loss": 163251.65, + "step": 48910 + }, + { + "epoch": 0.09882149508922619, + "grad_norm": 51544.72265625, + "learning_rate": 9.784000000000002e-06, + "loss": 261237.325, + "step": 48920 + }, + { + "epoch": 0.09884169572191001, + "grad_norm": 8513.732421875, + "learning_rate": 9.786e-06, + "loss": 210300.9, + "step": 48930 + }, + { + "epoch": 0.09886189635459383, + "grad_norm": 957297.4375, + "learning_rate": 9.788e-06, + "loss": 249964.175, + "step": 48940 + }, + { + "epoch": 0.09888209698727764, + "grad_norm": 13400.361328125, + "learning_rate": 9.790000000000001e-06, + "loss": 109592.2875, + "step": 48950 + }, + { + "epoch": 0.09890229761996146, + "grad_norm": 28209.984375, + "learning_rate": 9.792e-06, + "loss": 70437.2312, + "step": 48960 + }, + { + "epoch": 0.09892249825264528, + "grad_norm": 400690.625, + "learning_rate": 9.794000000000001e-06, + "loss": 89258.775, + "step": 48970 + }, + { + "epoch": 0.09894269888532908, + "grad_norm": 392420.78125, + "learning_rate": 9.796e-06, + "loss": 247564.0, + "step": 48980 + }, + { + "epoch": 0.0989628995180129, + "grad_norm": 885358.1875, + "learning_rate": 9.798e-06, + "loss": 309227.35, + "step": 48990 + }, + { + "epoch": 0.09898310015069672, + "grad_norm": 10004.103515625, + "learning_rate": 9.800000000000001e-06, + "loss": 179911.0125, + "step": 49000 + }, + { + "epoch": 0.09900330078338053, + "grad_norm": 196290.71875, + "learning_rate": 9.802e-06, + "loss": 195250.0, + "step": 49010 + }, + { + "epoch": 0.09902350141606435, + "grad_norm": 87764.890625, + "learning_rate": 9.804000000000001e-06, + "loss": 119320.625, + "step": 49020 + }, + { + "epoch": 0.09904370204874817, + "grad_norm": 390049.71875, + "learning_rate": 9.806e-06, + "loss": 140409.75, + "step": 49030 + }, + { + "epoch": 0.09906390268143198, + "grad_norm": 10066.6298828125, + "learning_rate": 9.808000000000002e-06, + "loss": 53032.25, + "step": 49040 + }, + { + "epoch": 0.0990841033141158, + "grad_norm": 415103.8125, + "learning_rate": 9.810000000000001e-06, + "loss": 144956.875, + "step": 49050 + }, + { + "epoch": 0.09910430394679962, + "grad_norm": 23520.73828125, + "learning_rate": 9.812e-06, + "loss": 48662.65, + "step": 49060 + }, + { + "epoch": 0.09912450457948344, + "grad_norm": 165236.78125, + "learning_rate": 9.814000000000001e-06, + "loss": 130720.15, + "step": 49070 + }, + { + "epoch": 0.09914470521216724, + "grad_norm": 56300.2265625, + "learning_rate": 9.816e-06, + "loss": 170405.875, + "step": 49080 + }, + { + "epoch": 0.09916490584485106, + "grad_norm": 7357.17041015625, + "learning_rate": 9.818000000000002e-06, + "loss": 112988.5875, + "step": 49090 + }, + { + "epoch": 0.09918510647753488, + "grad_norm": 671485.375, + "learning_rate": 9.820000000000001e-06, + "loss": 162648.125, + "step": 49100 + }, + { + "epoch": 0.09920530711021869, + "grad_norm": 666112.875, + "learning_rate": 9.822e-06, + "loss": 141848.625, + "step": 49110 + }, + { + "epoch": 0.09922550774290251, + "grad_norm": 121201.46875, + "learning_rate": 9.824000000000001e-06, + "loss": 231517.825, + "step": 49120 + }, + { + "epoch": 0.09924570837558633, + "grad_norm": 2596767.5, + "learning_rate": 9.826e-06, + "loss": 498364.6, + "step": 49130 + }, + { + "epoch": 0.09926590900827013, + "grad_norm": 171143.546875, + "learning_rate": 9.828000000000001e-06, + "loss": 254945.8, + "step": 49140 + }, + { + "epoch": 0.09928610964095395, + "grad_norm": 15729.4365234375, + "learning_rate": 9.83e-06, + "loss": 119355.5375, + "step": 49150 + }, + { + "epoch": 0.09930631027363777, + "grad_norm": 14125.3125, + "learning_rate": 9.832e-06, + "loss": 80011.0562, + "step": 49160 + }, + { + "epoch": 0.09932651090632158, + "grad_norm": 20245.71875, + "learning_rate": 9.834000000000001e-06, + "loss": 277547.025, + "step": 49170 + }, + { + "epoch": 0.0993467115390054, + "grad_norm": 87822.046875, + "learning_rate": 9.836e-06, + "loss": 131201.1, + "step": 49180 + }, + { + "epoch": 0.09936691217168922, + "grad_norm": 35858.69140625, + "learning_rate": 9.838000000000001e-06, + "loss": 204780.7125, + "step": 49190 + }, + { + "epoch": 0.09938711280437303, + "grad_norm": 0.0, + "learning_rate": 9.84e-06, + "loss": 143880.85, + "step": 49200 + }, + { + "epoch": 0.09940731343705685, + "grad_norm": 51749.91015625, + "learning_rate": 9.842e-06, + "loss": 78204.5125, + "step": 49210 + }, + { + "epoch": 0.09942751406974067, + "grad_norm": 28302.658203125, + "learning_rate": 9.844000000000001e-06, + "loss": 199036.05, + "step": 49220 + }, + { + "epoch": 0.09944771470242449, + "grad_norm": 70546.875, + "learning_rate": 9.846000000000002e-06, + "loss": 87940.6, + "step": 49230 + }, + { + "epoch": 0.09946791533510829, + "grad_norm": 49188.39453125, + "learning_rate": 9.848000000000001e-06, + "loss": 123577.5375, + "step": 49240 + }, + { + "epoch": 0.09948811596779211, + "grad_norm": 182791.03125, + "learning_rate": 9.85e-06, + "loss": 117228.6, + "step": 49250 + }, + { + "epoch": 0.09950831660047593, + "grad_norm": 146541.90625, + "learning_rate": 9.852e-06, + "loss": 150850.6875, + "step": 49260 + }, + { + "epoch": 0.09952851723315974, + "grad_norm": 108903.8828125, + "learning_rate": 9.854000000000001e-06, + "loss": 115572.125, + "step": 49270 + }, + { + "epoch": 0.09954871786584356, + "grad_norm": 58110.0546875, + "learning_rate": 9.856000000000002e-06, + "loss": 144621.775, + "step": 49280 + }, + { + "epoch": 0.09956891849852738, + "grad_norm": 45141.71484375, + "learning_rate": 9.858000000000001e-06, + "loss": 48692.0219, + "step": 49290 + }, + { + "epoch": 0.09958911913121118, + "grad_norm": 290433.59375, + "learning_rate": 9.86e-06, + "loss": 68717.2437, + "step": 49300 + }, + { + "epoch": 0.099609319763895, + "grad_norm": 379686.5, + "learning_rate": 9.862e-06, + "loss": 197924.2125, + "step": 49310 + }, + { + "epoch": 0.09962952039657882, + "grad_norm": 87022.8046875, + "learning_rate": 9.864e-06, + "loss": 179778.9375, + "step": 49320 + }, + { + "epoch": 0.09964972102926263, + "grad_norm": 80696.84375, + "learning_rate": 9.866000000000002e-06, + "loss": 190972.1, + "step": 49330 + }, + { + "epoch": 0.09966992166194645, + "grad_norm": 13216.333984375, + "learning_rate": 9.868000000000001e-06, + "loss": 273051.225, + "step": 49340 + }, + { + "epoch": 0.09969012229463027, + "grad_norm": 32312.271484375, + "learning_rate": 9.87e-06, + "loss": 193142.2375, + "step": 49350 + }, + { + "epoch": 0.09971032292731408, + "grad_norm": 748328.4375, + "learning_rate": 9.872e-06, + "loss": 180382.5125, + "step": 49360 + }, + { + "epoch": 0.0997305235599979, + "grad_norm": 530709.875, + "learning_rate": 9.874e-06, + "loss": 379873.975, + "step": 49370 + }, + { + "epoch": 0.09975072419268172, + "grad_norm": 21343.021484375, + "learning_rate": 9.876000000000002e-06, + "loss": 123330.275, + "step": 49380 + }, + { + "epoch": 0.09977092482536554, + "grad_norm": 231977.65625, + "learning_rate": 9.878000000000001e-06, + "loss": 104211.9937, + "step": 49390 + }, + { + "epoch": 0.09979112545804934, + "grad_norm": 27377.4375, + "learning_rate": 9.88e-06, + "loss": 55705.0875, + "step": 49400 + }, + { + "epoch": 0.09981132609073316, + "grad_norm": 21458.046875, + "learning_rate": 9.882000000000001e-06, + "loss": 153933.3125, + "step": 49410 + }, + { + "epoch": 0.09983152672341698, + "grad_norm": 6554.40234375, + "learning_rate": 9.884e-06, + "loss": 168967.775, + "step": 49420 + }, + { + "epoch": 0.09985172735610079, + "grad_norm": 1090487.875, + "learning_rate": 9.886000000000002e-06, + "loss": 253705.85, + "step": 49430 + }, + { + "epoch": 0.09987192798878461, + "grad_norm": 83088.0390625, + "learning_rate": 9.888000000000001e-06, + "loss": 126045.5625, + "step": 49440 + }, + { + "epoch": 0.09989212862146843, + "grad_norm": 65743.859375, + "learning_rate": 9.89e-06, + "loss": 157741.85, + "step": 49450 + }, + { + "epoch": 0.09991232925415224, + "grad_norm": 56607.4453125, + "learning_rate": 9.892000000000001e-06, + "loss": 77339.9812, + "step": 49460 + }, + { + "epoch": 0.09993252988683606, + "grad_norm": 191542.53125, + "learning_rate": 9.894e-06, + "loss": 74129.0813, + "step": 49470 + }, + { + "epoch": 0.09995273051951988, + "grad_norm": 372283.5625, + "learning_rate": 9.896000000000001e-06, + "loss": 175591.7625, + "step": 49480 + }, + { + "epoch": 0.09997293115220368, + "grad_norm": 1398214.25, + "learning_rate": 9.898e-06, + "loss": 259868.025, + "step": 49490 + }, + { + "epoch": 0.0999931317848875, + "grad_norm": 526237.6875, + "learning_rate": 9.9e-06, + "loss": 260292.45, + "step": 49500 + }, + { + "epoch": 0.10001333241757132, + "grad_norm": 716317.6875, + "learning_rate": 9.902000000000001e-06, + "loss": 183597.2375, + "step": 49510 + }, + { + "epoch": 0.10003353305025513, + "grad_norm": 22574.57421875, + "learning_rate": 9.904e-06, + "loss": 57759.9187, + "step": 49520 + }, + { + "epoch": 0.10005373368293895, + "grad_norm": 13826.517578125, + "learning_rate": 9.906000000000001e-06, + "loss": 63630.7937, + "step": 49530 + }, + { + "epoch": 0.10007393431562277, + "grad_norm": 73115.4921875, + "learning_rate": 9.908e-06, + "loss": 88049.9563, + "step": 49540 + }, + { + "epoch": 0.10009413494830659, + "grad_norm": 22217.232421875, + "learning_rate": 9.91e-06, + "loss": 67437.3, + "step": 49550 + }, + { + "epoch": 0.1001143355809904, + "grad_norm": 0.0, + "learning_rate": 9.912000000000001e-06, + "loss": 80822.5562, + "step": 49560 + }, + { + "epoch": 0.10013453621367421, + "grad_norm": 8129.8466796875, + "learning_rate": 9.914e-06, + "loss": 158484.075, + "step": 49570 + }, + { + "epoch": 0.10015473684635803, + "grad_norm": 664933.75, + "learning_rate": 9.916000000000001e-06, + "loss": 154891.5, + "step": 49580 + }, + { + "epoch": 0.10017493747904184, + "grad_norm": 174438.3125, + "learning_rate": 9.918e-06, + "loss": 122843.7125, + "step": 49590 + }, + { + "epoch": 0.10019513811172566, + "grad_norm": 1036448.5625, + "learning_rate": 9.920000000000002e-06, + "loss": 247550.125, + "step": 49600 + }, + { + "epoch": 0.10021533874440948, + "grad_norm": 306731.65625, + "learning_rate": 9.922000000000001e-06, + "loss": 188807.95, + "step": 49610 + }, + { + "epoch": 0.10023553937709329, + "grad_norm": 146406.765625, + "learning_rate": 9.924e-06, + "loss": 143222.1875, + "step": 49620 + }, + { + "epoch": 0.1002557400097771, + "grad_norm": 45525.171875, + "learning_rate": 9.926000000000001e-06, + "loss": 259668.875, + "step": 49630 + }, + { + "epoch": 0.10027594064246093, + "grad_norm": 83787.421875, + "learning_rate": 9.928e-06, + "loss": 230517.775, + "step": 49640 + }, + { + "epoch": 0.10029614127514473, + "grad_norm": 8024.6748046875, + "learning_rate": 9.930000000000001e-06, + "loss": 194250.2875, + "step": 49650 + }, + { + "epoch": 0.10031634190782855, + "grad_norm": 306792.5625, + "learning_rate": 9.932e-06, + "loss": 203163.95, + "step": 49660 + }, + { + "epoch": 0.10033654254051237, + "grad_norm": 24083.716796875, + "learning_rate": 9.934e-06, + "loss": 101107.5625, + "step": 49670 + }, + { + "epoch": 0.10035674317319618, + "grad_norm": 139618.03125, + "learning_rate": 9.936000000000001e-06, + "loss": 199236.7625, + "step": 49680 + }, + { + "epoch": 0.10037694380588, + "grad_norm": 39863.484375, + "learning_rate": 9.938e-06, + "loss": 113726.7, + "step": 49690 + }, + { + "epoch": 0.10039714443856382, + "grad_norm": 4753.6259765625, + "learning_rate": 9.940000000000001e-06, + "loss": 135662.9625, + "step": 49700 + }, + { + "epoch": 0.10041734507124764, + "grad_norm": 7239.66064453125, + "learning_rate": 9.942e-06, + "loss": 363609.325, + "step": 49710 + }, + { + "epoch": 0.10043754570393144, + "grad_norm": 443776.3125, + "learning_rate": 9.944e-06, + "loss": 119680.4, + "step": 49720 + }, + { + "epoch": 0.10045774633661526, + "grad_norm": 33108.43359375, + "learning_rate": 9.946000000000001e-06, + "loss": 208707.7375, + "step": 49730 + }, + { + "epoch": 0.10047794696929908, + "grad_norm": 11136.4541015625, + "learning_rate": 9.948e-06, + "loss": 218772.525, + "step": 49740 + }, + { + "epoch": 0.10049814760198289, + "grad_norm": 50927.95703125, + "learning_rate": 9.950000000000001e-06, + "loss": 164740.2375, + "step": 49750 + }, + { + "epoch": 0.10051834823466671, + "grad_norm": 108832.03125, + "learning_rate": 9.952e-06, + "loss": 123247.4625, + "step": 49760 + }, + { + "epoch": 0.10053854886735053, + "grad_norm": 596624.5, + "learning_rate": 9.954e-06, + "loss": 307307.65, + "step": 49770 + }, + { + "epoch": 0.10055874950003434, + "grad_norm": 842571.625, + "learning_rate": 9.956000000000001e-06, + "loss": 216673.075, + "step": 49780 + }, + { + "epoch": 0.10057895013271816, + "grad_norm": 84144.6640625, + "learning_rate": 9.958e-06, + "loss": 235414.8, + "step": 49790 + }, + { + "epoch": 0.10059915076540198, + "grad_norm": 79421.3046875, + "learning_rate": 9.960000000000001e-06, + "loss": 145502.6375, + "step": 49800 + }, + { + "epoch": 0.10061935139808578, + "grad_norm": 173407.4375, + "learning_rate": 9.962e-06, + "loss": 96159.475, + "step": 49810 + }, + { + "epoch": 0.1006395520307696, + "grad_norm": 12716.681640625, + "learning_rate": 9.964e-06, + "loss": 173828.425, + "step": 49820 + }, + { + "epoch": 0.10065975266345342, + "grad_norm": 7858.46923828125, + "learning_rate": 9.966e-06, + "loss": 129981.175, + "step": 49830 + }, + { + "epoch": 0.10067995329613723, + "grad_norm": 89794.03125, + "learning_rate": 9.968000000000002e-06, + "loss": 216192.3, + "step": 49840 + }, + { + "epoch": 0.10070015392882105, + "grad_norm": 304755.28125, + "learning_rate": 9.970000000000001e-06, + "loss": 164194.0375, + "step": 49850 + }, + { + "epoch": 0.10072035456150487, + "grad_norm": 117260.546875, + "learning_rate": 9.972e-06, + "loss": 247868.075, + "step": 49860 + }, + { + "epoch": 0.10074055519418869, + "grad_norm": 56653.76953125, + "learning_rate": 9.974e-06, + "loss": 179546.6375, + "step": 49870 + }, + { + "epoch": 0.1007607558268725, + "grad_norm": 155191.234375, + "learning_rate": 9.976e-06, + "loss": 127166.6875, + "step": 49880 + }, + { + "epoch": 0.10078095645955631, + "grad_norm": 27416.8359375, + "learning_rate": 9.978000000000002e-06, + "loss": 184834.75, + "step": 49890 + }, + { + "epoch": 0.10080115709224013, + "grad_norm": 547775.25, + "learning_rate": 9.980000000000001e-06, + "loss": 139536.05, + "step": 49900 + }, + { + "epoch": 0.10082135772492394, + "grad_norm": 120752.015625, + "learning_rate": 9.982e-06, + "loss": 257507.4, + "step": 49910 + }, + { + "epoch": 0.10084155835760776, + "grad_norm": 29953.62109375, + "learning_rate": 9.984e-06, + "loss": 132140.525, + "step": 49920 + }, + { + "epoch": 0.10086175899029158, + "grad_norm": 18882.669921875, + "learning_rate": 9.986e-06, + "loss": 174273.3125, + "step": 49930 + }, + { + "epoch": 0.10088195962297539, + "grad_norm": 147226.265625, + "learning_rate": 9.988000000000002e-06, + "loss": 234326.675, + "step": 49940 + }, + { + "epoch": 0.1009021602556592, + "grad_norm": 5150.74609375, + "learning_rate": 9.990000000000001e-06, + "loss": 196520.725, + "step": 49950 + }, + { + "epoch": 0.10092236088834303, + "grad_norm": 109004.046875, + "learning_rate": 9.992e-06, + "loss": 180085.3, + "step": 49960 + }, + { + "epoch": 0.10094256152102683, + "grad_norm": 8824.7978515625, + "learning_rate": 9.994000000000001e-06, + "loss": 110507.5625, + "step": 49970 + }, + { + "epoch": 0.10096276215371065, + "grad_norm": 62077.14453125, + "learning_rate": 9.996e-06, + "loss": 153639.7, + "step": 49980 + }, + { + "epoch": 0.10098296278639447, + "grad_norm": 6556.6484375, + "learning_rate": 9.998000000000002e-06, + "loss": 123813.4875, + "step": 49990 + }, + { + "epoch": 0.10100316341907828, + "grad_norm": 14069.9609375, + "learning_rate": 1e-05, + "loss": 178145.5875, + "step": 50000 + }, + { + "epoch": 0.1010233640517621, + "grad_norm": 13877.287109375, + "learning_rate": 9.999999987815305e-06, + "loss": 104981.9625, + "step": 50010 + }, + { + "epoch": 0.10104356468444592, + "grad_norm": 500777.1875, + "learning_rate": 9.999999951261215e-06, + "loss": 137929.675, + "step": 50020 + }, + { + "epoch": 0.10106376531712974, + "grad_norm": 32557.373046875, + "learning_rate": 9.99999989033773e-06, + "loss": 220057.275, + "step": 50030 + }, + { + "epoch": 0.10108396594981355, + "grad_norm": 359326.8125, + "learning_rate": 9.999999805044853e-06, + "loss": 121473.875, + "step": 50040 + }, + { + "epoch": 0.10110416658249737, + "grad_norm": 15552.4013671875, + "learning_rate": 9.999999695382584e-06, + "loss": 145761.7375, + "step": 50050 + }, + { + "epoch": 0.10112436721518119, + "grad_norm": 37478.72265625, + "learning_rate": 9.999999561350923e-06, + "loss": 130743.0375, + "step": 50060 + }, + { + "epoch": 0.10114456784786499, + "grad_norm": 75680.1953125, + "learning_rate": 9.99999940294987e-06, + "loss": 103223.35, + "step": 50070 + }, + { + "epoch": 0.10116476848054881, + "grad_norm": 277546.15625, + "learning_rate": 9.999999220179426e-06, + "loss": 126883.925, + "step": 50080 + }, + { + "epoch": 0.10118496911323263, + "grad_norm": 96417.4140625, + "learning_rate": 9.999999013039593e-06, + "loss": 68319.2375, + "step": 50090 + }, + { + "epoch": 0.10120516974591644, + "grad_norm": 343420.625, + "learning_rate": 9.999998781530372e-06, + "loss": 310092.25, + "step": 50100 + }, + { + "epoch": 0.10122537037860026, + "grad_norm": 588362.5625, + "learning_rate": 9.999998525651761e-06, + "loss": 238865.55, + "step": 50110 + }, + { + "epoch": 0.10124557101128408, + "grad_norm": 208565.234375, + "learning_rate": 9.999998245403766e-06, + "loss": 121367.7375, + "step": 50120 + }, + { + "epoch": 0.10126577164396788, + "grad_norm": 53871.0625, + "learning_rate": 9.999997940786385e-06, + "loss": 92390.225, + "step": 50130 + }, + { + "epoch": 0.1012859722766517, + "grad_norm": 5319.01171875, + "learning_rate": 9.99999761179962e-06, + "loss": 161331.3875, + "step": 50140 + }, + { + "epoch": 0.10130617290933552, + "grad_norm": 34059.6953125, + "learning_rate": 9.999997258443473e-06, + "loss": 77408.5375, + "step": 50150 + }, + { + "epoch": 0.10132637354201933, + "grad_norm": 91090.46875, + "learning_rate": 9.999996880717946e-06, + "loss": 71829.0437, + "step": 50160 + }, + { + "epoch": 0.10134657417470315, + "grad_norm": 60131.37890625, + "learning_rate": 9.999996478623041e-06, + "loss": 260185.75, + "step": 50170 + }, + { + "epoch": 0.10136677480738697, + "grad_norm": 53528.58203125, + "learning_rate": 9.99999605215876e-06, + "loss": 79166.6187, + "step": 50180 + }, + { + "epoch": 0.10138697544007079, + "grad_norm": 175299.828125, + "learning_rate": 9.999995601325104e-06, + "loss": 122208.475, + "step": 50190 + }, + { + "epoch": 0.1014071760727546, + "grad_norm": 867845.6875, + "learning_rate": 9.999995126122076e-06, + "loss": 119971.0125, + "step": 50200 + }, + { + "epoch": 0.10142737670543842, + "grad_norm": 580539.875, + "learning_rate": 9.999994626549678e-06, + "loss": 320597.8, + "step": 50210 + }, + { + "epoch": 0.10144757733812224, + "grad_norm": 6884.39892578125, + "learning_rate": 9.999994102607912e-06, + "loss": 121651.7125, + "step": 50220 + }, + { + "epoch": 0.10146777797080604, + "grad_norm": 45427.46484375, + "learning_rate": 9.999993554296783e-06, + "loss": 167910.1125, + "step": 50230 + }, + { + "epoch": 0.10148797860348986, + "grad_norm": 56000.15234375, + "learning_rate": 9.999992981616292e-06, + "loss": 169213.575, + "step": 50240 + }, + { + "epoch": 0.10150817923617368, + "grad_norm": 333080.78125, + "learning_rate": 9.99999238456644e-06, + "loss": 196669.675, + "step": 50250 + }, + { + "epoch": 0.10152837986885749, + "grad_norm": 66282.046875, + "learning_rate": 9.999991763147232e-06, + "loss": 95448.775, + "step": 50260 + }, + { + "epoch": 0.10154858050154131, + "grad_norm": 442565.0625, + "learning_rate": 9.99999111735867e-06, + "loss": 230594.075, + "step": 50270 + }, + { + "epoch": 0.10156878113422513, + "grad_norm": 18854.98828125, + "learning_rate": 9.999990447200758e-06, + "loss": 251522.65, + "step": 50280 + }, + { + "epoch": 0.10158898176690893, + "grad_norm": 5595.306640625, + "learning_rate": 9.9999897526735e-06, + "loss": 288395.525, + "step": 50290 + }, + { + "epoch": 0.10160918239959275, + "grad_norm": 5990.18505859375, + "learning_rate": 9.999989033776898e-06, + "loss": 307096.925, + "step": 50300 + }, + { + "epoch": 0.10162938303227657, + "grad_norm": 0.0, + "learning_rate": 9.999988290510955e-06, + "loss": 149979.025, + "step": 50310 + }, + { + "epoch": 0.10164958366496038, + "grad_norm": 657274.75, + "learning_rate": 9.999987522875676e-06, + "loss": 127577.4625, + "step": 50320 + }, + { + "epoch": 0.1016697842976442, + "grad_norm": 42308.515625, + "learning_rate": 9.999986730871065e-06, + "loss": 166987.7375, + "step": 50330 + }, + { + "epoch": 0.10168998493032802, + "grad_norm": 812126.9375, + "learning_rate": 9.999985914497124e-06, + "loss": 293978.35, + "step": 50340 + }, + { + "epoch": 0.10171018556301184, + "grad_norm": 73659.6015625, + "learning_rate": 9.999985073753857e-06, + "loss": 154971.5625, + "step": 50350 + }, + { + "epoch": 0.10173038619569565, + "grad_norm": 451581.0, + "learning_rate": 9.999984208641271e-06, + "loss": 168980.525, + "step": 50360 + }, + { + "epoch": 0.10175058682837947, + "grad_norm": 43855.765625, + "learning_rate": 9.999983319159368e-06, + "loss": 127863.6125, + "step": 50370 + }, + { + "epoch": 0.10177078746106329, + "grad_norm": 553291.1875, + "learning_rate": 9.999982405308154e-06, + "loss": 180080.55, + "step": 50380 + }, + { + "epoch": 0.10179098809374709, + "grad_norm": 0.0, + "learning_rate": 9.999981467087629e-06, + "loss": 211397.95, + "step": 50390 + }, + { + "epoch": 0.10181118872643091, + "grad_norm": 148340.3125, + "learning_rate": 9.999980504497803e-06, + "loss": 66092.1187, + "step": 50400 + }, + { + "epoch": 0.10183138935911473, + "grad_norm": 55658.984375, + "learning_rate": 9.999979517538677e-06, + "loss": 79142.1562, + "step": 50410 + }, + { + "epoch": 0.10185158999179854, + "grad_norm": 47321.73828125, + "learning_rate": 9.99997850621026e-06, + "loss": 154638.95, + "step": 50420 + }, + { + "epoch": 0.10187179062448236, + "grad_norm": 14335.2451171875, + "learning_rate": 9.999977470512551e-06, + "loss": 85579.8062, + "step": 50430 + }, + { + "epoch": 0.10189199125716618, + "grad_norm": 5324.52587890625, + "learning_rate": 9.999976410445563e-06, + "loss": 70916.725, + "step": 50440 + }, + { + "epoch": 0.10191219188984998, + "grad_norm": 46336.8671875, + "learning_rate": 9.999975326009292e-06, + "loss": 118920.3125, + "step": 50450 + }, + { + "epoch": 0.1019323925225338, + "grad_norm": 441087.15625, + "learning_rate": 9.999974217203749e-06, + "loss": 149000.725, + "step": 50460 + }, + { + "epoch": 0.10195259315521762, + "grad_norm": 392978.25, + "learning_rate": 9.999973084028938e-06, + "loss": 230658.65, + "step": 50470 + }, + { + "epoch": 0.10197279378790143, + "grad_norm": 438937.65625, + "learning_rate": 9.999971926484865e-06, + "loss": 231768.1, + "step": 50480 + }, + { + "epoch": 0.10199299442058525, + "grad_norm": 367763.96875, + "learning_rate": 9.999970744571534e-06, + "loss": 241821.35, + "step": 50490 + }, + { + "epoch": 0.10201319505326907, + "grad_norm": 108604.59375, + "learning_rate": 9.999969538288953e-06, + "loss": 65471.5563, + "step": 50500 + }, + { + "epoch": 0.10203339568595289, + "grad_norm": 123181.578125, + "learning_rate": 9.999968307637127e-06, + "loss": 85952.8188, + "step": 50510 + }, + { + "epoch": 0.1020535963186367, + "grad_norm": 63759.0546875, + "learning_rate": 9.999967052616061e-06, + "loss": 93675.8188, + "step": 50520 + }, + { + "epoch": 0.10207379695132052, + "grad_norm": 70745.984375, + "learning_rate": 9.999965773225762e-06, + "loss": 336543.55, + "step": 50530 + }, + { + "epoch": 0.10209399758400434, + "grad_norm": 23618.099609375, + "learning_rate": 9.999964469466236e-06, + "loss": 80979.6625, + "step": 50540 + }, + { + "epoch": 0.10211419821668814, + "grad_norm": 23555.45703125, + "learning_rate": 9.999963141337493e-06, + "loss": 133549.525, + "step": 50550 + }, + { + "epoch": 0.10213439884937196, + "grad_norm": 27238.609375, + "learning_rate": 9.999961788839533e-06, + "loss": 118145.95, + "step": 50560 + }, + { + "epoch": 0.10215459948205578, + "grad_norm": 165082.625, + "learning_rate": 9.999960411972366e-06, + "loss": 309305.325, + "step": 50570 + }, + { + "epoch": 0.10217480011473959, + "grad_norm": 4650.18603515625, + "learning_rate": 9.999959010735997e-06, + "loss": 57352.8562, + "step": 50580 + }, + { + "epoch": 0.10219500074742341, + "grad_norm": 97991.7421875, + "learning_rate": 9.999957585130438e-06, + "loss": 84438.9062, + "step": 50590 + }, + { + "epoch": 0.10221520138010723, + "grad_norm": 26306.0703125, + "learning_rate": 9.999956135155688e-06, + "loss": 59551.6625, + "step": 50600 + }, + { + "epoch": 0.10223540201279104, + "grad_norm": 223941.15625, + "learning_rate": 9.999954660811761e-06, + "loss": 173352.075, + "step": 50610 + }, + { + "epoch": 0.10225560264547486, + "grad_norm": 107164.09375, + "learning_rate": 9.99995316209866e-06, + "loss": 98656.1062, + "step": 50620 + }, + { + "epoch": 0.10227580327815868, + "grad_norm": 129465.015625, + "learning_rate": 9.999951639016396e-06, + "loss": 386715.35, + "step": 50630 + }, + { + "epoch": 0.10229600391084248, + "grad_norm": 36869.3125, + "learning_rate": 9.999950091564972e-06, + "loss": 160691.9375, + "step": 50640 + }, + { + "epoch": 0.1023162045435263, + "grad_norm": 0.0, + "learning_rate": 9.999948519744397e-06, + "loss": 57489.6687, + "step": 50650 + }, + { + "epoch": 0.10233640517621012, + "grad_norm": 47413.48828125, + "learning_rate": 9.999946923554681e-06, + "loss": 193361.45, + "step": 50660 + }, + { + "epoch": 0.10235660580889394, + "grad_norm": 115579.0625, + "learning_rate": 9.99994530299583e-06, + "loss": 215783.0, + "step": 50670 + }, + { + "epoch": 0.10237680644157775, + "grad_norm": 49732.9453125, + "learning_rate": 9.99994365806785e-06, + "loss": 130134.0125, + "step": 50680 + }, + { + "epoch": 0.10239700707426157, + "grad_norm": 0.0, + "learning_rate": 9.999941988770754e-06, + "loss": 239513.475, + "step": 50690 + }, + { + "epoch": 0.10241720770694539, + "grad_norm": 50350.2421875, + "learning_rate": 9.999940295104546e-06, + "loss": 145267.075, + "step": 50700 + }, + { + "epoch": 0.1024374083396292, + "grad_norm": 0.0, + "learning_rate": 9.999938577069235e-06, + "loss": 136358.425, + "step": 50710 + }, + { + "epoch": 0.10245760897231301, + "grad_norm": 46376.10546875, + "learning_rate": 9.99993683466483e-06, + "loss": 110545.075, + "step": 50720 + }, + { + "epoch": 0.10247780960499683, + "grad_norm": 782817.625, + "learning_rate": 9.999935067891339e-06, + "loss": 177344.75, + "step": 50730 + }, + { + "epoch": 0.10249801023768064, + "grad_norm": 59496.265625, + "learning_rate": 9.999933276748772e-06, + "loss": 102334.2063, + "step": 50740 + }, + { + "epoch": 0.10251821087036446, + "grad_norm": 93645.9609375, + "learning_rate": 9.999931461237135e-06, + "loss": 239598.55, + "step": 50750 + }, + { + "epoch": 0.10253841150304828, + "grad_norm": 0.0, + "learning_rate": 9.99992962135644e-06, + "loss": 48412.875, + "step": 50760 + }, + { + "epoch": 0.10255861213573209, + "grad_norm": 123319.96875, + "learning_rate": 9.999927757106693e-06, + "loss": 123658.775, + "step": 50770 + }, + { + "epoch": 0.1025788127684159, + "grad_norm": 88232.8671875, + "learning_rate": 9.999925868487905e-06, + "loss": 91395.1313, + "step": 50780 + }, + { + "epoch": 0.10259901340109973, + "grad_norm": 3516.827880859375, + "learning_rate": 9.999923955500085e-06, + "loss": 322491.65, + "step": 50790 + }, + { + "epoch": 0.10261921403378353, + "grad_norm": 1855.8297119140625, + "learning_rate": 9.999922018143242e-06, + "loss": 172134.5875, + "step": 50800 + }, + { + "epoch": 0.10263941466646735, + "grad_norm": 518976.40625, + "learning_rate": 9.999920056417385e-06, + "loss": 108453.125, + "step": 50810 + }, + { + "epoch": 0.10265961529915117, + "grad_norm": 32457.083984375, + "learning_rate": 9.999918070322525e-06, + "loss": 252809.475, + "step": 50820 + }, + { + "epoch": 0.10267981593183499, + "grad_norm": 242924.015625, + "learning_rate": 9.999916059858669e-06, + "loss": 150208.15, + "step": 50830 + }, + { + "epoch": 0.1027000165645188, + "grad_norm": 12725.9140625, + "learning_rate": 9.999914025025831e-06, + "loss": 166316.325, + "step": 50840 + }, + { + "epoch": 0.10272021719720262, + "grad_norm": 38896.62890625, + "learning_rate": 9.999911965824018e-06, + "loss": 105988.7625, + "step": 50850 + }, + { + "epoch": 0.10274041782988644, + "grad_norm": 58846.41796875, + "learning_rate": 9.99990988225324e-06, + "loss": 88113.0375, + "step": 50860 + }, + { + "epoch": 0.10276061846257024, + "grad_norm": 1309.5352783203125, + "learning_rate": 9.999907774313507e-06, + "loss": 320198.775, + "step": 50870 + }, + { + "epoch": 0.10278081909525406, + "grad_norm": 203127.609375, + "learning_rate": 9.99990564200483e-06, + "loss": 129059.1125, + "step": 50880 + }, + { + "epoch": 0.10280101972793788, + "grad_norm": 0.0, + "learning_rate": 9.999903485327221e-06, + "loss": 145506.1375, + "step": 50890 + }, + { + "epoch": 0.10282122036062169, + "grad_norm": 38436.89453125, + "learning_rate": 9.999901304280686e-06, + "loss": 169141.6125, + "step": 50900 + }, + { + "epoch": 0.10284142099330551, + "grad_norm": 129082.234375, + "learning_rate": 9.99989909886524e-06, + "loss": 152268.6375, + "step": 50910 + }, + { + "epoch": 0.10286162162598933, + "grad_norm": 116358.265625, + "learning_rate": 9.999896869080893e-06, + "loss": 186348.4875, + "step": 50920 + }, + { + "epoch": 0.10288182225867314, + "grad_norm": 167313.046875, + "learning_rate": 9.999894614927655e-06, + "loss": 215178.25, + "step": 50930 + }, + { + "epoch": 0.10290202289135696, + "grad_norm": 139144.453125, + "learning_rate": 9.999892336405534e-06, + "loss": 166605.0875, + "step": 50940 + }, + { + "epoch": 0.10292222352404078, + "grad_norm": 86628.8984375, + "learning_rate": 9.999890033514547e-06, + "loss": 86633.5875, + "step": 50950 + }, + { + "epoch": 0.10294242415672458, + "grad_norm": 138571.796875, + "learning_rate": 9.999887706254703e-06, + "loss": 176097.6, + "step": 50960 + }, + { + "epoch": 0.1029626247894084, + "grad_norm": 30913.193359375, + "learning_rate": 9.999885354626011e-06, + "loss": 124489.1625, + "step": 50970 + }, + { + "epoch": 0.10298282542209222, + "grad_norm": 53996.87890625, + "learning_rate": 9.999882978628485e-06, + "loss": 89550.5188, + "step": 50980 + }, + { + "epoch": 0.10300302605477604, + "grad_norm": 2554794.25, + "learning_rate": 9.999880578262135e-06, + "loss": 393523.05, + "step": 50990 + }, + { + "epoch": 0.10302322668745985, + "grad_norm": 274567.78125, + "learning_rate": 9.999878153526974e-06, + "loss": 162703.9375, + "step": 51000 + }, + { + "epoch": 0.10304342732014367, + "grad_norm": 53046.2421875, + "learning_rate": 9.999875704423015e-06, + "loss": 113175.25, + "step": 51010 + }, + { + "epoch": 0.10306362795282749, + "grad_norm": 24038.0703125, + "learning_rate": 9.999873230950265e-06, + "loss": 135933.7875, + "step": 51020 + }, + { + "epoch": 0.1030838285855113, + "grad_norm": 36907.45703125, + "learning_rate": 9.99987073310874e-06, + "loss": 239075.025, + "step": 51030 + }, + { + "epoch": 0.10310402921819511, + "grad_norm": 24381.240234375, + "learning_rate": 9.999868210898454e-06, + "loss": 73981.9375, + "step": 51040 + }, + { + "epoch": 0.10312422985087893, + "grad_norm": 49927.94140625, + "learning_rate": 9.999865664319414e-06, + "loss": 398823.25, + "step": 51050 + }, + { + "epoch": 0.10314443048356274, + "grad_norm": 17444.634765625, + "learning_rate": 9.999863093371638e-06, + "loss": 266676.9, + "step": 51060 + }, + { + "epoch": 0.10316463111624656, + "grad_norm": 22020.697265625, + "learning_rate": 9.999860498055134e-06, + "loss": 63224.9437, + "step": 51070 + }, + { + "epoch": 0.10318483174893038, + "grad_norm": 662491.625, + "learning_rate": 9.999857878369917e-06, + "loss": 200193.25, + "step": 51080 + }, + { + "epoch": 0.10320503238161419, + "grad_norm": 16546.400390625, + "learning_rate": 9.999855234315997e-06, + "loss": 100148.5, + "step": 51090 + }, + { + "epoch": 0.103225233014298, + "grad_norm": 66178.7109375, + "learning_rate": 9.99985256589339e-06, + "loss": 303253.2, + "step": 51100 + }, + { + "epoch": 0.10324543364698183, + "grad_norm": 1478.9439697265625, + "learning_rate": 9.999849873102108e-06, + "loss": 97527.8, + "step": 51110 + }, + { + "epoch": 0.10326563427966563, + "grad_norm": 262877.0625, + "learning_rate": 9.999847155942165e-06, + "loss": 69099.975, + "step": 51120 + }, + { + "epoch": 0.10328583491234945, + "grad_norm": 217987.21875, + "learning_rate": 9.999844414413574e-06, + "loss": 119011.9375, + "step": 51130 + }, + { + "epoch": 0.10330603554503327, + "grad_norm": 0.0, + "learning_rate": 9.999841648516347e-06, + "loss": 65866.5312, + "step": 51140 + }, + { + "epoch": 0.10332623617771709, + "grad_norm": 9761.560546875, + "learning_rate": 9.999838858250497e-06, + "loss": 143849.375, + "step": 51150 + }, + { + "epoch": 0.1033464368104009, + "grad_norm": 4892.84326171875, + "learning_rate": 9.99983604361604e-06, + "loss": 73686.825, + "step": 51160 + }, + { + "epoch": 0.10336663744308472, + "grad_norm": 305432.1875, + "learning_rate": 9.999833204612988e-06, + "loss": 184145.225, + "step": 51170 + }, + { + "epoch": 0.10338683807576854, + "grad_norm": 291997.40625, + "learning_rate": 9.999830341241354e-06, + "loss": 147288.625, + "step": 51180 + }, + { + "epoch": 0.10340703870845235, + "grad_norm": 129642.7265625, + "learning_rate": 9.999827453501156e-06, + "loss": 229303.225, + "step": 51190 + }, + { + "epoch": 0.10342723934113617, + "grad_norm": 404646.3125, + "learning_rate": 9.999824541392404e-06, + "loss": 108932.6375, + "step": 51200 + }, + { + "epoch": 0.10344743997381999, + "grad_norm": 367046.03125, + "learning_rate": 9.999821604915114e-06, + "loss": 224350.05, + "step": 51210 + }, + { + "epoch": 0.10346764060650379, + "grad_norm": 27270.4140625, + "learning_rate": 9.999818644069299e-06, + "loss": 118020.65, + "step": 51220 + }, + { + "epoch": 0.10348784123918761, + "grad_norm": 56158.9296875, + "learning_rate": 9.999815658854976e-06, + "loss": 58393.8187, + "step": 51230 + }, + { + "epoch": 0.10350804187187143, + "grad_norm": 527135.0, + "learning_rate": 9.999812649272157e-06, + "loss": 124408.2, + "step": 51240 + }, + { + "epoch": 0.10352824250455524, + "grad_norm": 109706.90625, + "learning_rate": 9.999809615320857e-06, + "loss": 53465.6125, + "step": 51250 + }, + { + "epoch": 0.10354844313723906, + "grad_norm": 355266.28125, + "learning_rate": 9.999806557001092e-06, + "loss": 133786.725, + "step": 51260 + }, + { + "epoch": 0.10356864376992288, + "grad_norm": 33377.1875, + "learning_rate": 9.999803474312877e-06, + "loss": 222013.825, + "step": 51270 + }, + { + "epoch": 0.10358884440260668, + "grad_norm": 2200067.0, + "learning_rate": 9.999800367256225e-06, + "loss": 374907.1, + "step": 51280 + }, + { + "epoch": 0.1036090450352905, + "grad_norm": 276584.21875, + "learning_rate": 9.999797235831153e-06, + "loss": 253189.625, + "step": 51290 + }, + { + "epoch": 0.10362924566797432, + "grad_norm": 10801.8505859375, + "learning_rate": 9.999794080037675e-06, + "loss": 62823.2125, + "step": 51300 + }, + { + "epoch": 0.10364944630065814, + "grad_norm": 1549994.125, + "learning_rate": 9.999790899875807e-06, + "loss": 261427.45, + "step": 51310 + }, + { + "epoch": 0.10366964693334195, + "grad_norm": 104371.9453125, + "learning_rate": 9.999787695345565e-06, + "loss": 202064.975, + "step": 51320 + }, + { + "epoch": 0.10368984756602577, + "grad_norm": 547226.125, + "learning_rate": 9.999784466446965e-06, + "loss": 160725.85, + "step": 51330 + }, + { + "epoch": 0.10371004819870959, + "grad_norm": 1339006.875, + "learning_rate": 9.99978121318002e-06, + "loss": 193042.2125, + "step": 51340 + }, + { + "epoch": 0.1037302488313934, + "grad_norm": 70313.0234375, + "learning_rate": 9.99977793554475e-06, + "loss": 126629.4, + "step": 51350 + }, + { + "epoch": 0.10375044946407722, + "grad_norm": 3511.90087890625, + "learning_rate": 9.999774633541169e-06, + "loss": 78603.525, + "step": 51360 + }, + { + "epoch": 0.10377065009676104, + "grad_norm": 588111.9375, + "learning_rate": 9.999771307169291e-06, + "loss": 208533.2375, + "step": 51370 + }, + { + "epoch": 0.10379085072944484, + "grad_norm": 56215.5234375, + "learning_rate": 9.999767956429135e-06, + "loss": 108721.1, + "step": 51380 + }, + { + "epoch": 0.10381105136212866, + "grad_norm": 318156.0, + "learning_rate": 9.999764581320714e-06, + "loss": 221447.15, + "step": 51390 + }, + { + "epoch": 0.10383125199481248, + "grad_norm": 1597057.375, + "learning_rate": 9.99976118184405e-06, + "loss": 268299.65, + "step": 51400 + }, + { + "epoch": 0.10385145262749629, + "grad_norm": 147075.84375, + "learning_rate": 9.999757757999155e-06, + "loss": 77506.925, + "step": 51410 + }, + { + "epoch": 0.10387165326018011, + "grad_norm": 145645.953125, + "learning_rate": 9.999754309786047e-06, + "loss": 90245.0625, + "step": 51420 + }, + { + "epoch": 0.10389185389286393, + "grad_norm": 172546.1875, + "learning_rate": 9.999750837204743e-06, + "loss": 94922.1375, + "step": 51430 + }, + { + "epoch": 0.10391205452554773, + "grad_norm": 155441.96875, + "learning_rate": 9.99974734025526e-06, + "loss": 73961.575, + "step": 51440 + }, + { + "epoch": 0.10393225515823155, + "grad_norm": 249393.875, + "learning_rate": 9.999743818937614e-06, + "loss": 166757.7375, + "step": 51450 + }, + { + "epoch": 0.10395245579091537, + "grad_norm": 67178.65625, + "learning_rate": 9.999740273251824e-06, + "loss": 225976.75, + "step": 51460 + }, + { + "epoch": 0.1039726564235992, + "grad_norm": 800465.5625, + "learning_rate": 9.999736703197907e-06, + "loss": 109502.525, + "step": 51470 + }, + { + "epoch": 0.103992857056283, + "grad_norm": 350575.25, + "learning_rate": 9.999733108775878e-06, + "loss": 130675.4, + "step": 51480 + }, + { + "epoch": 0.10401305768896682, + "grad_norm": 25652.880859375, + "learning_rate": 9.999729489985757e-06, + "loss": 45598.4656, + "step": 51490 + }, + { + "epoch": 0.10403325832165064, + "grad_norm": 944341.4375, + "learning_rate": 9.999725846827562e-06, + "loss": 197533.4, + "step": 51500 + }, + { + "epoch": 0.10405345895433445, + "grad_norm": 314406.25, + "learning_rate": 9.999722179301309e-06, + "loss": 239438.85, + "step": 51510 + }, + { + "epoch": 0.10407365958701827, + "grad_norm": 639632.625, + "learning_rate": 9.999718487407015e-06, + "loss": 122725.4125, + "step": 51520 + }, + { + "epoch": 0.10409386021970209, + "grad_norm": 59250.30859375, + "learning_rate": 9.9997147711447e-06, + "loss": 92324.375, + "step": 51530 + }, + { + "epoch": 0.10411406085238589, + "grad_norm": 130950.9765625, + "learning_rate": 9.999711030514383e-06, + "loss": 215234.825, + "step": 51540 + }, + { + "epoch": 0.10413426148506971, + "grad_norm": 65112.2109375, + "learning_rate": 9.99970726551608e-06, + "loss": 69766.8, + "step": 51550 + }, + { + "epoch": 0.10415446211775353, + "grad_norm": 184600.28125, + "learning_rate": 9.999703476149808e-06, + "loss": 174347.0375, + "step": 51560 + }, + { + "epoch": 0.10417466275043734, + "grad_norm": 76825.7890625, + "learning_rate": 9.999699662415592e-06, + "loss": 181786.3875, + "step": 51570 + }, + { + "epoch": 0.10419486338312116, + "grad_norm": 5364.4228515625, + "learning_rate": 9.999695824313443e-06, + "loss": 185517.0125, + "step": 51580 + }, + { + "epoch": 0.10421506401580498, + "grad_norm": 95621.3203125, + "learning_rate": 9.999691961843385e-06, + "loss": 276598.875, + "step": 51590 + }, + { + "epoch": 0.10423526464848878, + "grad_norm": 87196.296875, + "learning_rate": 9.999688075005434e-06, + "loss": 182999.9625, + "step": 51600 + }, + { + "epoch": 0.1042554652811726, + "grad_norm": 34553.33984375, + "learning_rate": 9.999684163799609e-06, + "loss": 130225.175, + "step": 51610 + }, + { + "epoch": 0.10427566591385642, + "grad_norm": 29485.41015625, + "learning_rate": 9.99968022822593e-06, + "loss": 120833.275, + "step": 51620 + }, + { + "epoch": 0.10429586654654023, + "grad_norm": 91728.3671875, + "learning_rate": 9.999676268284416e-06, + "loss": 91996.7625, + "step": 51630 + }, + { + "epoch": 0.10431606717922405, + "grad_norm": 47796.7421875, + "learning_rate": 9.999672283975085e-06, + "loss": 484226.3, + "step": 51640 + }, + { + "epoch": 0.10433626781190787, + "grad_norm": 40859.91015625, + "learning_rate": 9.99966827529796e-06, + "loss": 360451.675, + "step": 51650 + }, + { + "epoch": 0.10435646844459169, + "grad_norm": 6896.607421875, + "learning_rate": 9.999664242253058e-06, + "loss": 63167.875, + "step": 51660 + }, + { + "epoch": 0.1043766690772755, + "grad_norm": 566240.0625, + "learning_rate": 9.999660184840398e-06, + "loss": 391964.5, + "step": 51670 + }, + { + "epoch": 0.10439686970995932, + "grad_norm": 88719.6953125, + "learning_rate": 9.999656103060001e-06, + "loss": 132413.1625, + "step": 51680 + }, + { + "epoch": 0.10441707034264314, + "grad_norm": 36647.7265625, + "learning_rate": 9.999651996911886e-06, + "loss": 96282.5188, + "step": 51690 + }, + { + "epoch": 0.10443727097532694, + "grad_norm": 5172.80517578125, + "learning_rate": 9.999647866396073e-06, + "loss": 177258.2375, + "step": 51700 + }, + { + "epoch": 0.10445747160801076, + "grad_norm": 69375.6171875, + "learning_rate": 9.999643711512586e-06, + "loss": 230104.55, + "step": 51710 + }, + { + "epoch": 0.10447767224069458, + "grad_norm": 78841.6875, + "learning_rate": 9.999639532261438e-06, + "loss": 41120.0125, + "step": 51720 + }, + { + "epoch": 0.10449787287337839, + "grad_norm": 1459763.375, + "learning_rate": 9.999635328642655e-06, + "loss": 274355.9, + "step": 51730 + }, + { + "epoch": 0.10451807350606221, + "grad_norm": 0.0, + "learning_rate": 9.999631100656255e-06, + "loss": 160526.225, + "step": 51740 + }, + { + "epoch": 0.10453827413874603, + "grad_norm": 390184.9375, + "learning_rate": 9.999626848302261e-06, + "loss": 158017.85, + "step": 51750 + }, + { + "epoch": 0.10455847477142984, + "grad_norm": 93016.0546875, + "learning_rate": 9.99962257158069e-06, + "loss": 165096.725, + "step": 51760 + }, + { + "epoch": 0.10457867540411366, + "grad_norm": 956446.5625, + "learning_rate": 9.999618270491567e-06, + "loss": 136158.35, + "step": 51770 + }, + { + "epoch": 0.10459887603679748, + "grad_norm": 123163.109375, + "learning_rate": 9.999613945034909e-06, + "loss": 202780.6125, + "step": 51780 + }, + { + "epoch": 0.10461907666948128, + "grad_norm": 33689.09765625, + "learning_rate": 9.999609595210743e-06, + "loss": 48960.5344, + "step": 51790 + }, + { + "epoch": 0.1046392773021651, + "grad_norm": 102015.7578125, + "learning_rate": 9.999605221019082e-06, + "loss": 248173.15, + "step": 51800 + }, + { + "epoch": 0.10465947793484892, + "grad_norm": 31239.044921875, + "learning_rate": 9.999600822459952e-06, + "loss": 59203.825, + "step": 51810 + }, + { + "epoch": 0.10467967856753274, + "grad_norm": 154129.421875, + "learning_rate": 9.999596399533375e-06, + "loss": 286958.35, + "step": 51820 + }, + { + "epoch": 0.10469987920021655, + "grad_norm": 15041.455078125, + "learning_rate": 9.999591952239371e-06, + "loss": 100190.5375, + "step": 51830 + }, + { + "epoch": 0.10472007983290037, + "grad_norm": 257016.171875, + "learning_rate": 9.999587480577964e-06, + "loss": 80839.6875, + "step": 51840 + }, + { + "epoch": 0.10474028046558419, + "grad_norm": 171096.796875, + "learning_rate": 9.999582984549172e-06, + "loss": 98966.925, + "step": 51850 + }, + { + "epoch": 0.104760481098268, + "grad_norm": 7984.30712890625, + "learning_rate": 9.99957846415302e-06, + "loss": 115606.725, + "step": 51860 + }, + { + "epoch": 0.10478068173095181, + "grad_norm": 70379.5234375, + "learning_rate": 9.999573919389527e-06, + "loss": 139137.3, + "step": 51870 + }, + { + "epoch": 0.10480088236363563, + "grad_norm": 60940.1875, + "learning_rate": 9.999569350258717e-06, + "loss": 322044.35, + "step": 51880 + }, + { + "epoch": 0.10482108299631944, + "grad_norm": 121937.6015625, + "learning_rate": 9.999564756760616e-06, + "loss": 189303.55, + "step": 51890 + }, + { + "epoch": 0.10484128362900326, + "grad_norm": 66176.5390625, + "learning_rate": 9.999560138895238e-06, + "loss": 141858.8375, + "step": 51900 + }, + { + "epoch": 0.10486148426168708, + "grad_norm": 299843.125, + "learning_rate": 9.999555496662614e-06, + "loss": 135560.25, + "step": 51910 + }, + { + "epoch": 0.10488168489437089, + "grad_norm": 341106.875, + "learning_rate": 9.999550830062762e-06, + "loss": 142663.65, + "step": 51920 + }, + { + "epoch": 0.1049018855270547, + "grad_norm": 83478.578125, + "learning_rate": 9.999546139095706e-06, + "loss": 50990.8313, + "step": 51930 + }, + { + "epoch": 0.10492208615973853, + "grad_norm": 325367.8125, + "learning_rate": 9.999541423761468e-06, + "loss": 235498.85, + "step": 51940 + }, + { + "epoch": 0.10494228679242233, + "grad_norm": 23171.7109375, + "learning_rate": 9.999536684060071e-06, + "loss": 303805.525, + "step": 51950 + }, + { + "epoch": 0.10496248742510615, + "grad_norm": 854740.625, + "learning_rate": 9.999531919991538e-06, + "loss": 155765.875, + "step": 51960 + }, + { + "epoch": 0.10498268805778997, + "grad_norm": 308322.9375, + "learning_rate": 9.999527131555894e-06, + "loss": 161814.575, + "step": 51970 + }, + { + "epoch": 0.10500288869047379, + "grad_norm": 87555.484375, + "learning_rate": 9.99952231875316e-06, + "loss": 203872.3625, + "step": 51980 + }, + { + "epoch": 0.1050230893231576, + "grad_norm": 28925.265625, + "learning_rate": 9.999517481583363e-06, + "loss": 61307.7625, + "step": 51990 + }, + { + "epoch": 0.10504328995584142, + "grad_norm": 311326.8125, + "learning_rate": 9.999512620046523e-06, + "loss": 93239.25, + "step": 52000 + }, + { + "epoch": 0.10506349058852524, + "grad_norm": 118993.234375, + "learning_rate": 9.999507734142663e-06, + "loss": 104798.4438, + "step": 52010 + }, + { + "epoch": 0.10508369122120904, + "grad_norm": 37587.96875, + "learning_rate": 9.999502823871809e-06, + "loss": 82339.3875, + "step": 52020 + }, + { + "epoch": 0.10510389185389286, + "grad_norm": 231354.796875, + "learning_rate": 9.999497889233987e-06, + "loss": 117838.7125, + "step": 52030 + }, + { + "epoch": 0.10512409248657668, + "grad_norm": 122762.0859375, + "learning_rate": 9.999492930229217e-06, + "loss": 84532.0813, + "step": 52040 + }, + { + "epoch": 0.10514429311926049, + "grad_norm": 5320.54638671875, + "learning_rate": 9.999487946857526e-06, + "loss": 117439.825, + "step": 52050 + }, + { + "epoch": 0.10516449375194431, + "grad_norm": 71587.4921875, + "learning_rate": 9.999482939118936e-06, + "loss": 105341.725, + "step": 52060 + }, + { + "epoch": 0.10518469438462813, + "grad_norm": 325296.84375, + "learning_rate": 9.999477907013473e-06, + "loss": 248595.1, + "step": 52070 + }, + { + "epoch": 0.10520489501731194, + "grad_norm": 183132.8125, + "learning_rate": 9.999472850541161e-06, + "loss": 283963.0, + "step": 52080 + }, + { + "epoch": 0.10522509564999576, + "grad_norm": 119270.3046875, + "learning_rate": 9.999467769702023e-06, + "loss": 162483.5875, + "step": 52090 + }, + { + "epoch": 0.10524529628267958, + "grad_norm": 130270.0390625, + "learning_rate": 9.999462664496088e-06, + "loss": 89672.5562, + "step": 52100 + }, + { + "epoch": 0.10526549691536338, + "grad_norm": 1040030.0625, + "learning_rate": 9.999457534923377e-06, + "loss": 298630.1, + "step": 52110 + }, + { + "epoch": 0.1052856975480472, + "grad_norm": 43004.2109375, + "learning_rate": 9.999452380983915e-06, + "loss": 195212.3, + "step": 52120 + }, + { + "epoch": 0.10530589818073102, + "grad_norm": 7323.94677734375, + "learning_rate": 9.999447202677732e-06, + "loss": 120674.1, + "step": 52130 + }, + { + "epoch": 0.10532609881341484, + "grad_norm": 72991.6171875, + "learning_rate": 9.999442000004848e-06, + "loss": 133477.825, + "step": 52140 + }, + { + "epoch": 0.10534629944609865, + "grad_norm": 451371.875, + "learning_rate": 9.99943677296529e-06, + "loss": 177692.0, + "step": 52150 + }, + { + "epoch": 0.10536650007878247, + "grad_norm": 134145.34375, + "learning_rate": 9.999431521559081e-06, + "loss": 120364.7625, + "step": 52160 + }, + { + "epoch": 0.10538670071146629, + "grad_norm": 6545.10009765625, + "learning_rate": 9.999426245786253e-06, + "loss": 36993.5563, + "step": 52170 + }, + { + "epoch": 0.1054069013441501, + "grad_norm": 172153.0, + "learning_rate": 9.999420945646828e-06, + "loss": 165183.425, + "step": 52180 + }, + { + "epoch": 0.10542710197683391, + "grad_norm": 117947.0703125, + "learning_rate": 9.99941562114083e-06, + "loss": 45812.2937, + "step": 52190 + }, + { + "epoch": 0.10544730260951773, + "grad_norm": 6768.6162109375, + "learning_rate": 9.999410272268285e-06, + "loss": 102507.8313, + "step": 52200 + }, + { + "epoch": 0.10546750324220154, + "grad_norm": 2970.314453125, + "learning_rate": 9.999404899029222e-06, + "loss": 147438.05, + "step": 52210 + }, + { + "epoch": 0.10548770387488536, + "grad_norm": 134191.65625, + "learning_rate": 9.999399501423667e-06, + "loss": 130042.325, + "step": 52220 + }, + { + "epoch": 0.10550790450756918, + "grad_norm": 3843.542724609375, + "learning_rate": 9.999394079451643e-06, + "loss": 90510.7375, + "step": 52230 + }, + { + "epoch": 0.10552810514025299, + "grad_norm": 12223.322265625, + "learning_rate": 9.99938863311318e-06, + "loss": 45516.6531, + "step": 52240 + }, + { + "epoch": 0.1055483057729368, + "grad_norm": 57418.88671875, + "learning_rate": 9.999383162408303e-06, + "loss": 125669.0625, + "step": 52250 + }, + { + "epoch": 0.10556850640562063, + "grad_norm": 677611.3125, + "learning_rate": 9.99937766733704e-06, + "loss": 135137.55, + "step": 52260 + }, + { + "epoch": 0.10558870703830443, + "grad_norm": 120980.6171875, + "learning_rate": 9.999372147899416e-06, + "loss": 76824.9563, + "step": 52270 + }, + { + "epoch": 0.10560890767098825, + "grad_norm": 26710.021484375, + "learning_rate": 9.999366604095458e-06, + "loss": 313943.375, + "step": 52280 + }, + { + "epoch": 0.10562910830367207, + "grad_norm": 213309.46875, + "learning_rate": 9.999361035925193e-06, + "loss": 267359.5, + "step": 52290 + }, + { + "epoch": 0.10564930893635589, + "grad_norm": 80339.4375, + "learning_rate": 9.999355443388649e-06, + "loss": 177825.525, + "step": 52300 + }, + { + "epoch": 0.1056695095690397, + "grad_norm": 390460.5, + "learning_rate": 9.999349826485854e-06, + "loss": 144689.225, + "step": 52310 + }, + { + "epoch": 0.10568971020172352, + "grad_norm": 52006.33203125, + "learning_rate": 9.999344185216833e-06, + "loss": 157039.075, + "step": 52320 + }, + { + "epoch": 0.10570991083440734, + "grad_norm": 841614.6875, + "learning_rate": 9.999338519581616e-06, + "loss": 130844.6, + "step": 52330 + }, + { + "epoch": 0.10573011146709115, + "grad_norm": 150675.71875, + "learning_rate": 9.999332829580227e-06, + "loss": 157165.9625, + "step": 52340 + }, + { + "epoch": 0.10575031209977497, + "grad_norm": 209654.390625, + "learning_rate": 9.999327115212698e-06, + "loss": 141705.1125, + "step": 52350 + }, + { + "epoch": 0.10577051273245879, + "grad_norm": 9258.96875, + "learning_rate": 9.999321376479054e-06, + "loss": 132175.975, + "step": 52360 + }, + { + "epoch": 0.10579071336514259, + "grad_norm": 17581.287109375, + "learning_rate": 9.999315613379326e-06, + "loss": 106719.2375, + "step": 52370 + }, + { + "epoch": 0.10581091399782641, + "grad_norm": 10836.95703125, + "learning_rate": 9.999309825913538e-06, + "loss": 112966.475, + "step": 52380 + }, + { + "epoch": 0.10583111463051023, + "grad_norm": 603536.1875, + "learning_rate": 9.999304014081721e-06, + "loss": 111059.6375, + "step": 52390 + }, + { + "epoch": 0.10585131526319404, + "grad_norm": 10038.173828125, + "learning_rate": 9.999298177883902e-06, + "loss": 108732.9, + "step": 52400 + }, + { + "epoch": 0.10587151589587786, + "grad_norm": 293009.21875, + "learning_rate": 9.999292317320112e-06, + "loss": 76714.825, + "step": 52410 + }, + { + "epoch": 0.10589171652856168, + "grad_norm": 323730.6875, + "learning_rate": 9.999286432390376e-06, + "loss": 287587.225, + "step": 52420 + }, + { + "epoch": 0.10591191716124548, + "grad_norm": 29177.341796875, + "learning_rate": 9.999280523094724e-06, + "loss": 100374.0063, + "step": 52430 + }, + { + "epoch": 0.1059321177939293, + "grad_norm": 21012.578125, + "learning_rate": 9.999274589433186e-06, + "loss": 183153.1375, + "step": 52440 + }, + { + "epoch": 0.10595231842661312, + "grad_norm": 107660.0078125, + "learning_rate": 9.99926863140579e-06, + "loss": 166569.35, + "step": 52450 + }, + { + "epoch": 0.10597251905929694, + "grad_norm": 316879.375, + "learning_rate": 9.999262649012564e-06, + "loss": 125563.9875, + "step": 52460 + }, + { + "epoch": 0.10599271969198075, + "grad_norm": 20607.783203125, + "learning_rate": 9.99925664225354e-06, + "loss": 22828.3984, + "step": 52470 + }, + { + "epoch": 0.10601292032466457, + "grad_norm": 317218.84375, + "learning_rate": 9.999250611128743e-06, + "loss": 179496.4875, + "step": 52480 + }, + { + "epoch": 0.10603312095734839, + "grad_norm": 16542.095703125, + "learning_rate": 9.999244555638205e-06, + "loss": 43333.9531, + "step": 52490 + }, + { + "epoch": 0.1060533215900322, + "grad_norm": 49868.0625, + "learning_rate": 9.999238475781957e-06, + "loss": 60222.35, + "step": 52500 + }, + { + "epoch": 0.10607352222271602, + "grad_norm": 6264.6044921875, + "learning_rate": 9.999232371560027e-06, + "loss": 99976.8125, + "step": 52510 + }, + { + "epoch": 0.10609372285539984, + "grad_norm": 678288.875, + "learning_rate": 9.999226242972445e-06, + "loss": 97272.6375, + "step": 52520 + }, + { + "epoch": 0.10611392348808364, + "grad_norm": 162474.53125, + "learning_rate": 9.999220090019238e-06, + "loss": 146801.925, + "step": 52530 + }, + { + "epoch": 0.10613412412076746, + "grad_norm": 263082.34375, + "learning_rate": 9.99921391270044e-06, + "loss": 170995.35, + "step": 52540 + }, + { + "epoch": 0.10615432475345128, + "grad_norm": 319480.40625, + "learning_rate": 9.999207711016081e-06, + "loss": 178460.3, + "step": 52550 + }, + { + "epoch": 0.10617452538613509, + "grad_norm": 46904.08203125, + "learning_rate": 9.999201484966188e-06, + "loss": 71268.7688, + "step": 52560 + }, + { + "epoch": 0.10619472601881891, + "grad_norm": 48779.96875, + "learning_rate": 9.999195234550796e-06, + "loss": 95546.4812, + "step": 52570 + }, + { + "epoch": 0.10621492665150273, + "grad_norm": 180313.953125, + "learning_rate": 9.99918895976993e-06, + "loss": 226147.85, + "step": 52580 + }, + { + "epoch": 0.10623512728418653, + "grad_norm": 122313.6875, + "learning_rate": 9.999182660623625e-06, + "loss": 107570.55, + "step": 52590 + }, + { + "epoch": 0.10625532791687035, + "grad_norm": 195452.296875, + "learning_rate": 9.999176337111908e-06, + "loss": 195147.0625, + "step": 52600 + }, + { + "epoch": 0.10627552854955417, + "grad_norm": 236321.171875, + "learning_rate": 9.999169989234815e-06, + "loss": 112383.3, + "step": 52610 + }, + { + "epoch": 0.106295729182238, + "grad_norm": 350372.75, + "learning_rate": 9.999163616992371e-06, + "loss": 257017.4, + "step": 52620 + }, + { + "epoch": 0.1063159298149218, + "grad_norm": 395.0524597167969, + "learning_rate": 9.999157220384612e-06, + "loss": 29427.6437, + "step": 52630 + }, + { + "epoch": 0.10633613044760562, + "grad_norm": 1081396.125, + "learning_rate": 9.999150799411565e-06, + "loss": 154915.025, + "step": 52640 + }, + { + "epoch": 0.10635633108028944, + "grad_norm": 821540.0, + "learning_rate": 9.999144354073264e-06, + "loss": 335972.8, + "step": 52650 + }, + { + "epoch": 0.10637653171297325, + "grad_norm": 304731.65625, + "learning_rate": 9.999137884369741e-06, + "loss": 251805.625, + "step": 52660 + }, + { + "epoch": 0.10639673234565707, + "grad_norm": 275943.625, + "learning_rate": 9.999131390301027e-06, + "loss": 141648.6, + "step": 52670 + }, + { + "epoch": 0.10641693297834089, + "grad_norm": 265947.1875, + "learning_rate": 9.99912487186715e-06, + "loss": 153873.2625, + "step": 52680 + }, + { + "epoch": 0.10643713361102469, + "grad_norm": 1266117.875, + "learning_rate": 9.999118329068148e-06, + "loss": 236478.6, + "step": 52690 + }, + { + "epoch": 0.10645733424370851, + "grad_norm": 139782.953125, + "learning_rate": 9.999111761904046e-06, + "loss": 363131.425, + "step": 52700 + }, + { + "epoch": 0.10647753487639233, + "grad_norm": 9311.603515625, + "learning_rate": 9.999105170374881e-06, + "loss": 183642.525, + "step": 52710 + }, + { + "epoch": 0.10649773550907614, + "grad_norm": 128085.578125, + "learning_rate": 9.999098554480685e-06, + "loss": 78856.05, + "step": 52720 + }, + { + "epoch": 0.10651793614175996, + "grad_norm": 0.0, + "learning_rate": 9.999091914221487e-06, + "loss": 140971.05, + "step": 52730 + }, + { + "epoch": 0.10653813677444378, + "grad_norm": 796525.375, + "learning_rate": 9.999085249597322e-06, + "loss": 163037.5375, + "step": 52740 + }, + { + "epoch": 0.10655833740712758, + "grad_norm": 11619.6162109375, + "learning_rate": 9.999078560608221e-06, + "loss": 191288.1, + "step": 52750 + }, + { + "epoch": 0.1065785380398114, + "grad_norm": 22983.390625, + "learning_rate": 9.999071847254219e-06, + "loss": 112997.325, + "step": 52760 + }, + { + "epoch": 0.10659873867249522, + "grad_norm": 1373097.375, + "learning_rate": 9.999065109535346e-06, + "loss": 246926.1, + "step": 52770 + }, + { + "epoch": 0.10661893930517904, + "grad_norm": 30857.103515625, + "learning_rate": 9.999058347451638e-06, + "loss": 142449.4, + "step": 52780 + }, + { + "epoch": 0.10663913993786285, + "grad_norm": 9798.9462890625, + "learning_rate": 9.999051561003124e-06, + "loss": 96162.3375, + "step": 52790 + }, + { + "epoch": 0.10665934057054667, + "grad_norm": 177260.5625, + "learning_rate": 9.99904475018984e-06, + "loss": 85070.4625, + "step": 52800 + }, + { + "epoch": 0.10667954120323049, + "grad_norm": 618305.875, + "learning_rate": 9.999037915011819e-06, + "loss": 239062.1, + "step": 52810 + }, + { + "epoch": 0.1066997418359143, + "grad_norm": 279118.5, + "learning_rate": 9.999031055469091e-06, + "loss": 359429.45, + "step": 52820 + }, + { + "epoch": 0.10671994246859812, + "grad_norm": 5358.40576171875, + "learning_rate": 9.999024171561693e-06, + "loss": 96862.25, + "step": 52830 + }, + { + "epoch": 0.10674014310128194, + "grad_norm": 25169.078125, + "learning_rate": 9.999017263289656e-06, + "loss": 153548.5125, + "step": 52840 + }, + { + "epoch": 0.10676034373396574, + "grad_norm": 1587777.625, + "learning_rate": 9.999010330653019e-06, + "loss": 224311.825, + "step": 52850 + }, + { + "epoch": 0.10678054436664956, + "grad_norm": 21391.111328125, + "learning_rate": 9.999003373651809e-06, + "loss": 159376.6, + "step": 52860 + }, + { + "epoch": 0.10680074499933338, + "grad_norm": 4461.337890625, + "learning_rate": 9.998996392286062e-06, + "loss": 129582.125, + "step": 52870 + }, + { + "epoch": 0.10682094563201719, + "grad_norm": 184933.8125, + "learning_rate": 9.998989386555815e-06, + "loss": 266382.45, + "step": 52880 + }, + { + "epoch": 0.10684114626470101, + "grad_norm": 301743.59375, + "learning_rate": 9.9989823564611e-06, + "loss": 69804.3875, + "step": 52890 + }, + { + "epoch": 0.10686134689738483, + "grad_norm": 0.0, + "learning_rate": 9.99897530200195e-06, + "loss": 67945.9937, + "step": 52900 + }, + { + "epoch": 0.10688154753006864, + "grad_norm": 486678.25, + "learning_rate": 9.998968223178402e-06, + "loss": 170716.7, + "step": 52910 + }, + { + "epoch": 0.10690174816275246, + "grad_norm": 153889.390625, + "learning_rate": 9.99896111999049e-06, + "loss": 68534.1687, + "step": 52920 + }, + { + "epoch": 0.10692194879543628, + "grad_norm": 79028.7890625, + "learning_rate": 9.998953992438245e-06, + "loss": 159680.9875, + "step": 52930 + }, + { + "epoch": 0.1069421494281201, + "grad_norm": 276276.4375, + "learning_rate": 9.998946840521706e-06, + "loss": 102362.0312, + "step": 52940 + }, + { + "epoch": 0.1069623500608039, + "grad_norm": 75528.9765625, + "learning_rate": 9.998939664240908e-06, + "loss": 146051.85, + "step": 52950 + }, + { + "epoch": 0.10698255069348772, + "grad_norm": 426842.46875, + "learning_rate": 9.998932463595882e-06, + "loss": 140802.6625, + "step": 52960 + }, + { + "epoch": 0.10700275132617154, + "grad_norm": 16852.775390625, + "learning_rate": 9.998925238586666e-06, + "loss": 141547.75, + "step": 52970 + }, + { + "epoch": 0.10702295195885535, + "grad_norm": 30475.76953125, + "learning_rate": 9.998917989213296e-06, + "loss": 172244.5375, + "step": 52980 + }, + { + "epoch": 0.10704315259153917, + "grad_norm": 11793.36328125, + "learning_rate": 9.998910715475804e-06, + "loss": 126609.3375, + "step": 52990 + }, + { + "epoch": 0.10706335322422299, + "grad_norm": 38380.92578125, + "learning_rate": 9.998903417374228e-06, + "loss": 87454.8125, + "step": 53000 + }, + { + "epoch": 0.1070835538569068, + "grad_norm": 267525.71875, + "learning_rate": 9.998896094908603e-06, + "loss": 127360.6375, + "step": 53010 + }, + { + "epoch": 0.10710375448959061, + "grad_norm": 124677.2109375, + "learning_rate": 9.998888748078966e-06, + "loss": 112755.9125, + "step": 53020 + }, + { + "epoch": 0.10712395512227443, + "grad_norm": 2795.489501953125, + "learning_rate": 9.99888137688535e-06, + "loss": 161230.175, + "step": 53030 + }, + { + "epoch": 0.10714415575495824, + "grad_norm": 1008.2382202148438, + "learning_rate": 9.998873981327795e-06, + "loss": 105937.05, + "step": 53040 + }, + { + "epoch": 0.10716435638764206, + "grad_norm": 15049.2275390625, + "learning_rate": 9.998866561406331e-06, + "loss": 254459.675, + "step": 53050 + }, + { + "epoch": 0.10718455702032588, + "grad_norm": 1761160.75, + "learning_rate": 9.998859117121e-06, + "loss": 212472.65, + "step": 53060 + }, + { + "epoch": 0.10720475765300969, + "grad_norm": 184354.34375, + "learning_rate": 9.998851648471834e-06, + "loss": 92593.8438, + "step": 53070 + }, + { + "epoch": 0.1072249582856935, + "grad_norm": 89388.4296875, + "learning_rate": 9.998844155458873e-06, + "loss": 101656.8, + "step": 53080 + }, + { + "epoch": 0.10724515891837733, + "grad_norm": 205393.71875, + "learning_rate": 9.998836638082152e-06, + "loss": 101028.6375, + "step": 53090 + }, + { + "epoch": 0.10726535955106115, + "grad_norm": 115739.4140625, + "learning_rate": 9.998829096341706e-06, + "loss": 203584.5125, + "step": 53100 + }, + { + "epoch": 0.10728556018374495, + "grad_norm": 96117.578125, + "learning_rate": 9.998821530237576e-06, + "loss": 106899.65, + "step": 53110 + }, + { + "epoch": 0.10730576081642877, + "grad_norm": 495410.4375, + "learning_rate": 9.998813939769794e-06, + "loss": 177496.225, + "step": 53120 + }, + { + "epoch": 0.10732596144911259, + "grad_norm": 157641.390625, + "learning_rate": 9.9988063249384e-06, + "loss": 135123.65, + "step": 53130 + }, + { + "epoch": 0.1073461620817964, + "grad_norm": 707540.75, + "learning_rate": 9.99879868574343e-06, + "loss": 84795.625, + "step": 53140 + }, + { + "epoch": 0.10736636271448022, + "grad_norm": 431210.03125, + "learning_rate": 9.998791022184921e-06, + "loss": 315332.225, + "step": 53150 + }, + { + "epoch": 0.10738656334716404, + "grad_norm": 33316.74609375, + "learning_rate": 9.998783334262911e-06, + "loss": 299452.975, + "step": 53160 + }, + { + "epoch": 0.10740676397984784, + "grad_norm": 16561.98828125, + "learning_rate": 9.998775621977438e-06, + "loss": 163493.3875, + "step": 53170 + }, + { + "epoch": 0.10742696461253166, + "grad_norm": 10063.8984375, + "learning_rate": 9.998767885328538e-06, + "loss": 261349.0, + "step": 53180 + }, + { + "epoch": 0.10744716524521548, + "grad_norm": 133738.359375, + "learning_rate": 9.99876012431625e-06, + "loss": 191236.5875, + "step": 53190 + }, + { + "epoch": 0.10746736587789929, + "grad_norm": 1138654.875, + "learning_rate": 9.998752338940612e-06, + "loss": 267914.2, + "step": 53200 + }, + { + "epoch": 0.10748756651058311, + "grad_norm": 12313.2783203125, + "learning_rate": 9.99874452920166e-06, + "loss": 152364.5625, + "step": 53210 + }, + { + "epoch": 0.10750776714326693, + "grad_norm": 387321.03125, + "learning_rate": 9.998736695099434e-06, + "loss": 128336.7, + "step": 53220 + }, + { + "epoch": 0.10752796777595074, + "grad_norm": 1238.62060546875, + "learning_rate": 9.998728836633972e-06, + "loss": 94634.5188, + "step": 53230 + }, + { + "epoch": 0.10754816840863456, + "grad_norm": 141398.140625, + "learning_rate": 9.998720953805312e-06, + "loss": 37978.2625, + "step": 53240 + }, + { + "epoch": 0.10756836904131838, + "grad_norm": 300564.59375, + "learning_rate": 9.998713046613492e-06, + "loss": 118606.25, + "step": 53250 + }, + { + "epoch": 0.1075885696740022, + "grad_norm": 36329.2265625, + "learning_rate": 9.998705115058552e-06, + "loss": 133401.05, + "step": 53260 + }, + { + "epoch": 0.107608770306686, + "grad_norm": 99374.3671875, + "learning_rate": 9.998697159140528e-06, + "loss": 111494.5625, + "step": 53270 + }, + { + "epoch": 0.10762897093936982, + "grad_norm": 1076611.375, + "learning_rate": 9.998689178859461e-06, + "loss": 272786.675, + "step": 53280 + }, + { + "epoch": 0.10764917157205364, + "grad_norm": 18808.69140625, + "learning_rate": 9.99868117421539e-06, + "loss": 67935.25, + "step": 53290 + }, + { + "epoch": 0.10766937220473745, + "grad_norm": 365134.34375, + "learning_rate": 9.998673145208351e-06, + "loss": 108579.4, + "step": 53300 + }, + { + "epoch": 0.10768957283742127, + "grad_norm": 459671.875, + "learning_rate": 9.998665091838386e-06, + "loss": 270125.7, + "step": 53310 + }, + { + "epoch": 0.10770977347010509, + "grad_norm": 1046133.0, + "learning_rate": 9.998657014105535e-06, + "loss": 283553.575, + "step": 53320 + }, + { + "epoch": 0.1077299741027889, + "grad_norm": 144089.078125, + "learning_rate": 9.998648912009835e-06, + "loss": 259137.35, + "step": 53330 + }, + { + "epoch": 0.10775017473547271, + "grad_norm": 1773158.875, + "learning_rate": 9.998640785551327e-06, + "loss": 182029.6, + "step": 53340 + }, + { + "epoch": 0.10777037536815653, + "grad_norm": 110243.8671875, + "learning_rate": 9.99863263473005e-06, + "loss": 148472.4125, + "step": 53350 + }, + { + "epoch": 0.10779057600084034, + "grad_norm": 199711.734375, + "learning_rate": 9.998624459546043e-06, + "loss": 89472.475, + "step": 53360 + }, + { + "epoch": 0.10781077663352416, + "grad_norm": 8412.16796875, + "learning_rate": 9.998616259999348e-06, + "loss": 135383.0125, + "step": 53370 + }, + { + "epoch": 0.10783097726620798, + "grad_norm": 1297811.125, + "learning_rate": 9.998608036090003e-06, + "loss": 294141.85, + "step": 53380 + }, + { + "epoch": 0.10785117789889179, + "grad_norm": 10453.599609375, + "learning_rate": 9.998599787818048e-06, + "loss": 63443.8063, + "step": 53390 + }, + { + "epoch": 0.1078713785315756, + "grad_norm": 45832.1875, + "learning_rate": 9.998591515183524e-06, + "loss": 60546.5188, + "step": 53400 + }, + { + "epoch": 0.10789157916425943, + "grad_norm": 19611.45703125, + "learning_rate": 9.998583218186471e-06, + "loss": 57682.0375, + "step": 53410 + }, + { + "epoch": 0.10791177979694325, + "grad_norm": 190969.484375, + "learning_rate": 9.998574896826931e-06, + "loss": 191710.375, + "step": 53420 + }, + { + "epoch": 0.10793198042962705, + "grad_norm": 107449.234375, + "learning_rate": 9.998566551104943e-06, + "loss": 102673.0813, + "step": 53430 + }, + { + "epoch": 0.10795218106231087, + "grad_norm": 18728.689453125, + "learning_rate": 9.998558181020547e-06, + "loss": 364890.325, + "step": 53440 + }, + { + "epoch": 0.10797238169499469, + "grad_norm": 2387.052490234375, + "learning_rate": 9.998549786573785e-06, + "loss": 195391.2375, + "step": 53450 + }, + { + "epoch": 0.1079925823276785, + "grad_norm": 182427.953125, + "learning_rate": 9.998541367764699e-06, + "loss": 170719.1875, + "step": 53460 + }, + { + "epoch": 0.10801278296036232, + "grad_norm": 253692.6875, + "learning_rate": 9.998532924593327e-06, + "loss": 133815.675, + "step": 53470 + }, + { + "epoch": 0.10803298359304614, + "grad_norm": 23747.248046875, + "learning_rate": 9.99852445705971e-06, + "loss": 98425.975, + "step": 53480 + }, + { + "epoch": 0.10805318422572995, + "grad_norm": 3771.00537109375, + "learning_rate": 9.998515965163894e-06, + "loss": 86236.7063, + "step": 53490 + }, + { + "epoch": 0.10807338485841377, + "grad_norm": 11392.9912109375, + "learning_rate": 9.998507448905917e-06, + "loss": 254565.425, + "step": 53500 + }, + { + "epoch": 0.10809358549109759, + "grad_norm": 150269.3125, + "learning_rate": 9.99849890828582e-06, + "loss": 77951.2812, + "step": 53510 + }, + { + "epoch": 0.10811378612378139, + "grad_norm": 161070.21875, + "learning_rate": 9.998490343303646e-06, + "loss": 68266.725, + "step": 53520 + }, + { + "epoch": 0.10813398675646521, + "grad_norm": 152508.375, + "learning_rate": 9.998481753959436e-06, + "loss": 225114.525, + "step": 53530 + }, + { + "epoch": 0.10815418738914903, + "grad_norm": 22766.830078125, + "learning_rate": 9.998473140253234e-06, + "loss": 168868.6375, + "step": 53540 + }, + { + "epoch": 0.10817438802183284, + "grad_norm": 27129.849609375, + "learning_rate": 9.998464502185076e-06, + "loss": 287594.875, + "step": 53550 + }, + { + "epoch": 0.10819458865451666, + "grad_norm": 190183.359375, + "learning_rate": 9.998455839755013e-06, + "loss": 106583.5875, + "step": 53560 + }, + { + "epoch": 0.10821478928720048, + "grad_norm": 32430.1015625, + "learning_rate": 9.99844715296308e-06, + "loss": 101725.8875, + "step": 53570 + }, + { + "epoch": 0.1082349899198843, + "grad_norm": 92865.8046875, + "learning_rate": 9.998438441809322e-06, + "loss": 197511.05, + "step": 53580 + }, + { + "epoch": 0.1082551905525681, + "grad_norm": 193676.25, + "learning_rate": 9.998429706293781e-06, + "loss": 96493.3375, + "step": 53590 + }, + { + "epoch": 0.10827539118525192, + "grad_norm": 145901.125, + "learning_rate": 9.9984209464165e-06, + "loss": 179724.9375, + "step": 53600 + }, + { + "epoch": 0.10829559181793574, + "grad_norm": 21652.4296875, + "learning_rate": 9.998412162177523e-06, + "loss": 208456.7625, + "step": 53610 + }, + { + "epoch": 0.10831579245061955, + "grad_norm": 69330.640625, + "learning_rate": 9.99840335357689e-06, + "loss": 178840.425, + "step": 53620 + }, + { + "epoch": 0.10833599308330337, + "grad_norm": 124632.4765625, + "learning_rate": 9.998394520614645e-06, + "loss": 147235.5, + "step": 53630 + }, + { + "epoch": 0.10835619371598719, + "grad_norm": 40723.3359375, + "learning_rate": 9.998385663290833e-06, + "loss": 174852.6125, + "step": 53640 + }, + { + "epoch": 0.108376394348671, + "grad_norm": 63557.79296875, + "learning_rate": 9.998376781605493e-06, + "loss": 132138.775, + "step": 53650 + }, + { + "epoch": 0.10839659498135482, + "grad_norm": 1426128.25, + "learning_rate": 9.998367875558673e-06, + "loss": 239319.6, + "step": 53660 + }, + { + "epoch": 0.10841679561403864, + "grad_norm": 106356.734375, + "learning_rate": 9.998358945150412e-06, + "loss": 106064.3, + "step": 53670 + }, + { + "epoch": 0.10843699624672244, + "grad_norm": 83737.2578125, + "learning_rate": 9.998349990380757e-06, + "loss": 114638.675, + "step": 53680 + }, + { + "epoch": 0.10845719687940626, + "grad_norm": 5548.248046875, + "learning_rate": 9.998341011249752e-06, + "loss": 128988.6625, + "step": 53690 + }, + { + "epoch": 0.10847739751209008, + "grad_norm": 5801.17919921875, + "learning_rate": 9.998332007757436e-06, + "loss": 84971.1062, + "step": 53700 + }, + { + "epoch": 0.10849759814477389, + "grad_norm": 229400.890625, + "learning_rate": 9.998322979903859e-06, + "loss": 54070.5312, + "step": 53710 + }, + { + "epoch": 0.10851779877745771, + "grad_norm": 1064.8841552734375, + "learning_rate": 9.99831392768906e-06, + "loss": 99242.1812, + "step": 53720 + }, + { + "epoch": 0.10853799941014153, + "grad_norm": 48927.19140625, + "learning_rate": 9.998304851113086e-06, + "loss": 67590.125, + "step": 53730 + }, + { + "epoch": 0.10855820004282535, + "grad_norm": 21707.2890625, + "learning_rate": 9.99829575017598e-06, + "loss": 141829.225, + "step": 53740 + }, + { + "epoch": 0.10857840067550915, + "grad_norm": 532659.5, + "learning_rate": 9.998286624877786e-06, + "loss": 146804.675, + "step": 53750 + }, + { + "epoch": 0.10859860130819297, + "grad_norm": 82221.125, + "learning_rate": 9.998277475218552e-06, + "loss": 47881.3656, + "step": 53760 + }, + { + "epoch": 0.1086188019408768, + "grad_norm": 89105.0625, + "learning_rate": 9.998268301198317e-06, + "loss": 167126.75, + "step": 53770 + }, + { + "epoch": 0.1086390025735606, + "grad_norm": 236190.640625, + "learning_rate": 9.99825910281713e-06, + "loss": 77770.975, + "step": 53780 + }, + { + "epoch": 0.10865920320624442, + "grad_norm": 186756.921875, + "learning_rate": 9.998249880075033e-06, + "loss": 197435.3875, + "step": 53790 + }, + { + "epoch": 0.10867940383892824, + "grad_norm": 247157.09375, + "learning_rate": 9.998240632972073e-06, + "loss": 270440.1, + "step": 53800 + }, + { + "epoch": 0.10869960447161205, + "grad_norm": 29608.994140625, + "learning_rate": 9.998231361508295e-06, + "loss": 353919.3, + "step": 53810 + }, + { + "epoch": 0.10871980510429587, + "grad_norm": 11779.5615234375, + "learning_rate": 9.998222065683743e-06, + "loss": 234317.95, + "step": 53820 + }, + { + "epoch": 0.10874000573697969, + "grad_norm": 80046.421875, + "learning_rate": 9.998212745498464e-06, + "loss": 160342.9375, + "step": 53830 + }, + { + "epoch": 0.10876020636966349, + "grad_norm": 7954.47021484375, + "learning_rate": 9.9982034009525e-06, + "loss": 139141.8875, + "step": 53840 + }, + { + "epoch": 0.10878040700234731, + "grad_norm": 246543.484375, + "learning_rate": 9.9981940320459e-06, + "loss": 206848.375, + "step": 53850 + }, + { + "epoch": 0.10880060763503113, + "grad_norm": 14716.19140625, + "learning_rate": 9.998184638778708e-06, + "loss": 81737.4187, + "step": 53860 + }, + { + "epoch": 0.10882080826771494, + "grad_norm": 289646.53125, + "learning_rate": 9.99817522115097e-06, + "loss": 90219.775, + "step": 53870 + }, + { + "epoch": 0.10884100890039876, + "grad_norm": 119510.8125, + "learning_rate": 9.998165779162734e-06, + "loss": 70741.55, + "step": 53880 + }, + { + "epoch": 0.10886120953308258, + "grad_norm": 891576.1875, + "learning_rate": 9.998156312814043e-06, + "loss": 318909.7, + "step": 53890 + }, + { + "epoch": 0.1088814101657664, + "grad_norm": 6990.75146484375, + "learning_rate": 9.998146822104943e-06, + "loss": 138117.225, + "step": 53900 + }, + { + "epoch": 0.1089016107984502, + "grad_norm": 173906.875, + "learning_rate": 9.998137307035486e-06, + "loss": 68808.95, + "step": 53910 + }, + { + "epoch": 0.10892181143113402, + "grad_norm": 35413.78515625, + "learning_rate": 9.99812776760571e-06, + "loss": 101507.0188, + "step": 53920 + }, + { + "epoch": 0.10894201206381784, + "grad_norm": 228647.546875, + "learning_rate": 9.998118203815666e-06, + "loss": 149977.2375, + "step": 53930 + }, + { + "epoch": 0.10896221269650165, + "grad_norm": 4377.0966796875, + "learning_rate": 9.9981086156654e-06, + "loss": 87109.8625, + "step": 53940 + }, + { + "epoch": 0.10898241332918547, + "grad_norm": 78211.2421875, + "learning_rate": 9.99809900315496e-06, + "loss": 97566.125, + "step": 53950 + }, + { + "epoch": 0.10900261396186929, + "grad_norm": 6458.1640625, + "learning_rate": 9.998089366284392e-06, + "loss": 78169.9, + "step": 53960 + }, + { + "epoch": 0.1090228145945531, + "grad_norm": 48163.046875, + "learning_rate": 9.99807970505374e-06, + "loss": 222314.05, + "step": 53970 + }, + { + "epoch": 0.10904301522723692, + "grad_norm": 33492.4140625, + "learning_rate": 9.998070019463055e-06, + "loss": 137014.7, + "step": 53980 + }, + { + "epoch": 0.10906321585992074, + "grad_norm": 72100.359375, + "learning_rate": 9.998060309512384e-06, + "loss": 74284.7188, + "step": 53990 + }, + { + "epoch": 0.10908341649260454, + "grad_norm": 7214.423828125, + "learning_rate": 9.998050575201772e-06, + "loss": 119205.35, + "step": 54000 + }, + { + "epoch": 0.10910361712528836, + "grad_norm": 180378.5625, + "learning_rate": 9.99804081653127e-06, + "loss": 158864.5375, + "step": 54010 + }, + { + "epoch": 0.10912381775797218, + "grad_norm": 4383.4345703125, + "learning_rate": 9.99803103350092e-06, + "loss": 170378.4125, + "step": 54020 + }, + { + "epoch": 0.10914401839065599, + "grad_norm": 48079.01953125, + "learning_rate": 9.998021226110775e-06, + "loss": 80811.35, + "step": 54030 + }, + { + "epoch": 0.10916421902333981, + "grad_norm": 484561.125, + "learning_rate": 9.99801139436088e-06, + "loss": 140982.2125, + "step": 54040 + }, + { + "epoch": 0.10918441965602363, + "grad_norm": 11601.25390625, + "learning_rate": 9.998001538251283e-06, + "loss": 248204.75, + "step": 54050 + }, + { + "epoch": 0.10920462028870745, + "grad_norm": 4204.86865234375, + "learning_rate": 9.997991657782033e-06, + "loss": 182327.1125, + "step": 54060 + }, + { + "epoch": 0.10922482092139126, + "grad_norm": 4833.8076171875, + "learning_rate": 9.997981752953179e-06, + "loss": 159321.2875, + "step": 54070 + }, + { + "epoch": 0.10924502155407508, + "grad_norm": 113164.3828125, + "learning_rate": 9.997971823764766e-06, + "loss": 124710.2375, + "step": 54080 + }, + { + "epoch": 0.1092652221867589, + "grad_norm": 1170559.5, + "learning_rate": 9.997961870216849e-06, + "loss": 314383.6, + "step": 54090 + }, + { + "epoch": 0.1092854228194427, + "grad_norm": 28173.814453125, + "learning_rate": 9.997951892309468e-06, + "loss": 80908.0125, + "step": 54100 + }, + { + "epoch": 0.10930562345212652, + "grad_norm": 142975.765625, + "learning_rate": 9.997941890042677e-06, + "loss": 196738.5125, + "step": 54110 + }, + { + "epoch": 0.10932582408481034, + "grad_norm": 4913.74853515625, + "learning_rate": 9.997931863416522e-06, + "loss": 248160.975, + "step": 54120 + }, + { + "epoch": 0.10934602471749415, + "grad_norm": 418052.0625, + "learning_rate": 9.997921812431055e-06, + "loss": 270569.55, + "step": 54130 + }, + { + "epoch": 0.10936622535017797, + "grad_norm": 12058.0400390625, + "learning_rate": 9.997911737086322e-06, + "loss": 194599.0375, + "step": 54140 + }, + { + "epoch": 0.10938642598286179, + "grad_norm": 219831.5, + "learning_rate": 9.997901637382375e-06, + "loss": 170995.0625, + "step": 54150 + }, + { + "epoch": 0.1094066266155456, + "grad_norm": 207216.484375, + "learning_rate": 9.99789151331926e-06, + "loss": 88369.975, + "step": 54160 + }, + { + "epoch": 0.10942682724822941, + "grad_norm": 809795.0, + "learning_rate": 9.997881364897028e-06, + "loss": 146708.9625, + "step": 54170 + }, + { + "epoch": 0.10944702788091323, + "grad_norm": 276716.9375, + "learning_rate": 9.99787119211573e-06, + "loss": 70820.2563, + "step": 54180 + }, + { + "epoch": 0.10946722851359704, + "grad_norm": 117401.0703125, + "learning_rate": 9.997860994975412e-06, + "loss": 267313.075, + "step": 54190 + }, + { + "epoch": 0.10948742914628086, + "grad_norm": 7403.0390625, + "learning_rate": 9.997850773476126e-06, + "loss": 140054.925, + "step": 54200 + }, + { + "epoch": 0.10950762977896468, + "grad_norm": 9005.5068359375, + "learning_rate": 9.997840527617921e-06, + "loss": 84738.8188, + "step": 54210 + }, + { + "epoch": 0.1095278304116485, + "grad_norm": 294045.40625, + "learning_rate": 9.99783025740085e-06, + "loss": 103823.925, + "step": 54220 + }, + { + "epoch": 0.1095480310443323, + "grad_norm": 5449.14111328125, + "learning_rate": 9.997819962824958e-06, + "loss": 75576.1938, + "step": 54230 + }, + { + "epoch": 0.10956823167701613, + "grad_norm": 524769.3125, + "learning_rate": 9.9978096438903e-06, + "loss": 96312.2437, + "step": 54240 + }, + { + "epoch": 0.10958843230969995, + "grad_norm": 7925.8544921875, + "learning_rate": 9.997799300596921e-06, + "loss": 35541.1219, + "step": 54250 + }, + { + "epoch": 0.10960863294238375, + "grad_norm": 55049.41796875, + "learning_rate": 9.997788932944877e-06, + "loss": 93135.0375, + "step": 54260 + }, + { + "epoch": 0.10962883357506757, + "grad_norm": 63698.66796875, + "learning_rate": 9.997778540934213e-06, + "loss": 58447.525, + "step": 54270 + }, + { + "epoch": 0.10964903420775139, + "grad_norm": 361426.34375, + "learning_rate": 9.997768124564986e-06, + "loss": 149475.5, + "step": 54280 + }, + { + "epoch": 0.1096692348404352, + "grad_norm": 4606.4267578125, + "learning_rate": 9.997757683837242e-06, + "loss": 133959.3, + "step": 54290 + }, + { + "epoch": 0.10968943547311902, + "grad_norm": 20273.427734375, + "learning_rate": 9.997747218751032e-06, + "loss": 97326.1187, + "step": 54300 + }, + { + "epoch": 0.10970963610580284, + "grad_norm": 110398.375, + "learning_rate": 9.997736729306409e-06, + "loss": 165106.45, + "step": 54310 + }, + { + "epoch": 0.10972983673848664, + "grad_norm": 6450.46240234375, + "learning_rate": 9.997726215503422e-06, + "loss": 150716.2875, + "step": 54320 + }, + { + "epoch": 0.10975003737117046, + "grad_norm": 35620.8046875, + "learning_rate": 9.997715677342126e-06, + "loss": 140511.1875, + "step": 54330 + }, + { + "epoch": 0.10977023800385428, + "grad_norm": 59831.734375, + "learning_rate": 9.99770511482257e-06, + "loss": 129589.6375, + "step": 54340 + }, + { + "epoch": 0.10979043863653809, + "grad_norm": 136546.078125, + "learning_rate": 9.997694527944804e-06, + "loss": 102178.45, + "step": 54350 + }, + { + "epoch": 0.10981063926922191, + "grad_norm": 68749.3671875, + "learning_rate": 9.99768391670888e-06, + "loss": 47469.5031, + "step": 54360 + }, + { + "epoch": 0.10983083990190573, + "grad_norm": 17263.677734375, + "learning_rate": 9.997673281114852e-06, + "loss": 161972.6125, + "step": 54370 + }, + { + "epoch": 0.10985104053458955, + "grad_norm": 309007.71875, + "learning_rate": 9.99766262116277e-06, + "loss": 215381.65, + "step": 54380 + }, + { + "epoch": 0.10987124116727336, + "grad_norm": 35404.53515625, + "learning_rate": 9.997651936852689e-06, + "loss": 135121.1, + "step": 54390 + }, + { + "epoch": 0.10989144179995718, + "grad_norm": 136081.453125, + "learning_rate": 9.997641228184656e-06, + "loss": 79566.525, + "step": 54400 + }, + { + "epoch": 0.109911642432641, + "grad_norm": 307945.25, + "learning_rate": 9.997630495158728e-06, + "loss": 123764.2125, + "step": 54410 + }, + { + "epoch": 0.1099318430653248, + "grad_norm": 223697.03125, + "learning_rate": 9.997619737774953e-06, + "loss": 280909.7, + "step": 54420 + }, + { + "epoch": 0.10995204369800862, + "grad_norm": 3926.441650390625, + "learning_rate": 9.997608956033386e-06, + "loss": 92606.5688, + "step": 54430 + }, + { + "epoch": 0.10997224433069244, + "grad_norm": 925054.0, + "learning_rate": 9.99759814993408e-06, + "loss": 226707.2, + "step": 54440 + }, + { + "epoch": 0.10999244496337625, + "grad_norm": 493139.125, + "learning_rate": 9.997587319477084e-06, + "loss": 134807.675, + "step": 54450 + }, + { + "epoch": 0.11001264559606007, + "grad_norm": 10854.6220703125, + "learning_rate": 9.997576464662458e-06, + "loss": 100177.5688, + "step": 54460 + }, + { + "epoch": 0.11003284622874389, + "grad_norm": 8936.880859375, + "learning_rate": 9.997565585490247e-06, + "loss": 106858.8, + "step": 54470 + }, + { + "epoch": 0.1100530468614277, + "grad_norm": 150243.84375, + "learning_rate": 9.997554681960508e-06, + "loss": 173233.425, + "step": 54480 + }, + { + "epoch": 0.11007324749411151, + "grad_norm": 5822.34375, + "learning_rate": 9.997543754073295e-06, + "loss": 72998.2188, + "step": 54490 + }, + { + "epoch": 0.11009344812679533, + "grad_norm": 261178.390625, + "learning_rate": 9.997532801828659e-06, + "loss": 106703.725, + "step": 54500 + }, + { + "epoch": 0.11011364875947914, + "grad_norm": 50657.859375, + "learning_rate": 9.997521825226654e-06, + "loss": 100061.2688, + "step": 54510 + }, + { + "epoch": 0.11013384939216296, + "grad_norm": 325238.96875, + "learning_rate": 9.997510824267334e-06, + "loss": 84031.2625, + "step": 54520 + }, + { + "epoch": 0.11015405002484678, + "grad_norm": 36607.54296875, + "learning_rate": 9.997499798950752e-06, + "loss": 171881.9125, + "step": 54530 + }, + { + "epoch": 0.1101742506575306, + "grad_norm": 297821.375, + "learning_rate": 9.997488749276962e-06, + "loss": 151805.1625, + "step": 54540 + }, + { + "epoch": 0.1101944512902144, + "grad_norm": 17115.248046875, + "learning_rate": 9.997477675246018e-06, + "loss": 82201.7437, + "step": 54550 + }, + { + "epoch": 0.11021465192289823, + "grad_norm": 763505.9375, + "learning_rate": 9.997466576857974e-06, + "loss": 326547.275, + "step": 54560 + }, + { + "epoch": 0.11023485255558205, + "grad_norm": 15148.09765625, + "learning_rate": 9.997455454112885e-06, + "loss": 72028.9125, + "step": 54570 + }, + { + "epoch": 0.11025505318826585, + "grad_norm": 1629174.625, + "learning_rate": 9.997444307010804e-06, + "loss": 283592.85, + "step": 54580 + }, + { + "epoch": 0.11027525382094967, + "grad_norm": 117733.1484375, + "learning_rate": 9.997433135551786e-06, + "loss": 234144.85, + "step": 54590 + }, + { + "epoch": 0.11029545445363349, + "grad_norm": 273417.21875, + "learning_rate": 9.997421939735885e-06, + "loss": 143854.225, + "step": 54600 + }, + { + "epoch": 0.1103156550863173, + "grad_norm": 10317.4794921875, + "learning_rate": 9.997410719563155e-06, + "loss": 113458.375, + "step": 54610 + }, + { + "epoch": 0.11033585571900112, + "grad_norm": 4995.8369140625, + "learning_rate": 9.997399475033648e-06, + "loss": 86339.475, + "step": 54620 + }, + { + "epoch": 0.11035605635168494, + "grad_norm": 56175.59375, + "learning_rate": 9.997388206147427e-06, + "loss": 260059.95, + "step": 54630 + }, + { + "epoch": 0.11037625698436875, + "grad_norm": 31192.501953125, + "learning_rate": 9.99737691290454e-06, + "loss": 136651.5, + "step": 54640 + }, + { + "epoch": 0.11039645761705257, + "grad_norm": 0.0, + "learning_rate": 9.997365595305045e-06, + "loss": 153163.9, + "step": 54650 + }, + { + "epoch": 0.11041665824973639, + "grad_norm": 127264.1171875, + "learning_rate": 9.997354253348994e-06, + "loss": 66703.3313, + "step": 54660 + }, + { + "epoch": 0.11043685888242019, + "grad_norm": 1788597.625, + "learning_rate": 9.997342887036446e-06, + "loss": 218946.775, + "step": 54670 + }, + { + "epoch": 0.11045705951510401, + "grad_norm": 1502453.5, + "learning_rate": 9.997331496367455e-06, + "loss": 259467.3, + "step": 54680 + }, + { + "epoch": 0.11047726014778783, + "grad_norm": 137473.421875, + "learning_rate": 9.997320081342076e-06, + "loss": 124044.8875, + "step": 54690 + }, + { + "epoch": 0.11049746078047164, + "grad_norm": 46128.2109375, + "learning_rate": 9.997308641960365e-06, + "loss": 258869.825, + "step": 54700 + }, + { + "epoch": 0.11051766141315546, + "grad_norm": 69046.1640625, + "learning_rate": 9.997297178222378e-06, + "loss": 71374.8062, + "step": 54710 + }, + { + "epoch": 0.11053786204583928, + "grad_norm": 10608.3466796875, + "learning_rate": 9.997285690128172e-06, + "loss": 222306.525, + "step": 54720 + }, + { + "epoch": 0.1105580626785231, + "grad_norm": 62603.79296875, + "learning_rate": 9.997274177677799e-06, + "loss": 134721.6, + "step": 54730 + }, + { + "epoch": 0.1105782633112069, + "grad_norm": 183363.359375, + "learning_rate": 9.997262640871319e-06, + "loss": 111121.1, + "step": 54740 + }, + { + "epoch": 0.11059846394389072, + "grad_norm": 73948.1875, + "learning_rate": 9.997251079708788e-06, + "loss": 54986.0938, + "step": 54750 + }, + { + "epoch": 0.11061866457657454, + "grad_norm": 16485.21484375, + "learning_rate": 9.997239494190258e-06, + "loss": 172106.1875, + "step": 54760 + }, + { + "epoch": 0.11063886520925835, + "grad_norm": 63071.68359375, + "learning_rate": 9.997227884315792e-06, + "loss": 129623.7875, + "step": 54770 + }, + { + "epoch": 0.11065906584194217, + "grad_norm": 223275.9375, + "learning_rate": 9.997216250085441e-06, + "loss": 86857.6938, + "step": 54780 + }, + { + "epoch": 0.11067926647462599, + "grad_norm": 218959.78125, + "learning_rate": 9.997204591499266e-06, + "loss": 103059.4937, + "step": 54790 + }, + { + "epoch": 0.1106994671073098, + "grad_norm": 410126.59375, + "learning_rate": 9.997192908557322e-06, + "loss": 123120.5875, + "step": 54800 + }, + { + "epoch": 0.11071966773999362, + "grad_norm": 95412.578125, + "learning_rate": 9.997181201259664e-06, + "loss": 168019.6375, + "step": 54810 + }, + { + "epoch": 0.11073986837267744, + "grad_norm": 1687066.375, + "learning_rate": 9.997169469606353e-06, + "loss": 230161.15, + "step": 54820 + }, + { + "epoch": 0.11076006900536124, + "grad_norm": 27539.06640625, + "learning_rate": 9.997157713597444e-06, + "loss": 155345.825, + "step": 54830 + }, + { + "epoch": 0.11078026963804506, + "grad_norm": 189076.125, + "learning_rate": 9.997145933232994e-06, + "loss": 233999.975, + "step": 54840 + }, + { + "epoch": 0.11080047027072888, + "grad_norm": 92616.8359375, + "learning_rate": 9.99713412851306e-06, + "loss": 99761.4812, + "step": 54850 + }, + { + "epoch": 0.11082067090341269, + "grad_norm": 358516.9375, + "learning_rate": 9.9971222994377e-06, + "loss": 115106.7375, + "step": 54860 + }, + { + "epoch": 0.11084087153609651, + "grad_norm": 73098.90625, + "learning_rate": 9.997110446006974e-06, + "loss": 148938.2875, + "step": 54870 + }, + { + "epoch": 0.11086107216878033, + "grad_norm": 30625.416015625, + "learning_rate": 9.997098568220937e-06, + "loss": 104699.0625, + "step": 54880 + }, + { + "epoch": 0.11088127280146415, + "grad_norm": 223641.65625, + "learning_rate": 9.997086666079647e-06, + "loss": 280604.65, + "step": 54890 + }, + { + "epoch": 0.11090147343414795, + "grad_norm": 1564.117431640625, + "learning_rate": 9.997074739583162e-06, + "loss": 251736.25, + "step": 54900 + }, + { + "epoch": 0.11092167406683177, + "grad_norm": 1959.3726806640625, + "learning_rate": 9.997062788731541e-06, + "loss": 196024.325, + "step": 54910 + }, + { + "epoch": 0.1109418746995156, + "grad_norm": 120613.8828125, + "learning_rate": 9.997050813524843e-06, + "loss": 74847.6875, + "step": 54920 + }, + { + "epoch": 0.1109620753321994, + "grad_norm": 15254.4296875, + "learning_rate": 9.997038813963126e-06, + "loss": 160907.2125, + "step": 54930 + }, + { + "epoch": 0.11098227596488322, + "grad_norm": 43884.6796875, + "learning_rate": 9.997026790046446e-06, + "loss": 120149.9625, + "step": 54940 + }, + { + "epoch": 0.11100247659756704, + "grad_norm": 84584.109375, + "learning_rate": 9.997014741774866e-06, + "loss": 125535.6125, + "step": 54950 + }, + { + "epoch": 0.11102267723025085, + "grad_norm": 1852.337158203125, + "learning_rate": 9.99700266914844e-06, + "loss": 127055.0, + "step": 54960 + }, + { + "epoch": 0.11104287786293467, + "grad_norm": 38135.375, + "learning_rate": 9.996990572167229e-06, + "loss": 206292.7625, + "step": 54970 + }, + { + "epoch": 0.11106307849561849, + "grad_norm": 5476.8310546875, + "learning_rate": 9.996978450831293e-06, + "loss": 86366.925, + "step": 54980 + }, + { + "epoch": 0.11108327912830229, + "grad_norm": 983364.0625, + "learning_rate": 9.99696630514069e-06, + "loss": 302903.725, + "step": 54990 + }, + { + "epoch": 0.11110347976098611, + "grad_norm": 21091.2265625, + "learning_rate": 9.99695413509548e-06, + "loss": 75212.2937, + "step": 55000 + }, + { + "epoch": 0.11112368039366993, + "grad_norm": 64896.41015625, + "learning_rate": 9.99694194069572e-06, + "loss": 53378.7375, + "step": 55010 + }, + { + "epoch": 0.11114388102635374, + "grad_norm": 105320.5234375, + "learning_rate": 9.996929721941472e-06, + "loss": 101309.1687, + "step": 55020 + }, + { + "epoch": 0.11116408165903756, + "grad_norm": 218319.75, + "learning_rate": 9.996917478832794e-06, + "loss": 134228.7, + "step": 55030 + }, + { + "epoch": 0.11118428229172138, + "grad_norm": 6741.875, + "learning_rate": 9.996905211369748e-06, + "loss": 135877.2625, + "step": 55040 + }, + { + "epoch": 0.1112044829244052, + "grad_norm": 44068.8828125, + "learning_rate": 9.99689291955239e-06, + "loss": 84713.1375, + "step": 55050 + }, + { + "epoch": 0.111224683557089, + "grad_norm": 6851.66064453125, + "learning_rate": 9.996880603380784e-06, + "loss": 195282.575, + "step": 55060 + }, + { + "epoch": 0.11124488418977282, + "grad_norm": 31368.08984375, + "learning_rate": 9.996868262854986e-06, + "loss": 153261.1125, + "step": 55070 + }, + { + "epoch": 0.11126508482245664, + "grad_norm": 18269.875, + "learning_rate": 9.996855897975058e-06, + "loss": 219202.5, + "step": 55080 + }, + { + "epoch": 0.11128528545514045, + "grad_norm": 90221.375, + "learning_rate": 9.996843508741061e-06, + "loss": 59043.0125, + "step": 55090 + }, + { + "epoch": 0.11130548608782427, + "grad_norm": 48330.24609375, + "learning_rate": 9.996831095153054e-06, + "loss": 83044.4062, + "step": 55100 + }, + { + "epoch": 0.11132568672050809, + "grad_norm": 299518.6875, + "learning_rate": 9.9968186572111e-06, + "loss": 121720.825, + "step": 55110 + }, + { + "epoch": 0.1113458873531919, + "grad_norm": 67980.9140625, + "learning_rate": 9.996806194915258e-06, + "loss": 74556.2812, + "step": 55120 + }, + { + "epoch": 0.11136608798587572, + "grad_norm": 135483.203125, + "learning_rate": 9.996793708265586e-06, + "loss": 121355.775, + "step": 55130 + }, + { + "epoch": 0.11138628861855954, + "grad_norm": 320137.59375, + "learning_rate": 9.99678119726215e-06, + "loss": 179140.3, + "step": 55140 + }, + { + "epoch": 0.11140648925124334, + "grad_norm": 29643.6953125, + "learning_rate": 9.996768661905008e-06, + "loss": 88556.8813, + "step": 55150 + }, + { + "epoch": 0.11142668988392716, + "grad_norm": 7434.5146484375, + "learning_rate": 9.996756102194222e-06, + "loss": 139122.525, + "step": 55160 + }, + { + "epoch": 0.11144689051661098, + "grad_norm": 93646.734375, + "learning_rate": 9.996743518129852e-06, + "loss": 144504.6, + "step": 55170 + }, + { + "epoch": 0.11146709114929479, + "grad_norm": 278981.125, + "learning_rate": 9.99673090971196e-06, + "loss": 63078.1875, + "step": 55180 + }, + { + "epoch": 0.11148729178197861, + "grad_norm": 158605.0625, + "learning_rate": 9.996718276940608e-06, + "loss": 367977.725, + "step": 55190 + }, + { + "epoch": 0.11150749241466243, + "grad_norm": 9859.0712890625, + "learning_rate": 9.996705619815857e-06, + "loss": 194140.1375, + "step": 55200 + }, + { + "epoch": 0.11152769304734625, + "grad_norm": 2357.838134765625, + "learning_rate": 9.996692938337768e-06, + "loss": 141816.3, + "step": 55210 + }, + { + "epoch": 0.11154789368003006, + "grad_norm": 593574.625, + "learning_rate": 9.996680232506404e-06, + "loss": 152463.825, + "step": 55220 + }, + { + "epoch": 0.11156809431271388, + "grad_norm": 10450.8349609375, + "learning_rate": 9.996667502321829e-06, + "loss": 257121.95, + "step": 55230 + }, + { + "epoch": 0.1115882949453977, + "grad_norm": 26556.310546875, + "learning_rate": 9.9966547477841e-06, + "loss": 175207.4, + "step": 55240 + }, + { + "epoch": 0.1116084955780815, + "grad_norm": 54100.98828125, + "learning_rate": 9.996641968893281e-06, + "loss": 114257.3625, + "step": 55250 + }, + { + "epoch": 0.11162869621076532, + "grad_norm": 13938.4453125, + "learning_rate": 9.996629165649437e-06, + "loss": 34989.625, + "step": 55260 + }, + { + "epoch": 0.11164889684344914, + "grad_norm": 790101.8125, + "learning_rate": 9.996616338052629e-06, + "loss": 116377.45, + "step": 55270 + }, + { + "epoch": 0.11166909747613295, + "grad_norm": 36898.7734375, + "learning_rate": 9.996603486102918e-06, + "loss": 58981.3063, + "step": 55280 + }, + { + "epoch": 0.11168929810881677, + "grad_norm": 19855.51171875, + "learning_rate": 9.996590609800367e-06, + "loss": 244970.675, + "step": 55290 + }, + { + "epoch": 0.11170949874150059, + "grad_norm": 41810.43359375, + "learning_rate": 9.99657770914504e-06, + "loss": 133125.4875, + "step": 55300 + }, + { + "epoch": 0.1117296993741844, + "grad_norm": 10241.39453125, + "learning_rate": 9.996564784137e-06, + "loss": 87671.0063, + "step": 55310 + }, + { + "epoch": 0.11174990000686821, + "grad_norm": 99591.5078125, + "learning_rate": 9.996551834776309e-06, + "loss": 148457.3125, + "step": 55320 + }, + { + "epoch": 0.11177010063955203, + "grad_norm": 318152.90625, + "learning_rate": 9.996538861063029e-06, + "loss": 181260.45, + "step": 55330 + }, + { + "epoch": 0.11179030127223584, + "grad_norm": 15247.50390625, + "learning_rate": 9.996525862997225e-06, + "loss": 37840.3844, + "step": 55340 + }, + { + "epoch": 0.11181050190491966, + "grad_norm": 6618.49169921875, + "learning_rate": 9.99651284057896e-06, + "loss": 63529.85, + "step": 55350 + }, + { + "epoch": 0.11183070253760348, + "grad_norm": 92883.2421875, + "learning_rate": 9.9964997938083e-06, + "loss": 162345.375, + "step": 55360 + }, + { + "epoch": 0.1118509031702873, + "grad_norm": 35396.671875, + "learning_rate": 9.996486722685302e-06, + "loss": 183944.1875, + "step": 55370 + }, + { + "epoch": 0.1118711038029711, + "grad_norm": 57995.3203125, + "learning_rate": 9.996473627210035e-06, + "loss": 51840.7156, + "step": 55380 + }, + { + "epoch": 0.11189130443565493, + "grad_norm": 113471.5703125, + "learning_rate": 9.996460507382563e-06, + "loss": 119085.95, + "step": 55390 + }, + { + "epoch": 0.11191150506833875, + "grad_norm": 360358.78125, + "learning_rate": 9.996447363202947e-06, + "loss": 111493.1375, + "step": 55400 + }, + { + "epoch": 0.11193170570102255, + "grad_norm": 217659.5, + "learning_rate": 9.996434194671254e-06, + "loss": 226489.0, + "step": 55410 + }, + { + "epoch": 0.11195190633370637, + "grad_norm": 499047.0625, + "learning_rate": 9.996421001787545e-06, + "loss": 109743.925, + "step": 55420 + }, + { + "epoch": 0.11197210696639019, + "grad_norm": 12473.708984375, + "learning_rate": 9.996407784551888e-06, + "loss": 184662.575, + "step": 55430 + }, + { + "epoch": 0.111992307599074, + "grad_norm": 106953.96875, + "learning_rate": 9.996394542964343e-06, + "loss": 50041.15, + "step": 55440 + }, + { + "epoch": 0.11201250823175782, + "grad_norm": 79788.03125, + "learning_rate": 9.996381277024978e-06, + "loss": 195327.0, + "step": 55450 + }, + { + "epoch": 0.11203270886444164, + "grad_norm": 1026596.625, + "learning_rate": 9.996367986733857e-06, + "loss": 277666.05, + "step": 55460 + }, + { + "epoch": 0.11205290949712544, + "grad_norm": 20957.365234375, + "learning_rate": 9.996354672091044e-06, + "loss": 67508.7625, + "step": 55470 + }, + { + "epoch": 0.11207311012980926, + "grad_norm": 34511.109375, + "learning_rate": 9.996341333096606e-06, + "loss": 85056.2812, + "step": 55480 + }, + { + "epoch": 0.11209331076249308, + "grad_norm": 181015.9375, + "learning_rate": 9.996327969750605e-06, + "loss": 199523.9625, + "step": 55490 + }, + { + "epoch": 0.11211351139517689, + "grad_norm": 47974.53125, + "learning_rate": 9.996314582053106e-06, + "loss": 104416.2812, + "step": 55500 + }, + { + "epoch": 0.11213371202786071, + "grad_norm": 171289.953125, + "learning_rate": 9.996301170004179e-06, + "loss": 132848.2125, + "step": 55510 + }, + { + "epoch": 0.11215391266054453, + "grad_norm": 74219.9453125, + "learning_rate": 9.996287733603883e-06, + "loss": 74909.0562, + "step": 55520 + }, + { + "epoch": 0.11217411329322835, + "grad_norm": 15526.55078125, + "learning_rate": 9.996274272852289e-06, + "loss": 175728.85, + "step": 55530 + }, + { + "epoch": 0.11219431392591216, + "grad_norm": 62117.66796875, + "learning_rate": 9.996260787749457e-06, + "loss": 60316.75, + "step": 55540 + }, + { + "epoch": 0.11221451455859598, + "grad_norm": 782248.6875, + "learning_rate": 9.99624727829546e-06, + "loss": 179647.575, + "step": 55550 + }, + { + "epoch": 0.1122347151912798, + "grad_norm": 45095.48828125, + "learning_rate": 9.996233744490356e-06, + "loss": 227316.85, + "step": 55560 + }, + { + "epoch": 0.1122549158239636, + "grad_norm": 69359.359375, + "learning_rate": 9.996220186334217e-06, + "loss": 59095.75, + "step": 55570 + }, + { + "epoch": 0.11227511645664742, + "grad_norm": 90750.703125, + "learning_rate": 9.996206603827105e-06, + "loss": 128404.325, + "step": 55580 + }, + { + "epoch": 0.11229531708933124, + "grad_norm": 1060928.875, + "learning_rate": 9.99619299696909e-06, + "loss": 379020.05, + "step": 55590 + }, + { + "epoch": 0.11231551772201505, + "grad_norm": 109536.5390625, + "learning_rate": 9.996179365760235e-06, + "loss": 202028.725, + "step": 55600 + }, + { + "epoch": 0.11233571835469887, + "grad_norm": 80374.46875, + "learning_rate": 9.996165710200607e-06, + "loss": 184441.5625, + "step": 55610 + }, + { + "epoch": 0.11235591898738269, + "grad_norm": 270974.8125, + "learning_rate": 9.996152030290276e-06, + "loss": 113150.95, + "step": 55620 + }, + { + "epoch": 0.1123761196200665, + "grad_norm": 113436.2265625, + "learning_rate": 9.996138326029303e-06, + "loss": 182925.4375, + "step": 55630 + }, + { + "epoch": 0.11239632025275031, + "grad_norm": 276579.21875, + "learning_rate": 9.99612459741776e-06, + "loss": 63217.975, + "step": 55640 + }, + { + "epoch": 0.11241652088543413, + "grad_norm": 839887.6875, + "learning_rate": 9.99611084445571e-06, + "loss": 220391.425, + "step": 55650 + }, + { + "epoch": 0.11243672151811794, + "grad_norm": 21314.0390625, + "learning_rate": 9.996097067143223e-06, + "loss": 100580.4688, + "step": 55660 + }, + { + "epoch": 0.11245692215080176, + "grad_norm": 141891.046875, + "learning_rate": 9.996083265480366e-06, + "loss": 98361.2875, + "step": 55670 + }, + { + "epoch": 0.11247712278348558, + "grad_norm": 204361.34375, + "learning_rate": 9.996069439467203e-06, + "loss": 110203.075, + "step": 55680 + }, + { + "epoch": 0.1124973234161694, + "grad_norm": 289891.5, + "learning_rate": 9.996055589103804e-06, + "loss": 278815.7, + "step": 55690 + }, + { + "epoch": 0.1125175240488532, + "grad_norm": 107508.8984375, + "learning_rate": 9.996041714390235e-06, + "loss": 225333.525, + "step": 55700 + }, + { + "epoch": 0.11253772468153703, + "grad_norm": 28627.734375, + "learning_rate": 9.996027815326565e-06, + "loss": 178371.375, + "step": 55710 + }, + { + "epoch": 0.11255792531422085, + "grad_norm": 384022.375, + "learning_rate": 9.996013891912862e-06, + "loss": 159785.525, + "step": 55720 + }, + { + "epoch": 0.11257812594690465, + "grad_norm": 4941.1650390625, + "learning_rate": 9.995999944149192e-06, + "loss": 165765.95, + "step": 55730 + }, + { + "epoch": 0.11259832657958847, + "grad_norm": 48973.16015625, + "learning_rate": 9.995985972035626e-06, + "loss": 106546.35, + "step": 55740 + }, + { + "epoch": 0.11261852721227229, + "grad_norm": 143250.390625, + "learning_rate": 9.995971975572231e-06, + "loss": 89346.45, + "step": 55750 + }, + { + "epoch": 0.1126387278449561, + "grad_norm": 528081.0625, + "learning_rate": 9.995957954759073e-06, + "loss": 162654.325, + "step": 55760 + }, + { + "epoch": 0.11265892847763992, + "grad_norm": 74857.2578125, + "learning_rate": 9.995943909596222e-06, + "loss": 229488.95, + "step": 55770 + }, + { + "epoch": 0.11267912911032374, + "grad_norm": 373350.40625, + "learning_rate": 9.995929840083746e-06, + "loss": 138811.2625, + "step": 55780 + }, + { + "epoch": 0.11269932974300755, + "grad_norm": 553921.125, + "learning_rate": 9.995915746221715e-06, + "loss": 276429.8, + "step": 55790 + }, + { + "epoch": 0.11271953037569137, + "grad_norm": 78356.578125, + "learning_rate": 9.995901628010196e-06, + "loss": 97833.6125, + "step": 55800 + }, + { + "epoch": 0.11273973100837519, + "grad_norm": 180963.1875, + "learning_rate": 9.995887485449257e-06, + "loss": 96760.55, + "step": 55810 + }, + { + "epoch": 0.11275993164105899, + "grad_norm": 38712.171875, + "learning_rate": 9.99587331853897e-06, + "loss": 68611.2875, + "step": 55820 + }, + { + "epoch": 0.11278013227374281, + "grad_norm": 33298.13671875, + "learning_rate": 9.995859127279402e-06, + "loss": 219726.375, + "step": 55830 + }, + { + "epoch": 0.11280033290642663, + "grad_norm": 146468.953125, + "learning_rate": 9.995844911670623e-06, + "loss": 152629.225, + "step": 55840 + }, + { + "epoch": 0.11282053353911045, + "grad_norm": 75249.7890625, + "learning_rate": 9.995830671712701e-06, + "loss": 262414.0, + "step": 55850 + }, + { + "epoch": 0.11284073417179426, + "grad_norm": 10144.0556640625, + "learning_rate": 9.995816407405708e-06, + "loss": 89894.0188, + "step": 55860 + }, + { + "epoch": 0.11286093480447808, + "grad_norm": 219201.21875, + "learning_rate": 9.995802118749708e-06, + "loss": 93360.3875, + "step": 55870 + }, + { + "epoch": 0.1128811354371619, + "grad_norm": 837166.75, + "learning_rate": 9.995787805744778e-06, + "loss": 197375.45, + "step": 55880 + }, + { + "epoch": 0.1129013360698457, + "grad_norm": 58176.34375, + "learning_rate": 9.995773468390983e-06, + "loss": 183474.4375, + "step": 55890 + }, + { + "epoch": 0.11292153670252952, + "grad_norm": 169002.984375, + "learning_rate": 9.995759106688394e-06, + "loss": 138986.9125, + "step": 55900 + }, + { + "epoch": 0.11294173733521334, + "grad_norm": 43251.3515625, + "learning_rate": 9.99574472063708e-06, + "loss": 106550.9375, + "step": 55910 + }, + { + "epoch": 0.11296193796789715, + "grad_norm": 270873.96875, + "learning_rate": 9.995730310237113e-06, + "loss": 131535.5625, + "step": 55920 + }, + { + "epoch": 0.11298213860058097, + "grad_norm": 558529.4375, + "learning_rate": 9.995715875488563e-06, + "loss": 268359.25, + "step": 55930 + }, + { + "epoch": 0.11300233923326479, + "grad_norm": 233798.59375, + "learning_rate": 9.9957014163915e-06, + "loss": 48104.3, + "step": 55940 + }, + { + "epoch": 0.1130225398659486, + "grad_norm": 409287.0, + "learning_rate": 9.995686932945993e-06, + "loss": 101441.2, + "step": 55950 + }, + { + "epoch": 0.11304274049863242, + "grad_norm": 1293931.25, + "learning_rate": 9.995672425152115e-06, + "loss": 148812.6375, + "step": 55960 + }, + { + "epoch": 0.11306294113131624, + "grad_norm": 15140.103515625, + "learning_rate": 9.995657893009933e-06, + "loss": 107571.6625, + "step": 55970 + }, + { + "epoch": 0.11308314176400004, + "grad_norm": 166531.015625, + "learning_rate": 9.995643336519523e-06, + "loss": 290768.725, + "step": 55980 + }, + { + "epoch": 0.11310334239668386, + "grad_norm": 3378.697265625, + "learning_rate": 9.995628755680952e-06, + "loss": 99966.8313, + "step": 55990 + }, + { + "epoch": 0.11312354302936768, + "grad_norm": 1398321.875, + "learning_rate": 9.995614150494293e-06, + "loss": 267097.7, + "step": 56000 + }, + { + "epoch": 0.1131437436620515, + "grad_norm": 14912.912109375, + "learning_rate": 9.995599520959615e-06, + "loss": 163566.4875, + "step": 56010 + }, + { + "epoch": 0.11316394429473531, + "grad_norm": 4928.16552734375, + "learning_rate": 9.995584867076994e-06, + "loss": 152356.6, + "step": 56020 + }, + { + "epoch": 0.11318414492741913, + "grad_norm": 49441.99609375, + "learning_rate": 9.995570188846495e-06, + "loss": 78478.7625, + "step": 56030 + }, + { + "epoch": 0.11320434556010295, + "grad_norm": 143099.171875, + "learning_rate": 9.995555486268193e-06, + "loss": 104191.1, + "step": 56040 + }, + { + "epoch": 0.11322454619278675, + "grad_norm": 58110.20703125, + "learning_rate": 9.995540759342161e-06, + "loss": 108707.0, + "step": 56050 + }, + { + "epoch": 0.11324474682547057, + "grad_norm": 7975.6611328125, + "learning_rate": 9.995526008068469e-06, + "loss": 159993.375, + "step": 56060 + }, + { + "epoch": 0.1132649474581544, + "grad_norm": 415540.8125, + "learning_rate": 9.995511232447189e-06, + "loss": 182802.0125, + "step": 56070 + }, + { + "epoch": 0.1132851480908382, + "grad_norm": 149230.640625, + "learning_rate": 9.995496432478392e-06, + "loss": 47946.5, + "step": 56080 + }, + { + "epoch": 0.11330534872352202, + "grad_norm": 92272.1953125, + "learning_rate": 9.995481608162154e-06, + "loss": 181859.175, + "step": 56090 + }, + { + "epoch": 0.11332554935620584, + "grad_norm": 1013319.0625, + "learning_rate": 9.995466759498543e-06, + "loss": 165460.15, + "step": 56100 + }, + { + "epoch": 0.11334574998888965, + "grad_norm": 59542.5234375, + "learning_rate": 9.995451886487632e-06, + "loss": 70066.425, + "step": 56110 + }, + { + "epoch": 0.11336595062157347, + "grad_norm": 76427.3671875, + "learning_rate": 9.995436989129495e-06, + "loss": 58726.8375, + "step": 56120 + }, + { + "epoch": 0.11338615125425729, + "grad_norm": 37425.9453125, + "learning_rate": 9.995422067424203e-06, + "loss": 101141.375, + "step": 56130 + }, + { + "epoch": 0.11340635188694109, + "grad_norm": 517396.84375, + "learning_rate": 9.995407121371832e-06, + "loss": 387699.55, + "step": 56140 + }, + { + "epoch": 0.11342655251962491, + "grad_norm": 165970.3125, + "learning_rate": 9.995392150972451e-06, + "loss": 76901.6375, + "step": 56150 + }, + { + "epoch": 0.11344675315230873, + "grad_norm": 2005.3304443359375, + "learning_rate": 9.995377156226133e-06, + "loss": 64033.2562, + "step": 56160 + }, + { + "epoch": 0.11346695378499255, + "grad_norm": 43798.39453125, + "learning_rate": 9.995362137132956e-06, + "loss": 71718.5063, + "step": 56170 + }, + { + "epoch": 0.11348715441767636, + "grad_norm": 129457.375, + "learning_rate": 9.995347093692987e-06, + "loss": 250231.425, + "step": 56180 + }, + { + "epoch": 0.11350735505036018, + "grad_norm": 261203.25, + "learning_rate": 9.995332025906304e-06, + "loss": 138290.425, + "step": 56190 + }, + { + "epoch": 0.113527555683044, + "grad_norm": 549633.3125, + "learning_rate": 9.995316933772978e-06, + "loss": 174263.3, + "step": 56200 + }, + { + "epoch": 0.1135477563157278, + "grad_norm": 77587.4921875, + "learning_rate": 9.995301817293084e-06, + "loss": 195730.1875, + "step": 56210 + }, + { + "epoch": 0.11356795694841162, + "grad_norm": 128169.6484375, + "learning_rate": 9.995286676466694e-06, + "loss": 74759.1562, + "step": 56220 + }, + { + "epoch": 0.11358815758109544, + "grad_norm": 17116.4921875, + "learning_rate": 9.995271511293881e-06, + "loss": 233287.675, + "step": 56230 + }, + { + "epoch": 0.11360835821377925, + "grad_norm": 173100.578125, + "learning_rate": 9.995256321774722e-06, + "loss": 102389.925, + "step": 56240 + }, + { + "epoch": 0.11362855884646307, + "grad_norm": 269963.9375, + "learning_rate": 9.99524110790929e-06, + "loss": 151654.2875, + "step": 56250 + }, + { + "epoch": 0.11364875947914689, + "grad_norm": 14696.3212890625, + "learning_rate": 9.995225869697657e-06, + "loss": 166624.5125, + "step": 56260 + }, + { + "epoch": 0.1136689601118307, + "grad_norm": 192349.96875, + "learning_rate": 9.9952106071399e-06, + "loss": 229091.3, + "step": 56270 + }, + { + "epoch": 0.11368916074451452, + "grad_norm": 45356.10546875, + "learning_rate": 9.995195320236093e-06, + "loss": 89351.0063, + "step": 56280 + }, + { + "epoch": 0.11370936137719834, + "grad_norm": 43469.4453125, + "learning_rate": 9.995180008986309e-06, + "loss": 149434.35, + "step": 56290 + }, + { + "epoch": 0.11372956200988214, + "grad_norm": 3659.379150390625, + "learning_rate": 9.995164673390624e-06, + "loss": 120494.775, + "step": 56300 + }, + { + "epoch": 0.11374976264256596, + "grad_norm": 178626.15625, + "learning_rate": 9.995149313449114e-06, + "loss": 157350.15, + "step": 56310 + }, + { + "epoch": 0.11376996327524978, + "grad_norm": 350175.84375, + "learning_rate": 9.995133929161848e-06, + "loss": 181704.65, + "step": 56320 + }, + { + "epoch": 0.1137901639079336, + "grad_norm": 7289.486328125, + "learning_rate": 9.995118520528908e-06, + "loss": 55529.1, + "step": 56330 + }, + { + "epoch": 0.11381036454061741, + "grad_norm": 60174.2109375, + "learning_rate": 9.995103087550366e-06, + "loss": 194649.9375, + "step": 56340 + }, + { + "epoch": 0.11383056517330123, + "grad_norm": 220068.453125, + "learning_rate": 9.995087630226295e-06, + "loss": 102650.5375, + "step": 56350 + }, + { + "epoch": 0.11385076580598505, + "grad_norm": 644218.125, + "learning_rate": 9.995072148556776e-06, + "loss": 248099.075, + "step": 56360 + }, + { + "epoch": 0.11387096643866886, + "grad_norm": 29889.474609375, + "learning_rate": 9.995056642541879e-06, + "loss": 112759.875, + "step": 56370 + }, + { + "epoch": 0.11389116707135268, + "grad_norm": 372154.59375, + "learning_rate": 9.995041112181683e-06, + "loss": 104856.7375, + "step": 56380 + }, + { + "epoch": 0.1139113677040365, + "grad_norm": 59441.21484375, + "learning_rate": 9.99502555747626e-06, + "loss": 94422.7937, + "step": 56390 + }, + { + "epoch": 0.1139315683367203, + "grad_norm": 86228.859375, + "learning_rate": 9.995009978425692e-06, + "loss": 115896.7875, + "step": 56400 + }, + { + "epoch": 0.11395176896940412, + "grad_norm": 31190.7109375, + "learning_rate": 9.994994375030048e-06, + "loss": 74107.3562, + "step": 56410 + }, + { + "epoch": 0.11397196960208794, + "grad_norm": 9557.3525390625, + "learning_rate": 9.994978747289408e-06, + "loss": 158610.925, + "step": 56420 + }, + { + "epoch": 0.11399217023477175, + "grad_norm": 6476.23388671875, + "learning_rate": 9.994963095203849e-06, + "loss": 88936.925, + "step": 56430 + }, + { + "epoch": 0.11401237086745557, + "grad_norm": 177901.109375, + "learning_rate": 9.994947418773445e-06, + "loss": 101580.575, + "step": 56440 + }, + { + "epoch": 0.11403257150013939, + "grad_norm": 6804.99609375, + "learning_rate": 9.994931717998272e-06, + "loss": 152019.6375, + "step": 56450 + }, + { + "epoch": 0.1140527721328232, + "grad_norm": 144727.921875, + "learning_rate": 9.99491599287841e-06, + "loss": 82307.5437, + "step": 56460 + }, + { + "epoch": 0.11407297276550701, + "grad_norm": 376098.3125, + "learning_rate": 9.99490024341393e-06, + "loss": 154838.2375, + "step": 56470 + }, + { + "epoch": 0.11409317339819083, + "grad_norm": 537808.25, + "learning_rate": 9.994884469604913e-06, + "loss": 170419.475, + "step": 56480 + }, + { + "epoch": 0.11411337403087465, + "grad_norm": 96295.3515625, + "learning_rate": 9.994868671451436e-06, + "loss": 57129.1813, + "step": 56490 + }, + { + "epoch": 0.11413357466355846, + "grad_norm": 444160.5, + "learning_rate": 9.994852848953574e-06, + "loss": 248400.675, + "step": 56500 + }, + { + "epoch": 0.11415377529624228, + "grad_norm": 71958.734375, + "learning_rate": 9.994837002111407e-06, + "loss": 122669.3, + "step": 56510 + }, + { + "epoch": 0.1141739759289261, + "grad_norm": 245871.03125, + "learning_rate": 9.994821130925007e-06, + "loss": 55508.5375, + "step": 56520 + }, + { + "epoch": 0.1141941765616099, + "grad_norm": 132964.859375, + "learning_rate": 9.994805235394456e-06, + "loss": 166447.1, + "step": 56530 + }, + { + "epoch": 0.11421437719429373, + "grad_norm": 53972.03515625, + "learning_rate": 9.99478931551983e-06, + "loss": 262581.525, + "step": 56540 + }, + { + "epoch": 0.11423457782697755, + "grad_norm": 269099.78125, + "learning_rate": 9.994773371301208e-06, + "loss": 282983.7, + "step": 56550 + }, + { + "epoch": 0.11425477845966135, + "grad_norm": 220082.578125, + "learning_rate": 9.994757402738666e-06, + "loss": 301098.15, + "step": 56560 + }, + { + "epoch": 0.11427497909234517, + "grad_norm": 68975.515625, + "learning_rate": 9.99474140983228e-06, + "loss": 114217.75, + "step": 56570 + }, + { + "epoch": 0.11429517972502899, + "grad_norm": 152946.359375, + "learning_rate": 9.994725392582132e-06, + "loss": 84371.0562, + "step": 56580 + }, + { + "epoch": 0.1143153803577128, + "grad_norm": 192340.203125, + "learning_rate": 9.994709350988299e-06, + "loss": 65839.625, + "step": 56590 + }, + { + "epoch": 0.11433558099039662, + "grad_norm": 24976.171875, + "learning_rate": 9.994693285050858e-06, + "loss": 131325.55, + "step": 56600 + }, + { + "epoch": 0.11435578162308044, + "grad_norm": 174063.921875, + "learning_rate": 9.994677194769886e-06, + "loss": 106921.125, + "step": 56610 + }, + { + "epoch": 0.11437598225576424, + "grad_norm": 2977.579833984375, + "learning_rate": 9.994661080145464e-06, + "loss": 165853.2375, + "step": 56620 + }, + { + "epoch": 0.11439618288844806, + "grad_norm": 136180.953125, + "learning_rate": 9.99464494117767e-06, + "loss": 247190.075, + "step": 56630 + }, + { + "epoch": 0.11441638352113188, + "grad_norm": 48115.59765625, + "learning_rate": 9.994628777866582e-06, + "loss": 247481.125, + "step": 56640 + }, + { + "epoch": 0.1144365841538157, + "grad_norm": 8485.798828125, + "learning_rate": 9.99461259021228e-06, + "loss": 156531.175, + "step": 56650 + }, + { + "epoch": 0.11445678478649951, + "grad_norm": 25615.728515625, + "learning_rate": 9.99459637821484e-06, + "loss": 166179.225, + "step": 56660 + }, + { + "epoch": 0.11447698541918333, + "grad_norm": 43321.9453125, + "learning_rate": 9.994580141874345e-06, + "loss": 143836.1875, + "step": 56670 + }, + { + "epoch": 0.11449718605186715, + "grad_norm": 19236.5, + "learning_rate": 9.994563881190874e-06, + "loss": 201659.4625, + "step": 56680 + }, + { + "epoch": 0.11451738668455096, + "grad_norm": 43040.31640625, + "learning_rate": 9.9945475961645e-06, + "loss": 277288.9, + "step": 56690 + }, + { + "epoch": 0.11453758731723478, + "grad_norm": 519916.125, + "learning_rate": 9.994531286795309e-06, + "loss": 137427.775, + "step": 56700 + }, + { + "epoch": 0.1145577879499186, + "grad_norm": 50353.2421875, + "learning_rate": 9.994514953083379e-06, + "loss": 64469.2063, + "step": 56710 + }, + { + "epoch": 0.1145779885826024, + "grad_norm": 103722.359375, + "learning_rate": 9.994498595028787e-06, + "loss": 76419.175, + "step": 56720 + }, + { + "epoch": 0.11459818921528622, + "grad_norm": 55424.19921875, + "learning_rate": 9.994482212631616e-06, + "loss": 102406.3375, + "step": 56730 + }, + { + "epoch": 0.11461838984797004, + "grad_norm": 214102.0625, + "learning_rate": 9.994465805891944e-06, + "loss": 145687.1375, + "step": 56740 + }, + { + "epoch": 0.11463859048065385, + "grad_norm": 98818.6875, + "learning_rate": 9.994449374809851e-06, + "loss": 149912.8125, + "step": 56750 + }, + { + "epoch": 0.11465879111333767, + "grad_norm": 216674.265625, + "learning_rate": 9.994432919385417e-06, + "loss": 156188.525, + "step": 56760 + }, + { + "epoch": 0.11467899174602149, + "grad_norm": 140491.859375, + "learning_rate": 9.994416439618723e-06, + "loss": 145686.2, + "step": 56770 + }, + { + "epoch": 0.1146991923787053, + "grad_norm": 202360.765625, + "learning_rate": 9.994399935509851e-06, + "loss": 171674.4625, + "step": 56780 + }, + { + "epoch": 0.11471939301138911, + "grad_norm": 49874.99609375, + "learning_rate": 9.994383407058878e-06, + "loss": 188410.3, + "step": 56790 + }, + { + "epoch": 0.11473959364407293, + "grad_norm": 192798.375, + "learning_rate": 9.994366854265886e-06, + "loss": 141107.9, + "step": 56800 + }, + { + "epoch": 0.11475979427675675, + "grad_norm": 211547.34375, + "learning_rate": 9.994350277130956e-06, + "loss": 99625.8875, + "step": 56810 + }, + { + "epoch": 0.11477999490944056, + "grad_norm": 22526.951171875, + "learning_rate": 9.994333675654169e-06, + "loss": 132481.375, + "step": 56820 + }, + { + "epoch": 0.11480019554212438, + "grad_norm": 1796654.75, + "learning_rate": 9.994317049835604e-06, + "loss": 215679.85, + "step": 56830 + }, + { + "epoch": 0.1148203961748082, + "grad_norm": 684112.0625, + "learning_rate": 9.994300399675342e-06, + "loss": 115421.55, + "step": 56840 + }, + { + "epoch": 0.114840596807492, + "grad_norm": 23362.232421875, + "learning_rate": 9.994283725173468e-06, + "loss": 55669.3938, + "step": 56850 + }, + { + "epoch": 0.11486079744017583, + "grad_norm": 496186.25, + "learning_rate": 9.994267026330063e-06, + "loss": 286139.525, + "step": 56860 + }, + { + "epoch": 0.11488099807285965, + "grad_norm": 445852.75, + "learning_rate": 9.994250303145203e-06, + "loss": 131103.075, + "step": 56870 + }, + { + "epoch": 0.11490119870554345, + "grad_norm": 26983.677734375, + "learning_rate": 9.994233555618973e-06, + "loss": 140167.2125, + "step": 56880 + }, + { + "epoch": 0.11492139933822727, + "grad_norm": 203558.75, + "learning_rate": 9.994216783751457e-06, + "loss": 102375.5, + "step": 56890 + }, + { + "epoch": 0.11494159997091109, + "grad_norm": 326970.8125, + "learning_rate": 9.99419998754273e-06, + "loss": 122933.5, + "step": 56900 + }, + { + "epoch": 0.1149618006035949, + "grad_norm": 141735.53125, + "learning_rate": 9.99418316699288e-06, + "loss": 130164.8625, + "step": 56910 + }, + { + "epoch": 0.11498200123627872, + "grad_norm": 89088.75, + "learning_rate": 9.994166322101988e-06, + "loss": 99165.625, + "step": 56920 + }, + { + "epoch": 0.11500220186896254, + "grad_norm": 12978.318359375, + "learning_rate": 9.994149452870133e-06, + "loss": 56854.6562, + "step": 56930 + }, + { + "epoch": 0.11502240250164635, + "grad_norm": 86387.0078125, + "learning_rate": 9.9941325592974e-06, + "loss": 148924.45, + "step": 56940 + }, + { + "epoch": 0.11504260313433017, + "grad_norm": 1278174.625, + "learning_rate": 9.994115641383872e-06, + "loss": 173278.5625, + "step": 56950 + }, + { + "epoch": 0.11506280376701399, + "grad_norm": 163222.90625, + "learning_rate": 9.994098699129628e-06, + "loss": 188514.7, + "step": 56960 + }, + { + "epoch": 0.1150830043996978, + "grad_norm": 10130.3125, + "learning_rate": 9.994081732534755e-06, + "loss": 243752.7, + "step": 56970 + }, + { + "epoch": 0.11510320503238161, + "grad_norm": 2727.24072265625, + "learning_rate": 9.994064741599332e-06, + "loss": 256515.3, + "step": 56980 + }, + { + "epoch": 0.11512340566506543, + "grad_norm": 23927.3828125, + "learning_rate": 9.994047726323442e-06, + "loss": 231327.275, + "step": 56990 + }, + { + "epoch": 0.11514360629774925, + "grad_norm": 3069.55126953125, + "learning_rate": 9.994030686707171e-06, + "loss": 110157.375, + "step": 57000 + }, + { + "epoch": 0.11516380693043306, + "grad_norm": 117970.4453125, + "learning_rate": 9.9940136227506e-06, + "loss": 133904.4875, + "step": 57010 + }, + { + "epoch": 0.11518400756311688, + "grad_norm": 501234.625, + "learning_rate": 9.993996534453812e-06, + "loss": 83742.4875, + "step": 57020 + }, + { + "epoch": 0.1152042081958007, + "grad_norm": 236863.546875, + "learning_rate": 9.993979421816889e-06, + "loss": 179359.675, + "step": 57030 + }, + { + "epoch": 0.1152244088284845, + "grad_norm": 4235.44921875, + "learning_rate": 9.993962284839918e-06, + "loss": 47596.7375, + "step": 57040 + }, + { + "epoch": 0.11524460946116832, + "grad_norm": 166768.875, + "learning_rate": 9.99394512352298e-06, + "loss": 95084.175, + "step": 57050 + }, + { + "epoch": 0.11526481009385214, + "grad_norm": 12793.15234375, + "learning_rate": 9.993927937866158e-06, + "loss": 41642.4938, + "step": 57060 + }, + { + "epoch": 0.11528501072653595, + "grad_norm": 517578.0625, + "learning_rate": 9.993910727869538e-06, + "loss": 79799.95, + "step": 57070 + }, + { + "epoch": 0.11530521135921977, + "grad_norm": 149457.703125, + "learning_rate": 9.993893493533203e-06, + "loss": 59873.525, + "step": 57080 + }, + { + "epoch": 0.11532541199190359, + "grad_norm": 682.0084228515625, + "learning_rate": 9.993876234857236e-06, + "loss": 137049.325, + "step": 57090 + }, + { + "epoch": 0.1153456126245874, + "grad_norm": 403229.1875, + "learning_rate": 9.993858951841724e-06, + "loss": 139819.25, + "step": 57100 + }, + { + "epoch": 0.11536581325727122, + "grad_norm": 19959.96484375, + "learning_rate": 9.993841644486747e-06, + "loss": 150618.5875, + "step": 57110 + }, + { + "epoch": 0.11538601388995504, + "grad_norm": 15236.345703125, + "learning_rate": 9.993824312792393e-06, + "loss": 91647.8813, + "step": 57120 + }, + { + "epoch": 0.11540621452263886, + "grad_norm": 916552.625, + "learning_rate": 9.993806956758743e-06, + "loss": 277710.3, + "step": 57130 + }, + { + "epoch": 0.11542641515532266, + "grad_norm": 221736.390625, + "learning_rate": 9.993789576385884e-06, + "loss": 236965.425, + "step": 57140 + }, + { + "epoch": 0.11544661578800648, + "grad_norm": 207860.484375, + "learning_rate": 9.993772171673901e-06, + "loss": 176250.675, + "step": 57150 + }, + { + "epoch": 0.1154668164206903, + "grad_norm": 102163.703125, + "learning_rate": 9.993754742622879e-06, + "loss": 178319.3, + "step": 57160 + }, + { + "epoch": 0.11548701705337411, + "grad_norm": 210274.84375, + "learning_rate": 9.993737289232902e-06, + "loss": 177881.65, + "step": 57170 + }, + { + "epoch": 0.11550721768605793, + "grad_norm": 189389.546875, + "learning_rate": 9.993719811504053e-06, + "loss": 264633.85, + "step": 57180 + }, + { + "epoch": 0.11552741831874175, + "grad_norm": 327596.5625, + "learning_rate": 9.993702309436419e-06, + "loss": 153224.0875, + "step": 57190 + }, + { + "epoch": 0.11554761895142555, + "grad_norm": 330171.15625, + "learning_rate": 9.99368478303009e-06, + "loss": 146224.2, + "step": 57200 + }, + { + "epoch": 0.11556781958410937, + "grad_norm": 276563.46875, + "learning_rate": 9.993667232285142e-06, + "loss": 168524.8, + "step": 57210 + }, + { + "epoch": 0.1155880202167932, + "grad_norm": 29506.154296875, + "learning_rate": 9.993649657201669e-06, + "loss": 158644.2625, + "step": 57220 + }, + { + "epoch": 0.115608220849477, + "grad_norm": 280226.3125, + "learning_rate": 9.993632057779752e-06, + "loss": 99262.4937, + "step": 57230 + }, + { + "epoch": 0.11562842148216082, + "grad_norm": 96132.1328125, + "learning_rate": 9.993614434019476e-06, + "loss": 155166.875, + "step": 57240 + }, + { + "epoch": 0.11564862211484464, + "grad_norm": 9063.3994140625, + "learning_rate": 9.993596785920932e-06, + "loss": 154595.325, + "step": 57250 + }, + { + "epoch": 0.11566882274752845, + "grad_norm": 71567.640625, + "learning_rate": 9.993579113484202e-06, + "loss": 49869.6937, + "step": 57260 + }, + { + "epoch": 0.11568902338021227, + "grad_norm": 234315.921875, + "learning_rate": 9.993561416709372e-06, + "loss": 118417.9, + "step": 57270 + }, + { + "epoch": 0.11570922401289609, + "grad_norm": 350735.90625, + "learning_rate": 9.99354369559653e-06, + "loss": 78247.5625, + "step": 57280 + }, + { + "epoch": 0.1157294246455799, + "grad_norm": 57940.5859375, + "learning_rate": 9.993525950145761e-06, + "loss": 303997.25, + "step": 57290 + }, + { + "epoch": 0.11574962527826371, + "grad_norm": 8024.9921875, + "learning_rate": 9.993508180357154e-06, + "loss": 101062.1938, + "step": 57300 + }, + { + "epoch": 0.11576982591094753, + "grad_norm": 93947.4375, + "learning_rate": 9.993490386230793e-06, + "loss": 61079.6875, + "step": 57310 + }, + { + "epoch": 0.11579002654363135, + "grad_norm": 8874.1044921875, + "learning_rate": 9.993472567766764e-06, + "loss": 122902.9875, + "step": 57320 + }, + { + "epoch": 0.11581022717631516, + "grad_norm": 34896.359375, + "learning_rate": 9.993454724965157e-06, + "loss": 229902.675, + "step": 57330 + }, + { + "epoch": 0.11583042780899898, + "grad_norm": 245505.203125, + "learning_rate": 9.993436857826058e-06, + "loss": 107886.425, + "step": 57340 + }, + { + "epoch": 0.1158506284416828, + "grad_norm": 974048.8125, + "learning_rate": 9.993418966349551e-06, + "loss": 295651.25, + "step": 57350 + }, + { + "epoch": 0.1158708290743666, + "grad_norm": 91671.265625, + "learning_rate": 9.993401050535726e-06, + "loss": 159601.0125, + "step": 57360 + }, + { + "epoch": 0.11589102970705042, + "grad_norm": 43953.87109375, + "learning_rate": 9.993383110384673e-06, + "loss": 192491.4875, + "step": 57370 + }, + { + "epoch": 0.11591123033973424, + "grad_norm": 1211640.625, + "learning_rate": 9.993365145896473e-06, + "loss": 199060.175, + "step": 57380 + }, + { + "epoch": 0.11593143097241805, + "grad_norm": 66963.328125, + "learning_rate": 9.993347157071218e-06, + "loss": 129134.875, + "step": 57390 + }, + { + "epoch": 0.11595163160510187, + "grad_norm": 349409.03125, + "learning_rate": 9.993329143908994e-06, + "loss": 99752.2, + "step": 57400 + }, + { + "epoch": 0.11597183223778569, + "grad_norm": 266329.4375, + "learning_rate": 9.993311106409891e-06, + "loss": 187660.5875, + "step": 57410 + }, + { + "epoch": 0.1159920328704695, + "grad_norm": 330782.96875, + "learning_rate": 9.993293044573995e-06, + "loss": 111284.425, + "step": 57420 + }, + { + "epoch": 0.11601223350315332, + "grad_norm": 30227.236328125, + "learning_rate": 9.993274958401392e-06, + "loss": 88064.5312, + "step": 57430 + }, + { + "epoch": 0.11603243413583714, + "grad_norm": 1048158.375, + "learning_rate": 9.993256847892175e-06, + "loss": 238968.9, + "step": 57440 + }, + { + "epoch": 0.11605263476852096, + "grad_norm": 12363.8349609375, + "learning_rate": 9.993238713046428e-06, + "loss": 59047.5563, + "step": 57450 + }, + { + "epoch": 0.11607283540120476, + "grad_norm": 823966.75, + "learning_rate": 9.993220553864242e-06, + "loss": 168858.825, + "step": 57460 + }, + { + "epoch": 0.11609303603388858, + "grad_norm": 119177.2109375, + "learning_rate": 9.993202370345705e-06, + "loss": 96734.1938, + "step": 57470 + }, + { + "epoch": 0.1161132366665724, + "grad_norm": 356186.96875, + "learning_rate": 9.993184162490903e-06, + "loss": 181568.5875, + "step": 57480 + }, + { + "epoch": 0.11613343729925621, + "grad_norm": 196427.21875, + "learning_rate": 9.99316593029993e-06, + "loss": 91872.7625, + "step": 57490 + }, + { + "epoch": 0.11615363793194003, + "grad_norm": 136411.609375, + "learning_rate": 9.993147673772869e-06, + "loss": 83286.95, + "step": 57500 + }, + { + "epoch": 0.11617383856462385, + "grad_norm": 1835.234619140625, + "learning_rate": 9.993129392909814e-06, + "loss": 61912.2562, + "step": 57510 + }, + { + "epoch": 0.11619403919730766, + "grad_norm": 267325.125, + "learning_rate": 9.993111087710852e-06, + "loss": 171181.4625, + "step": 57520 + }, + { + "epoch": 0.11621423982999148, + "grad_norm": 53395.5234375, + "learning_rate": 9.993092758176071e-06, + "loss": 147378.9, + "step": 57530 + }, + { + "epoch": 0.1162344404626753, + "grad_norm": 29618.13671875, + "learning_rate": 9.993074404305563e-06, + "loss": 244470.675, + "step": 57540 + }, + { + "epoch": 0.1162546410953591, + "grad_norm": 64830.77734375, + "learning_rate": 9.993056026099415e-06, + "loss": 63475.6375, + "step": 57550 + }, + { + "epoch": 0.11627484172804292, + "grad_norm": 452780.96875, + "learning_rate": 9.993037623557716e-06, + "loss": 158925.85, + "step": 57560 + }, + { + "epoch": 0.11629504236072674, + "grad_norm": 89306.453125, + "learning_rate": 9.993019196680558e-06, + "loss": 147285.2875, + "step": 57570 + }, + { + "epoch": 0.11631524299341055, + "grad_norm": 118006.1796875, + "learning_rate": 9.993000745468031e-06, + "loss": 237956.825, + "step": 57580 + }, + { + "epoch": 0.11633544362609437, + "grad_norm": 55502.4296875, + "learning_rate": 9.992982269920223e-06, + "loss": 114920.375, + "step": 57590 + }, + { + "epoch": 0.11635564425877819, + "grad_norm": 47620.8515625, + "learning_rate": 9.992963770037227e-06, + "loss": 102410.1875, + "step": 57600 + }, + { + "epoch": 0.11637584489146201, + "grad_norm": 206732.140625, + "learning_rate": 9.99294524581913e-06, + "loss": 219899.7, + "step": 57610 + }, + { + "epoch": 0.11639604552414581, + "grad_norm": 826772.8125, + "learning_rate": 9.992926697266023e-06, + "loss": 148407.325, + "step": 57620 + }, + { + "epoch": 0.11641624615682963, + "grad_norm": 2501.10693359375, + "learning_rate": 9.992908124377997e-06, + "loss": 132194.4, + "step": 57630 + }, + { + "epoch": 0.11643644678951345, + "grad_norm": 109644.421875, + "learning_rate": 9.992889527155143e-06, + "loss": 63794.1125, + "step": 57640 + }, + { + "epoch": 0.11645664742219726, + "grad_norm": 46564.828125, + "learning_rate": 9.992870905597549e-06, + "loss": 121244.0, + "step": 57650 + }, + { + "epoch": 0.11647684805488108, + "grad_norm": 55598.66796875, + "learning_rate": 9.99285225970531e-06, + "loss": 87786.575, + "step": 57660 + }, + { + "epoch": 0.1164970486875649, + "grad_norm": 314716.8125, + "learning_rate": 9.992833589478513e-06, + "loss": 144912.8125, + "step": 57670 + }, + { + "epoch": 0.1165172493202487, + "grad_norm": 30172.85546875, + "learning_rate": 9.992814894917251e-06, + "loss": 88854.6875, + "step": 57680 + }, + { + "epoch": 0.11653744995293253, + "grad_norm": 227252.484375, + "learning_rate": 9.992796176021616e-06, + "loss": 183345.2375, + "step": 57690 + }, + { + "epoch": 0.11655765058561635, + "grad_norm": 154766.4375, + "learning_rate": 9.992777432791697e-06, + "loss": 153099.175, + "step": 57700 + }, + { + "epoch": 0.11657785121830015, + "grad_norm": 201033.0625, + "learning_rate": 9.992758665227586e-06, + "loss": 189850.5625, + "step": 57710 + }, + { + "epoch": 0.11659805185098397, + "grad_norm": 3231.4443359375, + "learning_rate": 9.992739873329375e-06, + "loss": 156106.2625, + "step": 57720 + }, + { + "epoch": 0.11661825248366779, + "grad_norm": 37826.3984375, + "learning_rate": 9.992721057097157e-06, + "loss": 63125.3187, + "step": 57730 + }, + { + "epoch": 0.1166384531163516, + "grad_norm": 0.0, + "learning_rate": 9.99270221653102e-06, + "loss": 122956.3375, + "step": 57740 + }, + { + "epoch": 0.11665865374903542, + "grad_norm": 13044.533203125, + "learning_rate": 9.99268335163106e-06, + "loss": 71520.65, + "step": 57750 + }, + { + "epoch": 0.11667885438171924, + "grad_norm": 172482.203125, + "learning_rate": 9.992664462397366e-06, + "loss": 126867.5625, + "step": 57760 + }, + { + "epoch": 0.11669905501440304, + "grad_norm": 106117.40625, + "learning_rate": 9.99264554883003e-06, + "loss": 163978.35, + "step": 57770 + }, + { + "epoch": 0.11671925564708686, + "grad_norm": 76453.3125, + "learning_rate": 9.992626610929146e-06, + "loss": 270118.55, + "step": 57780 + }, + { + "epoch": 0.11673945627977068, + "grad_norm": 70056.5, + "learning_rate": 9.992607648694805e-06, + "loss": 252031.625, + "step": 57790 + }, + { + "epoch": 0.1167596569124545, + "grad_norm": 171843.171875, + "learning_rate": 9.9925886621271e-06, + "loss": 164924.125, + "step": 57800 + }, + { + "epoch": 0.11677985754513831, + "grad_norm": 1019957.875, + "learning_rate": 9.992569651226123e-06, + "loss": 188541.9125, + "step": 57810 + }, + { + "epoch": 0.11680005817782213, + "grad_norm": 236084.578125, + "learning_rate": 9.992550615991968e-06, + "loss": 122354.8625, + "step": 57820 + }, + { + "epoch": 0.11682025881050595, + "grad_norm": 117637.4765625, + "learning_rate": 9.992531556424726e-06, + "loss": 54753.025, + "step": 57830 + }, + { + "epoch": 0.11684045944318976, + "grad_norm": 175983.34375, + "learning_rate": 9.992512472524491e-06, + "loss": 129250.8875, + "step": 57840 + }, + { + "epoch": 0.11686066007587358, + "grad_norm": 99157.1015625, + "learning_rate": 9.992493364291356e-06, + "loss": 88648.9563, + "step": 57850 + }, + { + "epoch": 0.1168808607085574, + "grad_norm": 39061.34375, + "learning_rate": 9.992474231725412e-06, + "loss": 54131.1687, + "step": 57860 + }, + { + "epoch": 0.1169010613412412, + "grad_norm": 171534.546875, + "learning_rate": 9.992455074826757e-06, + "loss": 192000.475, + "step": 57870 + }, + { + "epoch": 0.11692126197392502, + "grad_norm": 54939.7734375, + "learning_rate": 9.99243589359548e-06, + "loss": 55322.8438, + "step": 57880 + }, + { + "epoch": 0.11694146260660884, + "grad_norm": 205366.03125, + "learning_rate": 9.992416688031676e-06, + "loss": 288019.125, + "step": 57890 + }, + { + "epoch": 0.11696166323929265, + "grad_norm": 430905.75, + "learning_rate": 9.992397458135438e-06, + "loss": 154674.2625, + "step": 57900 + }, + { + "epoch": 0.11698186387197647, + "grad_norm": 118530.34375, + "learning_rate": 9.992378203906862e-06, + "loss": 199327.725, + "step": 57910 + }, + { + "epoch": 0.11700206450466029, + "grad_norm": 257835.96875, + "learning_rate": 9.99235892534604e-06, + "loss": 126528.275, + "step": 57920 + }, + { + "epoch": 0.1170222651373441, + "grad_norm": 26999.720703125, + "learning_rate": 9.992339622453065e-06, + "loss": 156993.0375, + "step": 57930 + }, + { + "epoch": 0.11704246577002791, + "grad_norm": 226930.0625, + "learning_rate": 9.992320295228032e-06, + "loss": 94083.0375, + "step": 57940 + }, + { + "epoch": 0.11706266640271173, + "grad_norm": 86813.34375, + "learning_rate": 9.992300943671035e-06, + "loss": 142924.85, + "step": 57950 + }, + { + "epoch": 0.11708286703539555, + "grad_norm": 653635.3125, + "learning_rate": 9.99228156778217e-06, + "loss": 216361.5, + "step": 57960 + }, + { + "epoch": 0.11710306766807936, + "grad_norm": 117322.7421875, + "learning_rate": 9.99226216756153e-06, + "loss": 188302.8, + "step": 57970 + }, + { + "epoch": 0.11712326830076318, + "grad_norm": 288871.40625, + "learning_rate": 9.99224274300921e-06, + "loss": 152910.7875, + "step": 57980 + }, + { + "epoch": 0.117143468933447, + "grad_norm": 55274.90234375, + "learning_rate": 9.992223294125303e-06, + "loss": 88065.9625, + "step": 57990 + }, + { + "epoch": 0.11716366956613081, + "grad_norm": 10928.0986328125, + "learning_rate": 9.992203820909906e-06, + "loss": 83104.65, + "step": 58000 + }, + { + "epoch": 0.11718387019881463, + "grad_norm": 191566.234375, + "learning_rate": 9.992184323363112e-06, + "loss": 193434.3, + "step": 58010 + }, + { + "epoch": 0.11720407083149845, + "grad_norm": 143265.640625, + "learning_rate": 9.992164801485018e-06, + "loss": 73622.3062, + "step": 58020 + }, + { + "epoch": 0.11722427146418225, + "grad_norm": 250905.375, + "learning_rate": 9.992145255275718e-06, + "loss": 265645.55, + "step": 58030 + }, + { + "epoch": 0.11724447209686607, + "grad_norm": 52944.3828125, + "learning_rate": 9.99212568473531e-06, + "loss": 133489.6, + "step": 58040 + }, + { + "epoch": 0.1172646727295499, + "grad_norm": 36314.5703125, + "learning_rate": 9.992106089863884e-06, + "loss": 80568.0, + "step": 58050 + }, + { + "epoch": 0.1172848733622337, + "grad_norm": 349668.8125, + "learning_rate": 9.992086470661537e-06, + "loss": 106363.3, + "step": 58060 + }, + { + "epoch": 0.11730507399491752, + "grad_norm": 45057.921875, + "learning_rate": 9.992066827128368e-06, + "loss": 115330.9375, + "step": 58070 + }, + { + "epoch": 0.11732527462760134, + "grad_norm": 39030.88671875, + "learning_rate": 9.992047159264472e-06, + "loss": 221600.25, + "step": 58080 + }, + { + "epoch": 0.11734547526028515, + "grad_norm": 1359583.25, + "learning_rate": 9.992027467069943e-06, + "loss": 256946.8, + "step": 58090 + }, + { + "epoch": 0.11736567589296897, + "grad_norm": 73660.3046875, + "learning_rate": 9.992007750544876e-06, + "loss": 68243.125, + "step": 58100 + }, + { + "epoch": 0.11738587652565279, + "grad_norm": 368463.90625, + "learning_rate": 9.99198800968937e-06, + "loss": 137436.2, + "step": 58110 + }, + { + "epoch": 0.1174060771583366, + "grad_norm": 6011.09326171875, + "learning_rate": 9.991968244503519e-06, + "loss": 29859.2281, + "step": 58120 + }, + { + "epoch": 0.11742627779102041, + "grad_norm": 7961.2392578125, + "learning_rate": 9.991948454987422e-06, + "loss": 177046.6125, + "step": 58130 + }, + { + "epoch": 0.11744647842370423, + "grad_norm": 21033.783203125, + "learning_rate": 9.99192864114117e-06, + "loss": 134486.0625, + "step": 58140 + }, + { + "epoch": 0.11746667905638805, + "grad_norm": 519143.71875, + "learning_rate": 9.991908802964867e-06, + "loss": 163427.425, + "step": 58150 + }, + { + "epoch": 0.11748687968907186, + "grad_norm": 202967.8125, + "learning_rate": 9.991888940458605e-06, + "loss": 75114.4125, + "step": 58160 + }, + { + "epoch": 0.11750708032175568, + "grad_norm": 0.0, + "learning_rate": 9.99186905362248e-06, + "loss": 146096.275, + "step": 58170 + }, + { + "epoch": 0.1175272809544395, + "grad_norm": 21489.462890625, + "learning_rate": 9.991849142456593e-06, + "loss": 120255.7125, + "step": 58180 + }, + { + "epoch": 0.1175474815871233, + "grad_norm": 187561.484375, + "learning_rate": 9.991829206961038e-06, + "loss": 94880.1687, + "step": 58190 + }, + { + "epoch": 0.11756768221980712, + "grad_norm": 214552.25, + "learning_rate": 9.991809247135912e-06, + "loss": 186989.9125, + "step": 58200 + }, + { + "epoch": 0.11758788285249094, + "grad_norm": 141433.9375, + "learning_rate": 9.991789262981314e-06, + "loss": 78935.1875, + "step": 58210 + }, + { + "epoch": 0.11760808348517475, + "grad_norm": 74306.5546875, + "learning_rate": 9.99176925449734e-06, + "loss": 196573.5125, + "step": 58220 + }, + { + "epoch": 0.11762828411785857, + "grad_norm": 327412.21875, + "learning_rate": 9.991749221684088e-06, + "loss": 203367.3875, + "step": 58230 + }, + { + "epoch": 0.11764848475054239, + "grad_norm": 156040.1875, + "learning_rate": 9.991729164541656e-06, + "loss": 115089.6625, + "step": 58240 + }, + { + "epoch": 0.1176686853832262, + "grad_norm": 365932.5625, + "learning_rate": 9.991709083070143e-06, + "loss": 109128.55, + "step": 58250 + }, + { + "epoch": 0.11768888601591002, + "grad_norm": 282144.1875, + "learning_rate": 9.991688977269643e-06, + "loss": 58665.6, + "step": 58260 + }, + { + "epoch": 0.11770908664859384, + "grad_norm": 365114.71875, + "learning_rate": 9.991668847140258e-06, + "loss": 245971.3, + "step": 58270 + }, + { + "epoch": 0.11772928728127766, + "grad_norm": 47165.8828125, + "learning_rate": 9.991648692682083e-06, + "loss": 98009.6313, + "step": 58280 + }, + { + "epoch": 0.11774948791396146, + "grad_norm": 1247019.875, + "learning_rate": 9.99162851389522e-06, + "loss": 323531.425, + "step": 58290 + }, + { + "epoch": 0.11776968854664528, + "grad_norm": 62894.0859375, + "learning_rate": 9.991608310779762e-06, + "loss": 56564.3313, + "step": 58300 + }, + { + "epoch": 0.1177898891793291, + "grad_norm": 123900.65625, + "learning_rate": 9.991588083335812e-06, + "loss": 102432.7312, + "step": 58310 + }, + { + "epoch": 0.11781008981201291, + "grad_norm": 13387.7607421875, + "learning_rate": 9.991567831563468e-06, + "loss": 147912.45, + "step": 58320 + }, + { + "epoch": 0.11783029044469673, + "grad_norm": 114447.0390625, + "learning_rate": 9.991547555462825e-06, + "loss": 352407.65, + "step": 58330 + }, + { + "epoch": 0.11785049107738055, + "grad_norm": 301744.71875, + "learning_rate": 9.991527255033988e-06, + "loss": 112055.225, + "step": 58340 + }, + { + "epoch": 0.11787069171006435, + "grad_norm": 22940.67578125, + "learning_rate": 9.99150693027705e-06, + "loss": 164358.4125, + "step": 58350 + }, + { + "epoch": 0.11789089234274817, + "grad_norm": 60829.8828125, + "learning_rate": 9.991486581192115e-06, + "loss": 47044.3344, + "step": 58360 + }, + { + "epoch": 0.117911092975432, + "grad_norm": 18086.771484375, + "learning_rate": 9.991466207779279e-06, + "loss": 248092.8, + "step": 58370 + }, + { + "epoch": 0.1179312936081158, + "grad_norm": 72844.5625, + "learning_rate": 9.99144581003864e-06, + "loss": 120391.625, + "step": 58380 + }, + { + "epoch": 0.11795149424079962, + "grad_norm": 41267.2109375, + "learning_rate": 9.991425387970301e-06, + "loss": 43627.5375, + "step": 58390 + }, + { + "epoch": 0.11797169487348344, + "grad_norm": 28352.46875, + "learning_rate": 9.99140494157436e-06, + "loss": 110417.8, + "step": 58400 + }, + { + "epoch": 0.11799189550616725, + "grad_norm": 145639.140625, + "learning_rate": 9.991384470850918e-06, + "loss": 141839.7875, + "step": 58410 + }, + { + "epoch": 0.11801209613885107, + "grad_norm": 1971744.0, + "learning_rate": 9.991363975800073e-06, + "loss": 342263.1, + "step": 58420 + }, + { + "epoch": 0.11803229677153489, + "grad_norm": 115416.421875, + "learning_rate": 9.991343456421923e-06, + "loss": 185711.875, + "step": 58430 + }, + { + "epoch": 0.1180524974042187, + "grad_norm": 5891.904296875, + "learning_rate": 9.991322912716572e-06, + "loss": 129310.0625, + "step": 58440 + }, + { + "epoch": 0.11807269803690251, + "grad_norm": 9022.4072265625, + "learning_rate": 9.99130234468412e-06, + "loss": 173128.25, + "step": 58450 + }, + { + "epoch": 0.11809289866958633, + "grad_norm": 29312.474609375, + "learning_rate": 9.991281752324664e-06, + "loss": 119433.4, + "step": 58460 + }, + { + "epoch": 0.11811309930227015, + "grad_norm": 59075.79296875, + "learning_rate": 9.991261135638307e-06, + "loss": 116070.1875, + "step": 58470 + }, + { + "epoch": 0.11813329993495396, + "grad_norm": 0.0, + "learning_rate": 9.991240494625147e-06, + "loss": 54253.45, + "step": 58480 + }, + { + "epoch": 0.11815350056763778, + "grad_norm": 110175.578125, + "learning_rate": 9.991219829285287e-06, + "loss": 97305.5437, + "step": 58490 + }, + { + "epoch": 0.1181737012003216, + "grad_norm": 281512.40625, + "learning_rate": 9.991199139618828e-06, + "loss": 100137.9312, + "step": 58500 + }, + { + "epoch": 0.1181939018330054, + "grad_norm": 283835.75, + "learning_rate": 9.991178425625869e-06, + "loss": 103119.425, + "step": 58510 + }, + { + "epoch": 0.11821410246568922, + "grad_norm": 309933.25, + "learning_rate": 9.99115768730651e-06, + "loss": 94741.8313, + "step": 58520 + }, + { + "epoch": 0.11823430309837304, + "grad_norm": 79887.2734375, + "learning_rate": 9.991136924660856e-06, + "loss": 125094.3, + "step": 58530 + }, + { + "epoch": 0.11825450373105685, + "grad_norm": 14023.458984375, + "learning_rate": 9.991116137689006e-06, + "loss": 120415.5875, + "step": 58540 + }, + { + "epoch": 0.11827470436374067, + "grad_norm": 38295.359375, + "learning_rate": 9.991095326391061e-06, + "loss": 69307.3875, + "step": 58550 + }, + { + "epoch": 0.11829490499642449, + "grad_norm": 127592.109375, + "learning_rate": 9.99107449076712e-06, + "loss": 199728.6125, + "step": 58560 + }, + { + "epoch": 0.1183151056291083, + "grad_norm": 9975.2607421875, + "learning_rate": 9.99105363081729e-06, + "loss": 263350.95, + "step": 58570 + }, + { + "epoch": 0.11833530626179212, + "grad_norm": 727226.5625, + "learning_rate": 9.99103274654167e-06, + "loss": 215504.125, + "step": 58580 + }, + { + "epoch": 0.11835550689447594, + "grad_norm": 289738.09375, + "learning_rate": 9.99101183794036e-06, + "loss": 88970.6812, + "step": 58590 + }, + { + "epoch": 0.11837570752715976, + "grad_norm": 595281.75, + "learning_rate": 9.990990905013466e-06, + "loss": 243377.45, + "step": 58600 + }, + { + "epoch": 0.11839590815984356, + "grad_norm": 223161.390625, + "learning_rate": 9.990969947761087e-06, + "loss": 383909.3, + "step": 58610 + }, + { + "epoch": 0.11841610879252738, + "grad_norm": 16008.240234375, + "learning_rate": 9.990948966183324e-06, + "loss": 115494.45, + "step": 58620 + }, + { + "epoch": 0.1184363094252112, + "grad_norm": 152752.375, + "learning_rate": 9.990927960280283e-06, + "loss": 70731.1812, + "step": 58630 + }, + { + "epoch": 0.11845651005789501, + "grad_norm": 34916.94921875, + "learning_rate": 9.990906930052065e-06, + "loss": 103488.4438, + "step": 58640 + }, + { + "epoch": 0.11847671069057883, + "grad_norm": 152786.828125, + "learning_rate": 9.99088587549877e-06, + "loss": 111801.5, + "step": 58650 + }, + { + "epoch": 0.11849691132326265, + "grad_norm": 162562.46875, + "learning_rate": 9.990864796620503e-06, + "loss": 161920.825, + "step": 58660 + }, + { + "epoch": 0.11851711195594646, + "grad_norm": 5536.052734375, + "learning_rate": 9.990843693417366e-06, + "loss": 235845.875, + "step": 58670 + }, + { + "epoch": 0.11853731258863028, + "grad_norm": 0.0, + "learning_rate": 9.990822565889464e-06, + "loss": 99016.0125, + "step": 58680 + }, + { + "epoch": 0.1185575132213141, + "grad_norm": 127632.796875, + "learning_rate": 9.990801414036896e-06, + "loss": 204931.575, + "step": 58690 + }, + { + "epoch": 0.1185777138539979, + "grad_norm": 51117.3515625, + "learning_rate": 9.99078023785977e-06, + "loss": 131364.0, + "step": 58700 + }, + { + "epoch": 0.11859791448668172, + "grad_norm": 3720.61376953125, + "learning_rate": 9.990759037358184e-06, + "loss": 128725.425, + "step": 58710 + }, + { + "epoch": 0.11861811511936554, + "grad_norm": 94775.5703125, + "learning_rate": 9.990737812532245e-06, + "loss": 108815.5125, + "step": 58720 + }, + { + "epoch": 0.11863831575204935, + "grad_norm": 11973.8486328125, + "learning_rate": 9.990716563382055e-06, + "loss": 165558.625, + "step": 58730 + }, + { + "epoch": 0.11865851638473317, + "grad_norm": 69597.2109375, + "learning_rate": 9.990695289907716e-06, + "loss": 184225.4, + "step": 58740 + }, + { + "epoch": 0.11867871701741699, + "grad_norm": 78729.1875, + "learning_rate": 9.990673992109335e-06, + "loss": 96443.95, + "step": 58750 + }, + { + "epoch": 0.11869891765010081, + "grad_norm": 348279.1875, + "learning_rate": 9.990652669987016e-06, + "loss": 186563.35, + "step": 58760 + }, + { + "epoch": 0.11871911828278461, + "grad_norm": 552023.4375, + "learning_rate": 9.990631323540858e-06, + "loss": 289357.325, + "step": 58770 + }, + { + "epoch": 0.11873931891546843, + "grad_norm": 160950.5625, + "learning_rate": 9.990609952770969e-06, + "loss": 129873.3, + "step": 58780 + }, + { + "epoch": 0.11875951954815225, + "grad_norm": 69281.5234375, + "learning_rate": 9.990588557677454e-06, + "loss": 83866.4, + "step": 58790 + }, + { + "epoch": 0.11877972018083606, + "grad_norm": 13379.1015625, + "learning_rate": 9.990567138260414e-06, + "loss": 161858.15, + "step": 58800 + }, + { + "epoch": 0.11879992081351988, + "grad_norm": 210335.0, + "learning_rate": 9.990545694519956e-06, + "loss": 83083.325, + "step": 58810 + }, + { + "epoch": 0.1188201214462037, + "grad_norm": 62191.70703125, + "learning_rate": 9.990524226456182e-06, + "loss": 176330.0, + "step": 58820 + }, + { + "epoch": 0.1188403220788875, + "grad_norm": 47049.9921875, + "learning_rate": 9.9905027340692e-06, + "loss": 86535.3, + "step": 58830 + }, + { + "epoch": 0.11886052271157133, + "grad_norm": 57358.6015625, + "learning_rate": 9.990481217359112e-06, + "loss": 175387.4875, + "step": 58840 + }, + { + "epoch": 0.11888072334425515, + "grad_norm": 846802.6875, + "learning_rate": 9.990459676326025e-06, + "loss": 227122.8, + "step": 58850 + }, + { + "epoch": 0.11890092397693895, + "grad_norm": 64913.32421875, + "learning_rate": 9.990438110970043e-06, + "loss": 185195.9375, + "step": 58860 + }, + { + "epoch": 0.11892112460962277, + "grad_norm": 53063.8203125, + "learning_rate": 9.990416521291268e-06, + "loss": 111564.1, + "step": 58870 + }, + { + "epoch": 0.11894132524230659, + "grad_norm": 264742.9375, + "learning_rate": 9.990394907289811e-06, + "loss": 103901.6562, + "step": 58880 + }, + { + "epoch": 0.1189615258749904, + "grad_norm": 17274.92578125, + "learning_rate": 9.990373268965773e-06, + "loss": 139601.175, + "step": 58890 + }, + { + "epoch": 0.11898172650767422, + "grad_norm": 52787.109375, + "learning_rate": 9.990351606319261e-06, + "loss": 104249.1812, + "step": 58900 + }, + { + "epoch": 0.11900192714035804, + "grad_norm": 1709.365966796875, + "learning_rate": 9.990329919350382e-06, + "loss": 260658.1, + "step": 58910 + }, + { + "epoch": 0.11902212777304186, + "grad_norm": 332332.59375, + "learning_rate": 9.990308208059239e-06, + "loss": 187857.4625, + "step": 58920 + }, + { + "epoch": 0.11904232840572566, + "grad_norm": 27162.53515625, + "learning_rate": 9.990286472445938e-06, + "loss": 173163.275, + "step": 58930 + }, + { + "epoch": 0.11906252903840948, + "grad_norm": 287481.0625, + "learning_rate": 9.990264712510586e-06, + "loss": 174634.575, + "step": 58940 + }, + { + "epoch": 0.1190827296710933, + "grad_norm": 402899.46875, + "learning_rate": 9.990242928253291e-06, + "loss": 153370.6625, + "step": 58950 + }, + { + "epoch": 0.11910293030377711, + "grad_norm": 158721.9375, + "learning_rate": 9.990221119674157e-06, + "loss": 75950.8375, + "step": 58960 + }, + { + "epoch": 0.11912313093646093, + "grad_norm": 20331.421875, + "learning_rate": 9.99019928677329e-06, + "loss": 112745.6875, + "step": 58970 + }, + { + "epoch": 0.11914333156914475, + "grad_norm": 907753.6875, + "learning_rate": 9.990177429550797e-06, + "loss": 189966.3, + "step": 58980 + }, + { + "epoch": 0.11916353220182856, + "grad_norm": 164095.25, + "learning_rate": 9.990155548006783e-06, + "loss": 126359.875, + "step": 58990 + }, + { + "epoch": 0.11918373283451238, + "grad_norm": 197979.78125, + "learning_rate": 9.990133642141359e-06, + "loss": 120570.0875, + "step": 59000 + }, + { + "epoch": 0.1192039334671962, + "grad_norm": 60829.3671875, + "learning_rate": 9.990111711954626e-06, + "loss": 92933.9187, + "step": 59010 + }, + { + "epoch": 0.11922413409988, + "grad_norm": 783078.9375, + "learning_rate": 9.990089757446697e-06, + "loss": 289423.075, + "step": 59020 + }, + { + "epoch": 0.11924433473256382, + "grad_norm": 263709.28125, + "learning_rate": 9.990067778617672e-06, + "loss": 110832.25, + "step": 59030 + }, + { + "epoch": 0.11926453536524764, + "grad_norm": 396625.53125, + "learning_rate": 9.990045775467664e-06, + "loss": 140843.6875, + "step": 59040 + }, + { + "epoch": 0.11928473599793145, + "grad_norm": 11130.37109375, + "learning_rate": 9.990023747996778e-06, + "loss": 159292.6125, + "step": 59050 + }, + { + "epoch": 0.11930493663061527, + "grad_norm": 43941.78125, + "learning_rate": 9.990001696205121e-06, + "loss": 148401.6375, + "step": 59060 + }, + { + "epoch": 0.11932513726329909, + "grad_norm": 42393.36328125, + "learning_rate": 9.989979620092802e-06, + "loss": 93727.5562, + "step": 59070 + }, + { + "epoch": 0.11934533789598291, + "grad_norm": 682634.4375, + "learning_rate": 9.989957519659926e-06, + "loss": 150394.525, + "step": 59080 + }, + { + "epoch": 0.11936553852866671, + "grad_norm": 98515.171875, + "learning_rate": 9.989935394906602e-06, + "loss": 55087.4125, + "step": 59090 + }, + { + "epoch": 0.11938573916135053, + "grad_norm": 140491.0625, + "learning_rate": 9.98991324583294e-06, + "loss": 163796.95, + "step": 59100 + }, + { + "epoch": 0.11940593979403435, + "grad_norm": 148923.171875, + "learning_rate": 9.989891072439045e-06, + "loss": 155096.1, + "step": 59110 + }, + { + "epoch": 0.11942614042671816, + "grad_norm": 17138.392578125, + "learning_rate": 9.989868874725026e-06, + "loss": 175822.55, + "step": 59120 + }, + { + "epoch": 0.11944634105940198, + "grad_norm": 3801.99951171875, + "learning_rate": 9.989846652690992e-06, + "loss": 226460.925, + "step": 59130 + }, + { + "epoch": 0.1194665416920858, + "grad_norm": 185105.5625, + "learning_rate": 9.989824406337049e-06, + "loss": 46568.1656, + "step": 59140 + }, + { + "epoch": 0.11948674232476961, + "grad_norm": 0.0, + "learning_rate": 9.989802135663308e-06, + "loss": 215227.775, + "step": 59150 + }, + { + "epoch": 0.11950694295745343, + "grad_norm": 16772.85546875, + "learning_rate": 9.989779840669878e-06, + "loss": 67504.5562, + "step": 59160 + }, + { + "epoch": 0.11952714359013725, + "grad_norm": 186416.484375, + "learning_rate": 9.989757521356864e-06, + "loss": 270425.05, + "step": 59170 + }, + { + "epoch": 0.11954734422282105, + "grad_norm": 212160.140625, + "learning_rate": 9.989735177724378e-06, + "loss": 119156.0875, + "step": 59180 + }, + { + "epoch": 0.11956754485550487, + "grad_norm": 15252.66015625, + "learning_rate": 9.989712809772528e-06, + "loss": 24462.1922, + "step": 59190 + }, + { + "epoch": 0.1195877454881887, + "grad_norm": 148127.625, + "learning_rate": 9.989690417501423e-06, + "loss": 130461.275, + "step": 59200 + }, + { + "epoch": 0.1196079461208725, + "grad_norm": 78604.4375, + "learning_rate": 9.989668000911173e-06, + "loss": 185481.35, + "step": 59210 + }, + { + "epoch": 0.11962814675355632, + "grad_norm": 1348387.5, + "learning_rate": 9.989645560001884e-06, + "loss": 155607.275, + "step": 59220 + }, + { + "epoch": 0.11964834738624014, + "grad_norm": 507421.34375, + "learning_rate": 9.989623094773669e-06, + "loss": 193688.775, + "step": 59230 + }, + { + "epoch": 0.11966854801892396, + "grad_norm": 69066.1953125, + "learning_rate": 9.989600605226637e-06, + "loss": 28847.675, + "step": 59240 + }, + { + "epoch": 0.11968874865160777, + "grad_norm": 231520.9375, + "learning_rate": 9.989578091360896e-06, + "loss": 114488.525, + "step": 59250 + }, + { + "epoch": 0.11970894928429159, + "grad_norm": 87762.1328125, + "learning_rate": 9.989555553176556e-06, + "loss": 185051.5625, + "step": 59260 + }, + { + "epoch": 0.1197291499169754, + "grad_norm": 32117.02734375, + "learning_rate": 9.989532990673729e-06, + "loss": 110403.625, + "step": 59270 + }, + { + "epoch": 0.11974935054965921, + "grad_norm": 112323.859375, + "learning_rate": 9.989510403852521e-06, + "loss": 63460.9062, + "step": 59280 + }, + { + "epoch": 0.11976955118234303, + "grad_norm": 6889.51025390625, + "learning_rate": 9.989487792713045e-06, + "loss": 90984.5562, + "step": 59290 + }, + { + "epoch": 0.11978975181502685, + "grad_norm": 195871.09375, + "learning_rate": 9.989465157255413e-06, + "loss": 246322.575, + "step": 59300 + }, + { + "epoch": 0.11980995244771066, + "grad_norm": 975087.625, + "learning_rate": 9.98944249747973e-06, + "loss": 192379.65, + "step": 59310 + }, + { + "epoch": 0.11983015308039448, + "grad_norm": 73092.0078125, + "learning_rate": 9.989419813386112e-06, + "loss": 78427.7875, + "step": 59320 + }, + { + "epoch": 0.1198503537130783, + "grad_norm": 1156958.0, + "learning_rate": 9.989397104974665e-06, + "loss": 143670.275, + "step": 59330 + }, + { + "epoch": 0.1198705543457621, + "grad_norm": 5815.5546875, + "learning_rate": 9.989374372245503e-06, + "loss": 119390.075, + "step": 59340 + }, + { + "epoch": 0.11989075497844592, + "grad_norm": 2895.353271484375, + "learning_rate": 9.989351615198734e-06, + "loss": 180249.1875, + "step": 59350 + }, + { + "epoch": 0.11991095561112974, + "grad_norm": 204948.078125, + "learning_rate": 9.989328833834472e-06, + "loss": 306961.4, + "step": 59360 + }, + { + "epoch": 0.11993115624381355, + "grad_norm": 7112.91796875, + "learning_rate": 9.989306028152825e-06, + "loss": 255325.325, + "step": 59370 + }, + { + "epoch": 0.11995135687649737, + "grad_norm": 82175.8125, + "learning_rate": 9.989283198153908e-06, + "loss": 132453.7875, + "step": 59380 + }, + { + "epoch": 0.11997155750918119, + "grad_norm": 154799.734375, + "learning_rate": 9.989260343837827e-06, + "loss": 231250.975, + "step": 59390 + }, + { + "epoch": 0.11999175814186501, + "grad_norm": 153969.46875, + "learning_rate": 9.989237465204698e-06, + "loss": 66776.2875, + "step": 59400 + }, + { + "epoch": 0.12001195877454882, + "grad_norm": 275022.15625, + "learning_rate": 9.989214562254628e-06, + "loss": 185897.8375, + "step": 59410 + }, + { + "epoch": 0.12003215940723264, + "grad_norm": 0.0, + "learning_rate": 9.989191634987734e-06, + "loss": 187251.6875, + "step": 59420 + }, + { + "epoch": 0.12005236003991646, + "grad_norm": 1294916.0, + "learning_rate": 9.989168683404125e-06, + "loss": 261604.675, + "step": 59430 + }, + { + "epoch": 0.12007256067260026, + "grad_norm": 536193.1875, + "learning_rate": 9.98914570750391e-06, + "loss": 126144.1, + "step": 59440 + }, + { + "epoch": 0.12009276130528408, + "grad_norm": 362083.34375, + "learning_rate": 9.98912270728721e-06, + "loss": 133504.5, + "step": 59450 + }, + { + "epoch": 0.1201129619379679, + "grad_norm": 171981.765625, + "learning_rate": 9.989099682754125e-06, + "loss": 106872.75, + "step": 59460 + }, + { + "epoch": 0.12013316257065171, + "grad_norm": 51600.9375, + "learning_rate": 9.989076633904775e-06, + "loss": 286760.225, + "step": 59470 + }, + { + "epoch": 0.12015336320333553, + "grad_norm": 135173.5, + "learning_rate": 9.989053560739272e-06, + "loss": 71037.25, + "step": 59480 + }, + { + "epoch": 0.12017356383601935, + "grad_norm": 33768.0703125, + "learning_rate": 9.989030463257726e-06, + "loss": 100473.4187, + "step": 59490 + }, + { + "epoch": 0.12019376446870315, + "grad_norm": 280559.375, + "learning_rate": 9.989007341460251e-06, + "loss": 70277.35, + "step": 59500 + }, + { + "epoch": 0.12021396510138697, + "grad_norm": 17164.173828125, + "learning_rate": 9.98898419534696e-06, + "loss": 162704.925, + "step": 59510 + }, + { + "epoch": 0.1202341657340708, + "grad_norm": 14030.0166015625, + "learning_rate": 9.988961024917963e-06, + "loss": 87709.575, + "step": 59520 + }, + { + "epoch": 0.1202543663667546, + "grad_norm": 1133303.75, + "learning_rate": 9.988937830173376e-06, + "loss": 233573.725, + "step": 59530 + }, + { + "epoch": 0.12027456699943842, + "grad_norm": 235948.015625, + "learning_rate": 9.988914611113311e-06, + "loss": 248917.475, + "step": 59540 + }, + { + "epoch": 0.12029476763212224, + "grad_norm": 12209.6337890625, + "learning_rate": 9.988891367737882e-06, + "loss": 273848.925, + "step": 59550 + }, + { + "epoch": 0.12031496826480606, + "grad_norm": 88920.734375, + "learning_rate": 9.988868100047203e-06, + "loss": 200287.2875, + "step": 59560 + }, + { + "epoch": 0.12033516889748987, + "grad_norm": 628482.0, + "learning_rate": 9.988844808041382e-06, + "loss": 136716.8, + "step": 59570 + }, + { + "epoch": 0.12035536953017369, + "grad_norm": 45949.21484375, + "learning_rate": 9.98882149172054e-06, + "loss": 80831.825, + "step": 59580 + }, + { + "epoch": 0.1203755701628575, + "grad_norm": 185846.296875, + "learning_rate": 9.988798151084783e-06, + "loss": 63029.5437, + "step": 59590 + }, + { + "epoch": 0.12039577079554131, + "grad_norm": 18884.830078125, + "learning_rate": 9.988774786134235e-06, + "loss": 248891.7, + "step": 59600 + }, + { + "epoch": 0.12041597142822513, + "grad_norm": 29539.978515625, + "learning_rate": 9.988751396869e-06, + "loss": 45826.0656, + "step": 59610 + }, + { + "epoch": 0.12043617206090895, + "grad_norm": 17458.76953125, + "learning_rate": 9.988727983289195e-06, + "loss": 68879.0063, + "step": 59620 + }, + { + "epoch": 0.12045637269359276, + "grad_norm": 1514492.75, + "learning_rate": 9.988704545394936e-06, + "loss": 378473.1, + "step": 59630 + }, + { + "epoch": 0.12047657332627658, + "grad_norm": 398797.625, + "learning_rate": 9.988681083186336e-06, + "loss": 72665.175, + "step": 59640 + }, + { + "epoch": 0.1204967739589604, + "grad_norm": 9492.150390625, + "learning_rate": 9.988657596663509e-06, + "loss": 116118.05, + "step": 59650 + }, + { + "epoch": 0.1205169745916442, + "grad_norm": 25562.353515625, + "learning_rate": 9.988634085826571e-06, + "loss": 75399.0938, + "step": 59660 + }, + { + "epoch": 0.12053717522432802, + "grad_norm": 151852.890625, + "learning_rate": 9.988610550675635e-06, + "loss": 121061.775, + "step": 59670 + }, + { + "epoch": 0.12055737585701184, + "grad_norm": 859564.5, + "learning_rate": 9.988586991210816e-06, + "loss": 161042.95, + "step": 59680 + }, + { + "epoch": 0.12057757648969565, + "grad_norm": 43486.23046875, + "learning_rate": 9.98856340743223e-06, + "loss": 68747.3687, + "step": 59690 + }, + { + "epoch": 0.12059777712237947, + "grad_norm": 506150.46875, + "learning_rate": 9.988539799339989e-06, + "loss": 72702.2625, + "step": 59700 + }, + { + "epoch": 0.12061797775506329, + "grad_norm": 45213.13671875, + "learning_rate": 9.988516166934212e-06, + "loss": 85454.9812, + "step": 59710 + }, + { + "epoch": 0.12063817838774711, + "grad_norm": 26902.9765625, + "learning_rate": 9.988492510215011e-06, + "loss": 97780.4187, + "step": 59720 + }, + { + "epoch": 0.12065837902043092, + "grad_norm": 103414.0859375, + "learning_rate": 9.988468829182504e-06, + "loss": 70545.5688, + "step": 59730 + }, + { + "epoch": 0.12067857965311474, + "grad_norm": 313815.59375, + "learning_rate": 9.988445123836804e-06, + "loss": 129744.75, + "step": 59740 + }, + { + "epoch": 0.12069878028579856, + "grad_norm": 23386.15234375, + "learning_rate": 9.988421394178027e-06, + "loss": 137207.0625, + "step": 59750 + }, + { + "epoch": 0.12071898091848236, + "grad_norm": 112609.234375, + "learning_rate": 9.98839764020629e-06, + "loss": 117034.7375, + "step": 59760 + }, + { + "epoch": 0.12073918155116618, + "grad_norm": 82080.4375, + "learning_rate": 9.988373861921708e-06, + "loss": 74181.8125, + "step": 59770 + }, + { + "epoch": 0.12075938218385, + "grad_norm": 329675.8125, + "learning_rate": 9.988350059324396e-06, + "loss": 135848.25, + "step": 59780 + }, + { + "epoch": 0.12077958281653381, + "grad_norm": 538087.25, + "learning_rate": 9.988326232414472e-06, + "loss": 175268.15, + "step": 59790 + }, + { + "epoch": 0.12079978344921763, + "grad_norm": 11296.8154296875, + "learning_rate": 9.98830238119205e-06, + "loss": 73693.8687, + "step": 59800 + }, + { + "epoch": 0.12081998408190145, + "grad_norm": 43826.390625, + "learning_rate": 9.988278505657247e-06, + "loss": 412391.15, + "step": 59810 + }, + { + "epoch": 0.12084018471458526, + "grad_norm": 117905.5546875, + "learning_rate": 9.98825460581018e-06, + "loss": 154752.2, + "step": 59820 + }, + { + "epoch": 0.12086038534726908, + "grad_norm": 528746.0625, + "learning_rate": 9.988230681650964e-06, + "loss": 257996.5, + "step": 59830 + }, + { + "epoch": 0.1208805859799529, + "grad_norm": 16526.134765625, + "learning_rate": 9.988206733179718e-06, + "loss": 104499.5688, + "step": 59840 + }, + { + "epoch": 0.1209007866126367, + "grad_norm": 155447.28125, + "learning_rate": 9.988182760396557e-06, + "loss": 104425.9563, + "step": 59850 + }, + { + "epoch": 0.12092098724532052, + "grad_norm": 37201.69921875, + "learning_rate": 9.988158763301598e-06, + "loss": 263514.85, + "step": 59860 + }, + { + "epoch": 0.12094118787800434, + "grad_norm": 56168.95703125, + "learning_rate": 9.988134741894959e-06, + "loss": 101639.4875, + "step": 59870 + }, + { + "epoch": 0.12096138851068816, + "grad_norm": 133597.609375, + "learning_rate": 9.988110696176756e-06, + "loss": 226366.125, + "step": 59880 + }, + { + "epoch": 0.12098158914337197, + "grad_norm": 290392.8125, + "learning_rate": 9.988086626147107e-06, + "loss": 152116.025, + "step": 59890 + }, + { + "epoch": 0.12100178977605579, + "grad_norm": 116578.3828125, + "learning_rate": 9.988062531806127e-06, + "loss": 95417.7625, + "step": 59900 + }, + { + "epoch": 0.12102199040873961, + "grad_norm": 47867.66796875, + "learning_rate": 9.988038413153936e-06, + "loss": 176153.2125, + "step": 59910 + }, + { + "epoch": 0.12104219104142341, + "grad_norm": 2010279.5, + "learning_rate": 9.988014270190652e-06, + "loss": 421373.0, + "step": 59920 + }, + { + "epoch": 0.12106239167410723, + "grad_norm": 204011.5625, + "learning_rate": 9.98799010291639e-06, + "loss": 276790.025, + "step": 59930 + }, + { + "epoch": 0.12108259230679105, + "grad_norm": 11277.7392578125, + "learning_rate": 9.987965911331268e-06, + "loss": 146749.3875, + "step": 59940 + }, + { + "epoch": 0.12110279293947486, + "grad_norm": 271816.21875, + "learning_rate": 9.987941695435409e-06, + "loss": 104188.8062, + "step": 59950 + }, + { + "epoch": 0.12112299357215868, + "grad_norm": 415720.46875, + "learning_rate": 9.987917455228924e-06, + "loss": 105037.675, + "step": 59960 + }, + { + "epoch": 0.1211431942048425, + "grad_norm": 523087.96875, + "learning_rate": 9.987893190711935e-06, + "loss": 84110.2812, + "step": 59970 + }, + { + "epoch": 0.1211633948375263, + "grad_norm": 105699.515625, + "learning_rate": 9.987868901884558e-06, + "loss": 173995.6125, + "step": 59980 + }, + { + "epoch": 0.12118359547021013, + "grad_norm": 7913.6162109375, + "learning_rate": 9.987844588746916e-06, + "loss": 93065.025, + "step": 59990 + }, + { + "epoch": 0.12120379610289395, + "grad_norm": 2517.028564453125, + "learning_rate": 9.987820251299121e-06, + "loss": 287350.475, + "step": 60000 + }, + { + "epoch": 0.12122399673557775, + "grad_norm": 334697.90625, + "learning_rate": 9.987795889541298e-06, + "loss": 60152.6438, + "step": 60010 + }, + { + "epoch": 0.12124419736826157, + "grad_norm": 32524.662109375, + "learning_rate": 9.987771503473562e-06, + "loss": 152678.8875, + "step": 60020 + }, + { + "epoch": 0.12126439800094539, + "grad_norm": 123737.46875, + "learning_rate": 9.987747093096032e-06, + "loss": 229885.775, + "step": 60030 + }, + { + "epoch": 0.12128459863362921, + "grad_norm": 283325.6875, + "learning_rate": 9.987722658408828e-06, + "loss": 117040.3125, + "step": 60040 + }, + { + "epoch": 0.12130479926631302, + "grad_norm": 28307.779296875, + "learning_rate": 9.98769819941207e-06, + "loss": 54645.4563, + "step": 60050 + }, + { + "epoch": 0.12132499989899684, + "grad_norm": 358755.59375, + "learning_rate": 9.987673716105874e-06, + "loss": 206183.8375, + "step": 60060 + }, + { + "epoch": 0.12134520053168066, + "grad_norm": 169913.890625, + "learning_rate": 9.987649208490361e-06, + "loss": 127964.8125, + "step": 60070 + }, + { + "epoch": 0.12136540116436446, + "grad_norm": 329730.8125, + "learning_rate": 9.987624676565652e-06, + "loss": 112462.0375, + "step": 60080 + }, + { + "epoch": 0.12138560179704828, + "grad_norm": 87643.2421875, + "learning_rate": 9.987600120331864e-06, + "loss": 161367.75, + "step": 60090 + }, + { + "epoch": 0.1214058024297321, + "grad_norm": 279674.3125, + "learning_rate": 9.987575539789119e-06, + "loss": 162409.875, + "step": 60100 + }, + { + "epoch": 0.12142600306241591, + "grad_norm": 32370.509765625, + "learning_rate": 9.987550934937536e-06, + "loss": 72279.1125, + "step": 60110 + }, + { + "epoch": 0.12144620369509973, + "grad_norm": 64264.2421875, + "learning_rate": 9.987526305777234e-06, + "loss": 302723.0, + "step": 60120 + }, + { + "epoch": 0.12146640432778355, + "grad_norm": 180534.515625, + "learning_rate": 9.987501652308333e-06, + "loss": 150540.65, + "step": 60130 + }, + { + "epoch": 0.12148660496046736, + "grad_norm": 5661.2236328125, + "learning_rate": 9.987476974530957e-06, + "loss": 243602.325, + "step": 60140 + }, + { + "epoch": 0.12150680559315118, + "grad_norm": 49932.4609375, + "learning_rate": 9.98745227244522e-06, + "loss": 232519.375, + "step": 60150 + }, + { + "epoch": 0.121527006225835, + "grad_norm": 1640.406494140625, + "learning_rate": 9.987427546051246e-06, + "loss": 117960.875, + "step": 60160 + }, + { + "epoch": 0.1215472068585188, + "grad_norm": 50775.8671875, + "learning_rate": 9.987402795349154e-06, + "loss": 127472.6625, + "step": 60170 + }, + { + "epoch": 0.12156740749120262, + "grad_norm": 0.0, + "learning_rate": 9.987378020339069e-06, + "loss": 101516.4563, + "step": 60180 + }, + { + "epoch": 0.12158760812388644, + "grad_norm": 328428.375, + "learning_rate": 9.987353221021106e-06, + "loss": 105261.8625, + "step": 60190 + }, + { + "epoch": 0.12160780875657026, + "grad_norm": 176676.953125, + "learning_rate": 9.987328397395389e-06, + "loss": 84284.9187, + "step": 60200 + }, + { + "epoch": 0.12162800938925407, + "grad_norm": 157327.25, + "learning_rate": 9.987303549462038e-06, + "loss": 149578.7125, + "step": 60210 + }, + { + "epoch": 0.12164821002193789, + "grad_norm": 85055.90625, + "learning_rate": 9.987278677221174e-06, + "loss": 174623.925, + "step": 60220 + }, + { + "epoch": 0.12166841065462171, + "grad_norm": 10055.8486328125, + "learning_rate": 9.987253780672918e-06, + "loss": 207867.1, + "step": 60230 + }, + { + "epoch": 0.12168861128730551, + "grad_norm": 252216.765625, + "learning_rate": 9.987228859817395e-06, + "loss": 182024.7, + "step": 60240 + }, + { + "epoch": 0.12170881191998933, + "grad_norm": 217552.453125, + "learning_rate": 9.987203914654721e-06, + "loss": 98427.6625, + "step": 60250 + }, + { + "epoch": 0.12172901255267315, + "grad_norm": 0.0, + "learning_rate": 9.987178945185019e-06, + "loss": 56969.1687, + "step": 60260 + }, + { + "epoch": 0.12174921318535696, + "grad_norm": 34496.39453125, + "learning_rate": 9.987153951408414e-06, + "loss": 93235.6375, + "step": 60270 + }, + { + "epoch": 0.12176941381804078, + "grad_norm": 19353.966796875, + "learning_rate": 9.987128933325025e-06, + "loss": 62814.3938, + "step": 60280 + }, + { + "epoch": 0.1217896144507246, + "grad_norm": 12764.4697265625, + "learning_rate": 9.987103890934974e-06, + "loss": 150204.1375, + "step": 60290 + }, + { + "epoch": 0.12180981508340841, + "grad_norm": 940335.6875, + "learning_rate": 9.987078824238384e-06, + "loss": 191526.275, + "step": 60300 + }, + { + "epoch": 0.12183001571609223, + "grad_norm": 2168158.75, + "learning_rate": 9.987053733235376e-06, + "loss": 311506.925, + "step": 60310 + }, + { + "epoch": 0.12185021634877605, + "grad_norm": 103182.828125, + "learning_rate": 9.987028617926074e-06, + "loss": 125768.675, + "step": 60320 + }, + { + "epoch": 0.12187041698145985, + "grad_norm": 13478.779296875, + "learning_rate": 9.987003478310597e-06, + "loss": 138144.55, + "step": 60330 + }, + { + "epoch": 0.12189061761414367, + "grad_norm": 29574.58984375, + "learning_rate": 9.986978314389071e-06, + "loss": 138382.95, + "step": 60340 + }, + { + "epoch": 0.1219108182468275, + "grad_norm": 117737.7265625, + "learning_rate": 9.98695312616162e-06, + "loss": 190324.45, + "step": 60350 + }, + { + "epoch": 0.12193101887951131, + "grad_norm": 68698.5, + "learning_rate": 9.986927913628361e-06, + "loss": 176471.45, + "step": 60360 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 339298.1875, + "learning_rate": 9.986902676789421e-06, + "loss": 125433.175, + "step": 60370 + }, + { + "epoch": 0.12197142014487894, + "grad_norm": 27793.46484375, + "learning_rate": 9.986877415644925e-06, + "loss": 123528.075, + "step": 60380 + }, + { + "epoch": 0.12199162077756276, + "grad_norm": 900339.25, + "learning_rate": 9.98685213019499e-06, + "loss": 194524.6875, + "step": 60390 + }, + { + "epoch": 0.12201182141024657, + "grad_norm": 67013.2734375, + "learning_rate": 9.986826820439743e-06, + "loss": 136284.675, + "step": 60400 + }, + { + "epoch": 0.12203202204293039, + "grad_norm": 130462.0234375, + "learning_rate": 9.986801486379307e-06, + "loss": 108541.1875, + "step": 60410 + }, + { + "epoch": 0.1220522226756142, + "grad_norm": 267550.84375, + "learning_rate": 9.986776128013807e-06, + "loss": 64319.2312, + "step": 60420 + }, + { + "epoch": 0.12207242330829801, + "grad_norm": 65608.453125, + "learning_rate": 9.986750745343363e-06, + "loss": 127187.775, + "step": 60430 + }, + { + "epoch": 0.12209262394098183, + "grad_norm": 41821.80859375, + "learning_rate": 9.986725338368103e-06, + "loss": 98247.6187, + "step": 60440 + }, + { + "epoch": 0.12211282457366565, + "grad_norm": 31073.6953125, + "learning_rate": 9.986699907088147e-06, + "loss": 62704.8875, + "step": 60450 + }, + { + "epoch": 0.12213302520634946, + "grad_norm": 1167471.625, + "learning_rate": 9.986674451503619e-06, + "loss": 233836.925, + "step": 60460 + }, + { + "epoch": 0.12215322583903328, + "grad_norm": 13891.8916015625, + "learning_rate": 9.986648971614646e-06, + "loss": 223888.95, + "step": 60470 + }, + { + "epoch": 0.1221734264717171, + "grad_norm": 335917.8125, + "learning_rate": 9.98662346742135e-06, + "loss": 74126.9125, + "step": 60480 + }, + { + "epoch": 0.1221936271044009, + "grad_norm": 1229718.5, + "learning_rate": 9.986597938923859e-06, + "loss": 148087.7, + "step": 60490 + }, + { + "epoch": 0.12221382773708472, + "grad_norm": 26394.06640625, + "learning_rate": 9.98657238612229e-06, + "loss": 45131.0625, + "step": 60500 + }, + { + "epoch": 0.12223402836976854, + "grad_norm": 27124.708984375, + "learning_rate": 9.986546809016775e-06, + "loss": 80112.85, + "step": 60510 + }, + { + "epoch": 0.12225422900245236, + "grad_norm": 25561.52734375, + "learning_rate": 9.986521207607436e-06, + "loss": 270940.2, + "step": 60520 + }, + { + "epoch": 0.12227442963513617, + "grad_norm": 185297.375, + "learning_rate": 9.986495581894396e-06, + "loss": 100100.8875, + "step": 60530 + }, + { + "epoch": 0.12229463026781999, + "grad_norm": 94864.3984375, + "learning_rate": 9.986469931877781e-06, + "loss": 55150.6062, + "step": 60540 + }, + { + "epoch": 0.12231483090050381, + "grad_norm": 167408.890625, + "learning_rate": 9.986444257557717e-06, + "loss": 65097.975, + "step": 60550 + }, + { + "epoch": 0.12233503153318762, + "grad_norm": 198773.03125, + "learning_rate": 9.986418558934329e-06, + "loss": 83350.7375, + "step": 60560 + }, + { + "epoch": 0.12235523216587144, + "grad_norm": 21815.453125, + "learning_rate": 9.98639283600774e-06, + "loss": 159528.9625, + "step": 60570 + }, + { + "epoch": 0.12237543279855526, + "grad_norm": 5696.5673828125, + "learning_rate": 9.98636708877808e-06, + "loss": 26768.2562, + "step": 60580 + }, + { + "epoch": 0.12239563343123906, + "grad_norm": 32181.89453125, + "learning_rate": 9.986341317245469e-06, + "loss": 63230.025, + "step": 60590 + }, + { + "epoch": 0.12241583406392288, + "grad_norm": 111329.3046875, + "learning_rate": 9.986315521410035e-06, + "loss": 212210.0, + "step": 60600 + }, + { + "epoch": 0.1224360346966067, + "grad_norm": 1760690.5, + "learning_rate": 9.986289701271905e-06, + "loss": 253846.2, + "step": 60610 + }, + { + "epoch": 0.12245623532929051, + "grad_norm": 0.0, + "learning_rate": 9.986263856831204e-06, + "loss": 199143.85, + "step": 60620 + }, + { + "epoch": 0.12247643596197433, + "grad_norm": 260431.921875, + "learning_rate": 9.986237988088059e-06, + "loss": 100695.2375, + "step": 60630 + }, + { + "epoch": 0.12249663659465815, + "grad_norm": 38626.82421875, + "learning_rate": 9.986212095042593e-06, + "loss": 179759.775, + "step": 60640 + }, + { + "epoch": 0.12251683722734195, + "grad_norm": 926496.0, + "learning_rate": 9.986186177694935e-06, + "loss": 166239.4375, + "step": 60650 + }, + { + "epoch": 0.12253703786002577, + "grad_norm": 164546.765625, + "learning_rate": 9.986160236045207e-06, + "loss": 84298.1062, + "step": 60660 + }, + { + "epoch": 0.1225572384927096, + "grad_norm": 42663.6875, + "learning_rate": 9.986134270093542e-06, + "loss": 198541.275, + "step": 60670 + }, + { + "epoch": 0.12257743912539341, + "grad_norm": 165668.703125, + "learning_rate": 9.986108279840063e-06, + "loss": 153338.9375, + "step": 60680 + }, + { + "epoch": 0.12259763975807722, + "grad_norm": 36377.47265625, + "learning_rate": 9.986082265284896e-06, + "loss": 101431.3813, + "step": 60690 + }, + { + "epoch": 0.12261784039076104, + "grad_norm": 771932.4375, + "learning_rate": 9.98605622642817e-06, + "loss": 98226.9812, + "step": 60700 + }, + { + "epoch": 0.12263804102344486, + "grad_norm": 73792.8046875, + "learning_rate": 9.986030163270011e-06, + "loss": 136187.175, + "step": 60710 + }, + { + "epoch": 0.12265824165612867, + "grad_norm": 24358.943359375, + "learning_rate": 9.986004075810543e-06, + "loss": 176005.1125, + "step": 60720 + }, + { + "epoch": 0.12267844228881249, + "grad_norm": 1469159.875, + "learning_rate": 9.985977964049898e-06, + "loss": 219581.525, + "step": 60730 + }, + { + "epoch": 0.1226986429214963, + "grad_norm": 114610.984375, + "learning_rate": 9.9859518279882e-06, + "loss": 149850.225, + "step": 60740 + }, + { + "epoch": 0.12271884355418011, + "grad_norm": 115741.0859375, + "learning_rate": 9.985925667625581e-06, + "loss": 60256.4, + "step": 60750 + }, + { + "epoch": 0.12273904418686393, + "grad_norm": 18029.21875, + "learning_rate": 9.98589948296216e-06, + "loss": 85926.8062, + "step": 60760 + }, + { + "epoch": 0.12275924481954775, + "grad_norm": 479125.71875, + "learning_rate": 9.985873273998072e-06, + "loss": 134770.575, + "step": 60770 + }, + { + "epoch": 0.12277944545223156, + "grad_norm": 6786.90478515625, + "learning_rate": 9.985847040733442e-06, + "loss": 65358.725, + "step": 60780 + }, + { + "epoch": 0.12279964608491538, + "grad_norm": 17970.666015625, + "learning_rate": 9.9858207831684e-06, + "loss": 116659.6125, + "step": 60790 + }, + { + "epoch": 0.1228198467175992, + "grad_norm": 23401.34375, + "learning_rate": 9.98579450130307e-06, + "loss": 106933.175, + "step": 60800 + }, + { + "epoch": 0.122840047350283, + "grad_norm": 14737.138671875, + "learning_rate": 9.985768195137585e-06, + "loss": 196406.8, + "step": 60810 + }, + { + "epoch": 0.12286024798296682, + "grad_norm": 9437.025390625, + "learning_rate": 9.985741864672067e-06, + "loss": 146782.9875, + "step": 60820 + }, + { + "epoch": 0.12288044861565064, + "grad_norm": 10349.8056640625, + "learning_rate": 9.985715509906649e-06, + "loss": 113996.75, + "step": 60830 + }, + { + "epoch": 0.12290064924833445, + "grad_norm": 348707.3125, + "learning_rate": 9.985689130841459e-06, + "loss": 70798.6, + "step": 60840 + }, + { + "epoch": 0.12292084988101827, + "grad_norm": 168003.546875, + "learning_rate": 9.985662727476625e-06, + "loss": 209350.075, + "step": 60850 + }, + { + "epoch": 0.12294105051370209, + "grad_norm": 246615.890625, + "learning_rate": 9.985636299812275e-06, + "loss": 85456.1438, + "step": 60860 + }, + { + "epoch": 0.12296125114638591, + "grad_norm": 29748.19921875, + "learning_rate": 9.98560984784854e-06, + "loss": 128046.4375, + "step": 60870 + }, + { + "epoch": 0.12298145177906972, + "grad_norm": 721396.6875, + "learning_rate": 9.985583371585544e-06, + "loss": 245438.35, + "step": 60880 + }, + { + "epoch": 0.12300165241175354, + "grad_norm": 4946.51953125, + "learning_rate": 9.98555687102342e-06, + "loss": 45278.4625, + "step": 60890 + }, + { + "epoch": 0.12302185304443736, + "grad_norm": 58137.96484375, + "learning_rate": 9.9855303461623e-06, + "loss": 43657.4219, + "step": 60900 + }, + { + "epoch": 0.12304205367712116, + "grad_norm": 180098.6875, + "learning_rate": 9.985503797002307e-06, + "loss": 149462.7875, + "step": 60910 + }, + { + "epoch": 0.12306225430980498, + "grad_norm": 38683.62890625, + "learning_rate": 9.985477223543574e-06, + "loss": 147935.725, + "step": 60920 + }, + { + "epoch": 0.1230824549424888, + "grad_norm": 124384.953125, + "learning_rate": 9.985450625786228e-06, + "loss": 285716.45, + "step": 60930 + }, + { + "epoch": 0.12310265557517261, + "grad_norm": 421691.96875, + "learning_rate": 9.985424003730403e-06, + "loss": 99134.1, + "step": 60940 + }, + { + "epoch": 0.12312285620785643, + "grad_norm": 14278.15234375, + "learning_rate": 9.985397357376224e-06, + "loss": 178962.85, + "step": 60950 + }, + { + "epoch": 0.12314305684054025, + "grad_norm": 383224.15625, + "learning_rate": 9.985370686723823e-06, + "loss": 327983.4, + "step": 60960 + }, + { + "epoch": 0.12316325747322406, + "grad_norm": 31052.046875, + "learning_rate": 9.985343991773331e-06, + "loss": 298739.225, + "step": 60970 + }, + { + "epoch": 0.12318345810590788, + "grad_norm": 34589.5078125, + "learning_rate": 9.985317272524876e-06, + "loss": 129955.0, + "step": 60980 + }, + { + "epoch": 0.1232036587385917, + "grad_norm": 45163.4921875, + "learning_rate": 9.98529052897859e-06, + "loss": 237800.15, + "step": 60990 + }, + { + "epoch": 0.1232238593712755, + "grad_norm": 27338.458984375, + "learning_rate": 9.985263761134602e-06, + "loss": 92900.95, + "step": 61000 + }, + { + "epoch": 0.12324406000395932, + "grad_norm": 353384.4375, + "learning_rate": 9.985236968993044e-06, + "loss": 99894.5375, + "step": 61010 + }, + { + "epoch": 0.12326426063664314, + "grad_norm": 1156292.875, + "learning_rate": 9.985210152554045e-06, + "loss": 188270.5875, + "step": 61020 + }, + { + "epoch": 0.12328446126932696, + "grad_norm": 82810.84375, + "learning_rate": 9.985183311817736e-06, + "loss": 171284.8125, + "step": 61030 + }, + { + "epoch": 0.12330466190201077, + "grad_norm": 186051.796875, + "learning_rate": 9.985156446784249e-06, + "loss": 199417.45, + "step": 61040 + }, + { + "epoch": 0.12332486253469459, + "grad_norm": 27716.666015625, + "learning_rate": 9.985129557453714e-06, + "loss": 49314.3688, + "step": 61050 + }, + { + "epoch": 0.12334506316737841, + "grad_norm": 410980.71875, + "learning_rate": 9.985102643826261e-06, + "loss": 105391.575, + "step": 61060 + }, + { + "epoch": 0.12336526380006221, + "grad_norm": 4076.0615234375, + "learning_rate": 9.985075705902024e-06, + "loss": 131618.925, + "step": 61070 + }, + { + "epoch": 0.12338546443274603, + "grad_norm": 332996.34375, + "learning_rate": 9.985048743681131e-06, + "loss": 108343.3625, + "step": 61080 + }, + { + "epoch": 0.12340566506542985, + "grad_norm": 138939.609375, + "learning_rate": 9.985021757163715e-06, + "loss": 216444.325, + "step": 61090 + }, + { + "epoch": 0.12342586569811366, + "grad_norm": 344862.0, + "learning_rate": 9.98499474634991e-06, + "loss": 171190.8375, + "step": 61100 + }, + { + "epoch": 0.12344606633079748, + "grad_norm": 1499.0914306640625, + "learning_rate": 9.984967711239844e-06, + "loss": 53815.7375, + "step": 61110 + }, + { + "epoch": 0.1234662669634813, + "grad_norm": 131145.609375, + "learning_rate": 9.984940651833648e-06, + "loss": 104383.9062, + "step": 61120 + }, + { + "epoch": 0.1234864675961651, + "grad_norm": 31280.6953125, + "learning_rate": 9.984913568131458e-06, + "loss": 255832.125, + "step": 61130 + }, + { + "epoch": 0.12350666822884893, + "grad_norm": 246239.03125, + "learning_rate": 9.984886460133403e-06, + "loss": 76052.4875, + "step": 61140 + }, + { + "epoch": 0.12352686886153275, + "grad_norm": 259076.234375, + "learning_rate": 9.984859327839617e-06, + "loss": 154720.35, + "step": 61150 + }, + { + "epoch": 0.12354706949421655, + "grad_norm": 17632.880859375, + "learning_rate": 9.98483217125023e-06, + "loss": 180595.5375, + "step": 61160 + }, + { + "epoch": 0.12356727012690037, + "grad_norm": 1475.3829345703125, + "learning_rate": 9.984804990365376e-06, + "loss": 168505.5875, + "step": 61170 + }, + { + "epoch": 0.12358747075958419, + "grad_norm": 572052.5, + "learning_rate": 9.984777785185188e-06, + "loss": 123806.6625, + "step": 61180 + }, + { + "epoch": 0.12360767139226801, + "grad_norm": 6288.099609375, + "learning_rate": 9.984750555709797e-06, + "loss": 129492.5125, + "step": 61190 + }, + { + "epoch": 0.12362787202495182, + "grad_norm": 170205.765625, + "learning_rate": 9.984723301939337e-06, + "loss": 72459.9438, + "step": 61200 + }, + { + "epoch": 0.12364807265763564, + "grad_norm": 143766.265625, + "learning_rate": 9.984696023873939e-06, + "loss": 80663.0562, + "step": 61210 + }, + { + "epoch": 0.12366827329031946, + "grad_norm": 120720.1015625, + "learning_rate": 9.984668721513737e-06, + "loss": 144546.7375, + "step": 61220 + }, + { + "epoch": 0.12368847392300326, + "grad_norm": 25864.986328125, + "learning_rate": 9.984641394858865e-06, + "loss": 53100.975, + "step": 61230 + }, + { + "epoch": 0.12370867455568708, + "grad_norm": 448113.3125, + "learning_rate": 9.984614043909455e-06, + "loss": 77157.525, + "step": 61240 + }, + { + "epoch": 0.1237288751883709, + "grad_norm": 257115.5625, + "learning_rate": 9.984586668665641e-06, + "loss": 171767.55, + "step": 61250 + }, + { + "epoch": 0.12374907582105471, + "grad_norm": 133769.84375, + "learning_rate": 9.984559269127557e-06, + "loss": 158996.0375, + "step": 61260 + }, + { + "epoch": 0.12376927645373853, + "grad_norm": 8881.7177734375, + "learning_rate": 9.984531845295333e-06, + "loss": 425444.5, + "step": 61270 + }, + { + "epoch": 0.12378947708642235, + "grad_norm": 103559.765625, + "learning_rate": 9.984504397169107e-06, + "loss": 249466.85, + "step": 61280 + }, + { + "epoch": 0.12380967771910616, + "grad_norm": 16938.720703125, + "learning_rate": 9.984476924749011e-06, + "loss": 246649.2, + "step": 61290 + }, + { + "epoch": 0.12382987835178998, + "grad_norm": 463755.21875, + "learning_rate": 9.98444942803518e-06, + "loss": 163437.3375, + "step": 61300 + }, + { + "epoch": 0.1238500789844738, + "grad_norm": 158278.453125, + "learning_rate": 9.984421907027747e-06, + "loss": 58081.3063, + "step": 61310 + }, + { + "epoch": 0.1238702796171576, + "grad_norm": 278403.6875, + "learning_rate": 9.984394361726844e-06, + "loss": 220427.75, + "step": 61320 + }, + { + "epoch": 0.12389048024984142, + "grad_norm": 9390.1201171875, + "learning_rate": 9.98436679213261e-06, + "loss": 80632.775, + "step": 61330 + }, + { + "epoch": 0.12391068088252524, + "grad_norm": 5160.18212890625, + "learning_rate": 9.984339198245175e-06, + "loss": 125438.65, + "step": 61340 + }, + { + "epoch": 0.12393088151520906, + "grad_norm": 339017.71875, + "learning_rate": 9.984311580064676e-06, + "loss": 57744.9563, + "step": 61350 + }, + { + "epoch": 0.12395108214789287, + "grad_norm": 4718.38720703125, + "learning_rate": 9.984283937591246e-06, + "loss": 33922.7625, + "step": 61360 + }, + { + "epoch": 0.12397128278057669, + "grad_norm": 17040.970703125, + "learning_rate": 9.98425627082502e-06, + "loss": 167189.45, + "step": 61370 + }, + { + "epoch": 0.12399148341326051, + "grad_norm": 190297.390625, + "learning_rate": 9.984228579766136e-06, + "loss": 124514.05, + "step": 61380 + }, + { + "epoch": 0.12401168404594431, + "grad_norm": 271302.09375, + "learning_rate": 9.984200864414726e-06, + "loss": 93354.2875, + "step": 61390 + }, + { + "epoch": 0.12403188467862813, + "grad_norm": 105061.8203125, + "learning_rate": 9.984173124770924e-06, + "loss": 104436.15, + "step": 61400 + }, + { + "epoch": 0.12405208531131195, + "grad_norm": 29094.822265625, + "learning_rate": 9.984145360834868e-06, + "loss": 47025.1594, + "step": 61410 + }, + { + "epoch": 0.12407228594399576, + "grad_norm": 207534.515625, + "learning_rate": 9.984117572606691e-06, + "loss": 89448.85, + "step": 61420 + }, + { + "epoch": 0.12409248657667958, + "grad_norm": 44923.01171875, + "learning_rate": 9.984089760086531e-06, + "loss": 129821.8, + "step": 61430 + }, + { + "epoch": 0.1241126872093634, + "grad_norm": 188716.265625, + "learning_rate": 9.98406192327452e-06, + "loss": 109569.2875, + "step": 61440 + }, + { + "epoch": 0.12413288784204721, + "grad_norm": 478803.03125, + "learning_rate": 9.984034062170796e-06, + "loss": 147873.9, + "step": 61450 + }, + { + "epoch": 0.12415308847473103, + "grad_norm": 18596.19140625, + "learning_rate": 9.984006176775496e-06, + "loss": 54725.9062, + "step": 61460 + }, + { + "epoch": 0.12417328910741485, + "grad_norm": 139089.953125, + "learning_rate": 9.983978267088753e-06, + "loss": 181719.5875, + "step": 61470 + }, + { + "epoch": 0.12419348974009865, + "grad_norm": 139381.953125, + "learning_rate": 9.983950333110705e-06, + "loss": 170360.925, + "step": 61480 + }, + { + "epoch": 0.12421369037278247, + "grad_norm": 201774.1875, + "learning_rate": 9.983922374841488e-06, + "loss": 80682.4125, + "step": 61490 + }, + { + "epoch": 0.1242338910054663, + "grad_norm": 144463.828125, + "learning_rate": 9.983894392281237e-06, + "loss": 53556.6062, + "step": 61500 + }, + { + "epoch": 0.12425409163815011, + "grad_norm": 109136.4765625, + "learning_rate": 9.98386638543009e-06, + "loss": 109521.6625, + "step": 61510 + }, + { + "epoch": 0.12427429227083392, + "grad_norm": 96559.7890625, + "learning_rate": 9.983838354288181e-06, + "loss": 53482.1312, + "step": 61520 + }, + { + "epoch": 0.12429449290351774, + "grad_norm": 282916.90625, + "learning_rate": 9.98381029885565e-06, + "loss": 81195.7625, + "step": 61530 + }, + { + "epoch": 0.12431469353620156, + "grad_norm": 191069.59375, + "learning_rate": 9.983782219132631e-06, + "loss": 208580.2375, + "step": 61540 + }, + { + "epoch": 0.12433489416888537, + "grad_norm": 251912.40625, + "learning_rate": 9.983754115119262e-06, + "loss": 102285.45, + "step": 61550 + }, + { + "epoch": 0.12435509480156919, + "grad_norm": 60749.43359375, + "learning_rate": 9.983725986815682e-06, + "loss": 170718.3, + "step": 61560 + }, + { + "epoch": 0.124375295434253, + "grad_norm": 148134.984375, + "learning_rate": 9.983697834222024e-06, + "loss": 214910.025, + "step": 61570 + }, + { + "epoch": 0.12439549606693681, + "grad_norm": 1155896.75, + "learning_rate": 9.983669657338425e-06, + "loss": 189892.2375, + "step": 61580 + }, + { + "epoch": 0.12441569669962063, + "grad_norm": 752661.75, + "learning_rate": 9.98364145616503e-06, + "loss": 295758.65, + "step": 61590 + }, + { + "epoch": 0.12443589733230445, + "grad_norm": 1429185.75, + "learning_rate": 9.983613230701967e-06, + "loss": 254744.25, + "step": 61600 + }, + { + "epoch": 0.12445609796498826, + "grad_norm": 396382.4375, + "learning_rate": 9.98358498094938e-06, + "loss": 76087.5688, + "step": 61610 + }, + { + "epoch": 0.12447629859767208, + "grad_norm": 594833.0, + "learning_rate": 9.983556706907401e-06, + "loss": 160759.7625, + "step": 61620 + }, + { + "epoch": 0.1244964992303559, + "grad_norm": 75494.1796875, + "learning_rate": 9.983528408576173e-06, + "loss": 52374.9219, + "step": 61630 + }, + { + "epoch": 0.1245166998630397, + "grad_norm": 431076.09375, + "learning_rate": 9.983500085955833e-06, + "loss": 185041.6875, + "step": 61640 + }, + { + "epoch": 0.12453690049572352, + "grad_norm": 207402.421875, + "learning_rate": 9.983471739046515e-06, + "loss": 153318.7375, + "step": 61650 + }, + { + "epoch": 0.12455710112840734, + "grad_norm": 67654.078125, + "learning_rate": 9.983443367848363e-06, + "loss": 125169.2375, + "step": 61660 + }, + { + "epoch": 0.12457730176109116, + "grad_norm": 750099.9375, + "learning_rate": 9.98341497236151e-06, + "loss": 214142.575, + "step": 61670 + }, + { + "epoch": 0.12459750239377497, + "grad_norm": 4426.505859375, + "learning_rate": 9.9833865525861e-06, + "loss": 126856.775, + "step": 61680 + }, + { + "epoch": 0.12461770302645879, + "grad_norm": 441101.53125, + "learning_rate": 9.983358108522266e-06, + "loss": 106582.5, + "step": 61690 + }, + { + "epoch": 0.12463790365914261, + "grad_norm": 118105.4765625, + "learning_rate": 9.98332964017015e-06, + "loss": 78929.375, + "step": 61700 + }, + { + "epoch": 0.12465810429182642, + "grad_norm": 244543.0, + "learning_rate": 9.98330114752989e-06, + "loss": 239005.675, + "step": 61710 + }, + { + "epoch": 0.12467830492451024, + "grad_norm": 68713.640625, + "learning_rate": 9.983272630601624e-06, + "loss": 76233.1125, + "step": 61720 + }, + { + "epoch": 0.12469850555719406, + "grad_norm": 701872.125, + "learning_rate": 9.983244089385491e-06, + "loss": 236991.6, + "step": 61730 + }, + { + "epoch": 0.12471870618987786, + "grad_norm": 84742.375, + "learning_rate": 9.98321552388163e-06, + "loss": 268600.2, + "step": 61740 + }, + { + "epoch": 0.12473890682256168, + "grad_norm": 254743.4375, + "learning_rate": 9.983186934090183e-06, + "loss": 96526.375, + "step": 61750 + }, + { + "epoch": 0.1247591074552455, + "grad_norm": 127238.3828125, + "learning_rate": 9.983158320011288e-06, + "loss": 119451.6875, + "step": 61760 + }, + { + "epoch": 0.12477930808792931, + "grad_norm": 586799.5, + "learning_rate": 9.983129681645082e-06, + "loss": 283835.075, + "step": 61770 + }, + { + "epoch": 0.12479950872061313, + "grad_norm": 370640.40625, + "learning_rate": 9.983101018991706e-06, + "loss": 138204.3, + "step": 61780 + }, + { + "epoch": 0.12481970935329695, + "grad_norm": 40055.00390625, + "learning_rate": 9.9830723320513e-06, + "loss": 69186.8562, + "step": 61790 + }, + { + "epoch": 0.12483990998598075, + "grad_norm": 453793.40625, + "learning_rate": 9.983043620824005e-06, + "loss": 62643.3625, + "step": 61800 + }, + { + "epoch": 0.12486011061866457, + "grad_norm": 529608.5625, + "learning_rate": 9.983014885309959e-06, + "loss": 203550.375, + "step": 61810 + }, + { + "epoch": 0.1248803112513484, + "grad_norm": 48235.91796875, + "learning_rate": 9.982986125509303e-06, + "loss": 126448.6125, + "step": 61820 + }, + { + "epoch": 0.12490051188403221, + "grad_norm": 112318.90625, + "learning_rate": 9.982957341422177e-06, + "loss": 113007.7375, + "step": 61830 + }, + { + "epoch": 0.12492071251671602, + "grad_norm": 22106.0859375, + "learning_rate": 9.982928533048722e-06, + "loss": 141722.125, + "step": 61840 + }, + { + "epoch": 0.12494091314939984, + "grad_norm": 108803.828125, + "learning_rate": 9.982899700389077e-06, + "loss": 90069.1938, + "step": 61850 + }, + { + "epoch": 0.12496111378208366, + "grad_norm": 411050.1875, + "learning_rate": 9.982870843443381e-06, + "loss": 54751.6, + "step": 61860 + }, + { + "epoch": 0.12498131441476747, + "grad_norm": 839205.875, + "learning_rate": 9.98284196221178e-06, + "loss": 305830.65, + "step": 61870 + }, + { + "epoch": 0.12500151504745127, + "grad_norm": 89046.8828125, + "learning_rate": 9.982813056694411e-06, + "loss": 93789.125, + "step": 61880 + }, + { + "epoch": 0.1250217156801351, + "grad_norm": 38260.6875, + "learning_rate": 9.982784126891416e-06, + "loss": 103649.9625, + "step": 61890 + }, + { + "epoch": 0.1250419163128189, + "grad_norm": 14863.5966796875, + "learning_rate": 9.982755172802933e-06, + "loss": 49532.5125, + "step": 61900 + }, + { + "epoch": 0.12506211694550273, + "grad_norm": 204608.375, + "learning_rate": 9.98272619442911e-06, + "loss": 139379.675, + "step": 61910 + }, + { + "epoch": 0.12508231757818655, + "grad_norm": 328020.0, + "learning_rate": 9.982697191770079e-06, + "loss": 138712.875, + "step": 61920 + }, + { + "epoch": 0.12510251821087037, + "grad_norm": 12383.4482421875, + "learning_rate": 9.982668164825989e-06, + "loss": 168494.3625, + "step": 61930 + }, + { + "epoch": 0.1251227188435542, + "grad_norm": 82684.859375, + "learning_rate": 9.982639113596978e-06, + "loss": 75769.2875, + "step": 61940 + }, + { + "epoch": 0.12514291947623798, + "grad_norm": 135536.890625, + "learning_rate": 9.982610038083188e-06, + "loss": 135776.525, + "step": 61950 + }, + { + "epoch": 0.1251631201089218, + "grad_norm": 298063.34375, + "learning_rate": 9.98258093828476e-06, + "loss": 116227.775, + "step": 61960 + }, + { + "epoch": 0.12518332074160562, + "grad_norm": 150567.359375, + "learning_rate": 9.98255181420184e-06, + "loss": 80265.4125, + "step": 61970 + }, + { + "epoch": 0.12520352137428944, + "grad_norm": 32387.16796875, + "learning_rate": 9.982522665834565e-06, + "loss": 141944.3, + "step": 61980 + }, + { + "epoch": 0.12522372200697326, + "grad_norm": 120239.6171875, + "learning_rate": 9.982493493183079e-06, + "loss": 122826.25, + "step": 61990 + }, + { + "epoch": 0.12524392263965708, + "grad_norm": 113002.828125, + "learning_rate": 9.982464296247523e-06, + "loss": 117861.9125, + "step": 62000 + }, + { + "epoch": 0.12526412327234088, + "grad_norm": 42255.75390625, + "learning_rate": 9.98243507502804e-06, + "loss": 82047.3813, + "step": 62010 + }, + { + "epoch": 0.1252843239050247, + "grad_norm": 28840.818359375, + "learning_rate": 9.982405829524774e-06, + "loss": 52633.9187, + "step": 62020 + }, + { + "epoch": 0.12530452453770852, + "grad_norm": 917070.4375, + "learning_rate": 9.982376559737866e-06, + "loss": 130068.6, + "step": 62030 + }, + { + "epoch": 0.12532472517039234, + "grad_norm": 52985.75390625, + "learning_rate": 9.982347265667459e-06, + "loss": 269733.2, + "step": 62040 + }, + { + "epoch": 0.12534492580307616, + "grad_norm": 260633.5625, + "learning_rate": 9.982317947313695e-06, + "loss": 99299.825, + "step": 62050 + }, + { + "epoch": 0.12536512643575998, + "grad_norm": 17303.857421875, + "learning_rate": 9.982288604676719e-06, + "loss": 233692.475, + "step": 62060 + }, + { + "epoch": 0.1253853270684438, + "grad_norm": 483171.9375, + "learning_rate": 9.982259237756674e-06, + "loss": 68236.2688, + "step": 62070 + }, + { + "epoch": 0.1254055277011276, + "grad_norm": 2356197.5, + "learning_rate": 9.982229846553698e-06, + "loss": 249731.55, + "step": 62080 + }, + { + "epoch": 0.1254257283338114, + "grad_norm": 111927.7265625, + "learning_rate": 9.982200431067939e-06, + "loss": 194572.2125, + "step": 62090 + }, + { + "epoch": 0.12544592896649523, + "grad_norm": 51428.9921875, + "learning_rate": 9.98217099129954e-06, + "loss": 59199.2812, + "step": 62100 + }, + { + "epoch": 0.12546612959917905, + "grad_norm": 75714.890625, + "learning_rate": 9.982141527248646e-06, + "loss": 89970.2188, + "step": 62110 + }, + { + "epoch": 0.12548633023186287, + "grad_norm": 115603.5859375, + "learning_rate": 9.982112038915394e-06, + "loss": 310235.55, + "step": 62120 + }, + { + "epoch": 0.1255065308645467, + "grad_norm": 447632.875, + "learning_rate": 9.982082526299935e-06, + "loss": 131194.25, + "step": 62130 + }, + { + "epoch": 0.12552673149723048, + "grad_norm": 656170.625, + "learning_rate": 9.98205298940241e-06, + "loss": 122072.6375, + "step": 62140 + }, + { + "epoch": 0.1255469321299143, + "grad_norm": 14869.3046875, + "learning_rate": 9.982023428222963e-06, + "loss": 55263.7688, + "step": 62150 + }, + { + "epoch": 0.12556713276259812, + "grad_norm": 436234.53125, + "learning_rate": 9.981993842761737e-06, + "loss": 163534.9875, + "step": 62160 + }, + { + "epoch": 0.12558733339528194, + "grad_norm": 54711.640625, + "learning_rate": 9.981964233018877e-06, + "loss": 94359.2312, + "step": 62170 + }, + { + "epoch": 0.12560753402796576, + "grad_norm": 88729.453125, + "learning_rate": 9.981934598994529e-06, + "loss": 185889.775, + "step": 62180 + }, + { + "epoch": 0.12562773466064958, + "grad_norm": 68998.4765625, + "learning_rate": 9.981904940688836e-06, + "loss": 106081.5875, + "step": 62190 + }, + { + "epoch": 0.12564793529333337, + "grad_norm": 67899.3515625, + "learning_rate": 9.981875258101944e-06, + "loss": 148073.1375, + "step": 62200 + }, + { + "epoch": 0.1256681359260172, + "grad_norm": 443910.4375, + "learning_rate": 9.981845551233993e-06, + "loss": 157469.675, + "step": 62210 + }, + { + "epoch": 0.125688336558701, + "grad_norm": 964714.5, + "learning_rate": 9.981815820085132e-06, + "loss": 240117.525, + "step": 62220 + }, + { + "epoch": 0.12570853719138483, + "grad_norm": 29007.619140625, + "learning_rate": 9.981786064655505e-06, + "loss": 118340.125, + "step": 62230 + }, + { + "epoch": 0.12572873782406865, + "grad_norm": 197372.34375, + "learning_rate": 9.981756284945256e-06, + "loss": 133054.575, + "step": 62240 + }, + { + "epoch": 0.12574893845675247, + "grad_norm": 376340.21875, + "learning_rate": 9.981726480954532e-06, + "loss": 149099.95, + "step": 62250 + }, + { + "epoch": 0.1257691390894363, + "grad_norm": 2105.56298828125, + "learning_rate": 9.981696652683479e-06, + "loss": 140247.275, + "step": 62260 + }, + { + "epoch": 0.12578933972212009, + "grad_norm": 30772.607421875, + "learning_rate": 9.98166680013224e-06, + "loss": 130902.0125, + "step": 62270 + }, + { + "epoch": 0.1258095403548039, + "grad_norm": 193549.296875, + "learning_rate": 9.981636923300959e-06, + "loss": 95956.425, + "step": 62280 + }, + { + "epoch": 0.12582974098748773, + "grad_norm": 252467.359375, + "learning_rate": 9.981607022189785e-06, + "loss": 113682.0625, + "step": 62290 + }, + { + "epoch": 0.12584994162017155, + "grad_norm": 26975.91015625, + "learning_rate": 9.981577096798864e-06, + "loss": 127625.175, + "step": 62300 + }, + { + "epoch": 0.12587014225285537, + "grad_norm": 32496.154296875, + "learning_rate": 9.981547147128338e-06, + "loss": 106726.85, + "step": 62310 + }, + { + "epoch": 0.12589034288553919, + "grad_norm": 93966.9296875, + "learning_rate": 9.981517173178357e-06, + "loss": 231329.925, + "step": 62320 + }, + { + "epoch": 0.12591054351822298, + "grad_norm": 2102288.25, + "learning_rate": 9.981487174949065e-06, + "loss": 204795.325, + "step": 62330 + }, + { + "epoch": 0.1259307441509068, + "grad_norm": 227696.890625, + "learning_rate": 9.98145715244061e-06, + "loss": 135730.6625, + "step": 62340 + }, + { + "epoch": 0.12595094478359062, + "grad_norm": 346506.71875, + "learning_rate": 9.981427105653135e-06, + "loss": 48536.1875, + "step": 62350 + }, + { + "epoch": 0.12597114541627444, + "grad_norm": 37298.7265625, + "learning_rate": 9.981397034586789e-06, + "loss": 89144.7188, + "step": 62360 + }, + { + "epoch": 0.12599134604895826, + "grad_norm": 74883.625, + "learning_rate": 9.981366939241719e-06, + "loss": 78331.1562, + "step": 62370 + }, + { + "epoch": 0.12601154668164208, + "grad_norm": 6293.11376953125, + "learning_rate": 9.98133681961807e-06, + "loss": 44674.7156, + "step": 62380 + }, + { + "epoch": 0.1260317473143259, + "grad_norm": 33318.54296875, + "learning_rate": 9.981306675715989e-06, + "loss": 189357.125, + "step": 62390 + }, + { + "epoch": 0.1260519479470097, + "grad_norm": 231875.8125, + "learning_rate": 9.981276507535625e-06, + "loss": 121428.8375, + "step": 62400 + }, + { + "epoch": 0.1260721485796935, + "grad_norm": 53528.58203125, + "learning_rate": 9.981246315077123e-06, + "loss": 109913.1625, + "step": 62410 + }, + { + "epoch": 0.12609234921237733, + "grad_norm": 24222.986328125, + "learning_rate": 9.98121609834063e-06, + "loss": 105462.75, + "step": 62420 + }, + { + "epoch": 0.12611254984506115, + "grad_norm": 5489.9384765625, + "learning_rate": 9.981185857326292e-06, + "loss": 86826.5562, + "step": 62430 + }, + { + "epoch": 0.12613275047774497, + "grad_norm": 1383175.875, + "learning_rate": 9.98115559203426e-06, + "loss": 216206.4, + "step": 62440 + }, + { + "epoch": 0.1261529511104288, + "grad_norm": 69093.1484375, + "learning_rate": 9.981125302464681e-06, + "loss": 327426.325, + "step": 62450 + }, + { + "epoch": 0.12617315174311258, + "grad_norm": 111441.09375, + "learning_rate": 9.9810949886177e-06, + "loss": 102802.575, + "step": 62460 + }, + { + "epoch": 0.1261933523757964, + "grad_norm": 736011.5625, + "learning_rate": 9.981064650493466e-06, + "loss": 188760.45, + "step": 62470 + }, + { + "epoch": 0.12621355300848022, + "grad_norm": 28053.205078125, + "learning_rate": 9.981034288092129e-06, + "loss": 67142.6438, + "step": 62480 + }, + { + "epoch": 0.12623375364116404, + "grad_norm": 67860.921875, + "learning_rate": 9.981003901413833e-06, + "loss": 71070.825, + "step": 62490 + }, + { + "epoch": 0.12625395427384786, + "grad_norm": 1316942.125, + "learning_rate": 9.980973490458728e-06, + "loss": 244935.45, + "step": 62500 + }, + { + "epoch": 0.12627415490653168, + "grad_norm": 112279.3984375, + "learning_rate": 9.980943055226964e-06, + "loss": 302506.3, + "step": 62510 + }, + { + "epoch": 0.12629435553921547, + "grad_norm": 35530.609375, + "learning_rate": 9.980912595718686e-06, + "loss": 89758.2437, + "step": 62520 + }, + { + "epoch": 0.1263145561718993, + "grad_norm": 111562.3984375, + "learning_rate": 9.980882111934046e-06, + "loss": 103231.075, + "step": 62530 + }, + { + "epoch": 0.12633475680458311, + "grad_norm": 35617.984375, + "learning_rate": 9.980851603873189e-06, + "loss": 83635.2063, + "step": 62540 + }, + { + "epoch": 0.12635495743726693, + "grad_norm": 209287.25, + "learning_rate": 9.980821071536266e-06, + "loss": 49649.575, + "step": 62550 + }, + { + "epoch": 0.12637515806995075, + "grad_norm": 27026.830078125, + "learning_rate": 9.980790514923425e-06, + "loss": 68461.9688, + "step": 62560 + }, + { + "epoch": 0.12639535870263457, + "grad_norm": 25899.60546875, + "learning_rate": 9.980759934034816e-06, + "loss": 88512.4812, + "step": 62570 + }, + { + "epoch": 0.1264155593353184, + "grad_norm": 253463.109375, + "learning_rate": 9.980729328870586e-06, + "loss": 184979.425, + "step": 62580 + }, + { + "epoch": 0.1264357599680022, + "grad_norm": 1189491.5, + "learning_rate": 9.980698699430884e-06, + "loss": 209676.2875, + "step": 62590 + }, + { + "epoch": 0.126455960600686, + "grad_norm": 29661.119140625, + "learning_rate": 9.980668045715864e-06, + "loss": 60933.2375, + "step": 62600 + }, + { + "epoch": 0.12647616123336983, + "grad_norm": 38534.796875, + "learning_rate": 9.98063736772567e-06, + "loss": 99697.8313, + "step": 62610 + }, + { + "epoch": 0.12649636186605365, + "grad_norm": 405608.9375, + "learning_rate": 9.980606665460453e-06, + "loss": 193431.575, + "step": 62620 + }, + { + "epoch": 0.12651656249873747, + "grad_norm": 3119.25830078125, + "learning_rate": 9.980575938920364e-06, + "loss": 83170.2375, + "step": 62630 + }, + { + "epoch": 0.1265367631314213, + "grad_norm": 51558.66796875, + "learning_rate": 9.980545188105553e-06, + "loss": 48773.4719, + "step": 62640 + }, + { + "epoch": 0.12655696376410508, + "grad_norm": 121497.2109375, + "learning_rate": 9.980514413016167e-06, + "loss": 59640.9313, + "step": 62650 + }, + { + "epoch": 0.1265771643967889, + "grad_norm": 338734.84375, + "learning_rate": 9.980483613652359e-06, + "loss": 67363.7625, + "step": 62660 + }, + { + "epoch": 0.12659736502947272, + "grad_norm": 155065.984375, + "learning_rate": 9.980452790014278e-06, + "loss": 102422.6062, + "step": 62670 + }, + { + "epoch": 0.12661756566215654, + "grad_norm": 268842.03125, + "learning_rate": 9.980421942102075e-06, + "loss": 127429.1375, + "step": 62680 + }, + { + "epoch": 0.12663776629484036, + "grad_norm": 545264.5625, + "learning_rate": 9.980391069915897e-06, + "loss": 85865.6375, + "step": 62690 + }, + { + "epoch": 0.12665796692752418, + "grad_norm": 27526.80078125, + "learning_rate": 9.980360173455899e-06, + "loss": 112156.525, + "step": 62700 + }, + { + "epoch": 0.126678167560208, + "grad_norm": 37240.5, + "learning_rate": 9.980329252722227e-06, + "loss": 39876.2156, + "step": 62710 + }, + { + "epoch": 0.1266983681928918, + "grad_norm": 64700.8125, + "learning_rate": 9.980298307715038e-06, + "loss": 110142.975, + "step": 62720 + }, + { + "epoch": 0.1267185688255756, + "grad_norm": 410885.8125, + "learning_rate": 9.980267338434477e-06, + "loss": 184034.5375, + "step": 62730 + }, + { + "epoch": 0.12673876945825943, + "grad_norm": 14146.546875, + "learning_rate": 9.980236344880696e-06, + "loss": 87761.475, + "step": 62740 + }, + { + "epoch": 0.12675897009094325, + "grad_norm": 1260769.375, + "learning_rate": 9.98020532705385e-06, + "loss": 215142.675, + "step": 62750 + }, + { + "epoch": 0.12677917072362707, + "grad_norm": 857321.4375, + "learning_rate": 9.980174284954084e-06, + "loss": 277610.625, + "step": 62760 + }, + { + "epoch": 0.1267993713563109, + "grad_norm": 167162.78125, + "learning_rate": 9.980143218581555e-06, + "loss": 165436.5375, + "step": 62770 + }, + { + "epoch": 0.12681957198899468, + "grad_norm": 949155.125, + "learning_rate": 9.98011212793641e-06, + "loss": 144944.0, + "step": 62780 + }, + { + "epoch": 0.1268397726216785, + "grad_norm": 8801.2685546875, + "learning_rate": 9.980081013018804e-06, + "loss": 120212.7875, + "step": 62790 + }, + { + "epoch": 0.12685997325436232, + "grad_norm": 9091.0927734375, + "learning_rate": 9.980049873828887e-06, + "loss": 40076.4344, + "step": 62800 + }, + { + "epoch": 0.12688017388704614, + "grad_norm": 353443.5625, + "learning_rate": 9.98001871036681e-06, + "loss": 201757.725, + "step": 62810 + }, + { + "epoch": 0.12690037451972996, + "grad_norm": 1696890.5, + "learning_rate": 9.979987522632727e-06, + "loss": 209448.15, + "step": 62820 + }, + { + "epoch": 0.12692057515241378, + "grad_norm": 1157998.375, + "learning_rate": 9.979956310626788e-06, + "loss": 142712.6, + "step": 62830 + }, + { + "epoch": 0.12694077578509758, + "grad_norm": 46451.51171875, + "learning_rate": 9.979925074349146e-06, + "loss": 100022.05, + "step": 62840 + }, + { + "epoch": 0.1269609764177814, + "grad_norm": 204466.21875, + "learning_rate": 9.979893813799953e-06, + "loss": 40362.2969, + "step": 62850 + }, + { + "epoch": 0.12698117705046522, + "grad_norm": 9812.263671875, + "learning_rate": 9.979862528979362e-06, + "loss": 71449.3687, + "step": 62860 + }, + { + "epoch": 0.12700137768314904, + "grad_norm": 113338.96875, + "learning_rate": 9.979831219887526e-06, + "loss": 46890.7875, + "step": 62870 + }, + { + "epoch": 0.12702157831583286, + "grad_norm": 78011.921875, + "learning_rate": 9.979799886524594e-06, + "loss": 97414.375, + "step": 62880 + }, + { + "epoch": 0.12704177894851668, + "grad_norm": 401697.625, + "learning_rate": 9.979768528890725e-06, + "loss": 125632.275, + "step": 62890 + }, + { + "epoch": 0.1270619795812005, + "grad_norm": 837052.5, + "learning_rate": 9.979737146986064e-06, + "loss": 209563.9625, + "step": 62900 + }, + { + "epoch": 0.1270821802138843, + "grad_norm": 18067.2421875, + "learning_rate": 9.979705740810771e-06, + "loss": 158040.975, + "step": 62910 + }, + { + "epoch": 0.1271023808465681, + "grad_norm": 183874.15625, + "learning_rate": 9.979674310364996e-06, + "loss": 124379.675, + "step": 62920 + }, + { + "epoch": 0.12712258147925193, + "grad_norm": 280229.90625, + "learning_rate": 9.979642855648892e-06, + "loss": 131601.25, + "step": 62930 + }, + { + "epoch": 0.12714278211193575, + "grad_norm": 87031.984375, + "learning_rate": 9.979611376662613e-06, + "loss": 69933.025, + "step": 62940 + }, + { + "epoch": 0.12716298274461957, + "grad_norm": 79888.765625, + "learning_rate": 9.97957987340631e-06, + "loss": 106160.625, + "step": 62950 + }, + { + "epoch": 0.1271831833773034, + "grad_norm": 537264.9375, + "learning_rate": 9.979548345880142e-06, + "loss": 303450.85, + "step": 62960 + }, + { + "epoch": 0.12720338400998718, + "grad_norm": 1231244.375, + "learning_rate": 9.979516794084256e-06, + "loss": 157576.7375, + "step": 62970 + }, + { + "epoch": 0.127223584642671, + "grad_norm": 359992.4375, + "learning_rate": 9.97948521801881e-06, + "loss": 50015.1562, + "step": 62980 + }, + { + "epoch": 0.12724378527535482, + "grad_norm": 190158.421875, + "learning_rate": 9.979453617683958e-06, + "loss": 72445.6875, + "step": 62990 + }, + { + "epoch": 0.12726398590803864, + "grad_norm": 645784.875, + "learning_rate": 9.979421993079853e-06, + "loss": 100625.05, + "step": 63000 + }, + { + "epoch": 0.12728418654072246, + "grad_norm": 459220.15625, + "learning_rate": 9.979390344206648e-06, + "loss": 249213.35, + "step": 63010 + }, + { + "epoch": 0.12730438717340628, + "grad_norm": 83039.8828125, + "learning_rate": 9.9793586710645e-06, + "loss": 137717.375, + "step": 63020 + }, + { + "epoch": 0.1273245878060901, + "grad_norm": 1382567.625, + "learning_rate": 9.97932697365356e-06, + "loss": 214524.95, + "step": 63030 + }, + { + "epoch": 0.1273447884387739, + "grad_norm": 2480857.5, + "learning_rate": 9.979295251973986e-06, + "loss": 84799.1187, + "step": 63040 + }, + { + "epoch": 0.1273649890714577, + "grad_norm": 979279.6875, + "learning_rate": 9.97926350602593e-06, + "loss": 242299.325, + "step": 63050 + }, + { + "epoch": 0.12738518970414153, + "grad_norm": 760607.3125, + "learning_rate": 9.979231735809546e-06, + "loss": 53464.7625, + "step": 63060 + }, + { + "epoch": 0.12740539033682535, + "grad_norm": 157324.59375, + "learning_rate": 9.979199941324994e-06, + "loss": 87634.0312, + "step": 63070 + }, + { + "epoch": 0.12742559096950917, + "grad_norm": 82461.328125, + "learning_rate": 9.979168122572422e-06, + "loss": 102209.3125, + "step": 63080 + }, + { + "epoch": 0.127445791602193, + "grad_norm": 94720.2890625, + "learning_rate": 9.97913627955199e-06, + "loss": 166177.05, + "step": 63090 + }, + { + "epoch": 0.12746599223487678, + "grad_norm": 244683.96875, + "learning_rate": 9.979104412263851e-06, + "loss": 65986.9, + "step": 63100 + }, + { + "epoch": 0.1274861928675606, + "grad_norm": 1765385.375, + "learning_rate": 9.979072520708162e-06, + "loss": 410213.0, + "step": 63110 + }, + { + "epoch": 0.12750639350024442, + "grad_norm": 25431.689453125, + "learning_rate": 9.979040604885077e-06, + "loss": 94418.8438, + "step": 63120 + }, + { + "epoch": 0.12752659413292824, + "grad_norm": 83589.8828125, + "learning_rate": 9.979008664794751e-06, + "loss": 198199.5375, + "step": 63130 + }, + { + "epoch": 0.12754679476561206, + "grad_norm": 264043.96875, + "learning_rate": 9.978976700437341e-06, + "loss": 59109.975, + "step": 63140 + }, + { + "epoch": 0.12756699539829588, + "grad_norm": 723459.875, + "learning_rate": 9.978944711813003e-06, + "loss": 79232.175, + "step": 63150 + }, + { + "epoch": 0.12758719603097968, + "grad_norm": 38277.96875, + "learning_rate": 9.978912698921892e-06, + "loss": 117674.4625, + "step": 63160 + }, + { + "epoch": 0.1276073966636635, + "grad_norm": 172360.6875, + "learning_rate": 9.978880661764166e-06, + "loss": 127928.8875, + "step": 63170 + }, + { + "epoch": 0.12762759729634732, + "grad_norm": 252143.5, + "learning_rate": 9.978848600339978e-06, + "loss": 58996.0875, + "step": 63180 + }, + { + "epoch": 0.12764779792903114, + "grad_norm": 17617.802734375, + "learning_rate": 9.978816514649486e-06, + "loss": 262775.35, + "step": 63190 + }, + { + "epoch": 0.12766799856171496, + "grad_norm": 116269.921875, + "learning_rate": 9.978784404692847e-06, + "loss": 66057.2937, + "step": 63200 + }, + { + "epoch": 0.12768819919439878, + "grad_norm": 570698.5625, + "learning_rate": 9.978752270470216e-06, + "loss": 169563.65, + "step": 63210 + }, + { + "epoch": 0.1277083998270826, + "grad_norm": 7619.47900390625, + "learning_rate": 9.97872011198175e-06, + "loss": 48402.5719, + "step": 63220 + }, + { + "epoch": 0.1277286004597664, + "grad_norm": 17688.1875, + "learning_rate": 9.978687929227606e-06, + "loss": 54528.95, + "step": 63230 + }, + { + "epoch": 0.1277488010924502, + "grad_norm": 36218.0859375, + "learning_rate": 9.97865572220794e-06, + "loss": 135815.25, + "step": 63240 + }, + { + "epoch": 0.12776900172513403, + "grad_norm": 22569.658203125, + "learning_rate": 9.978623490922913e-06, + "loss": 94978.1, + "step": 63250 + }, + { + "epoch": 0.12778920235781785, + "grad_norm": 105486.1640625, + "learning_rate": 9.978591235372675e-06, + "loss": 103989.7437, + "step": 63260 + }, + { + "epoch": 0.12780940299050167, + "grad_norm": 66550.7890625, + "learning_rate": 9.97855895555739e-06, + "loss": 224429.45, + "step": 63270 + }, + { + "epoch": 0.1278296036231855, + "grad_norm": 1654754.875, + "learning_rate": 9.978526651477211e-06, + "loss": 153266.55, + "step": 63280 + }, + { + "epoch": 0.12784980425586928, + "grad_norm": 266628.96875, + "learning_rate": 9.978494323132296e-06, + "loss": 87990.0938, + "step": 63290 + }, + { + "epoch": 0.1278700048885531, + "grad_norm": 279611.625, + "learning_rate": 9.978461970522807e-06, + "loss": 267152.5, + "step": 63300 + }, + { + "epoch": 0.12789020552123692, + "grad_norm": 63041.73828125, + "learning_rate": 9.978429593648894e-06, + "loss": 214028.4, + "step": 63310 + }, + { + "epoch": 0.12791040615392074, + "grad_norm": 3687844.5, + "learning_rate": 9.978397192510722e-06, + "loss": 256636.15, + "step": 63320 + }, + { + "epoch": 0.12793060678660456, + "grad_norm": 183898.5, + "learning_rate": 9.978364767108444e-06, + "loss": 94030.2937, + "step": 63330 + }, + { + "epoch": 0.12795080741928838, + "grad_norm": 71968.4453125, + "learning_rate": 9.97833231744222e-06, + "loss": 223238.325, + "step": 63340 + }, + { + "epoch": 0.1279710080519722, + "grad_norm": 246774.546875, + "learning_rate": 9.97829984351221e-06, + "loss": 118726.25, + "step": 63350 + }, + { + "epoch": 0.127991208684656, + "grad_norm": 124702.8125, + "learning_rate": 9.978267345318569e-06, + "loss": 52255.8938, + "step": 63360 + }, + { + "epoch": 0.1280114093173398, + "grad_norm": 19482.478515625, + "learning_rate": 9.978234822861456e-06, + "loss": 24016.4328, + "step": 63370 + }, + { + "epoch": 0.12803160995002363, + "grad_norm": 651124.5625, + "learning_rate": 9.978202276141032e-06, + "loss": 241314.75, + "step": 63380 + }, + { + "epoch": 0.12805181058270745, + "grad_norm": 74427.5, + "learning_rate": 9.978169705157455e-06, + "loss": 206610.35, + "step": 63390 + }, + { + "epoch": 0.12807201121539127, + "grad_norm": 6246.126953125, + "learning_rate": 9.97813710991088e-06, + "loss": 67838.5625, + "step": 63400 + }, + { + "epoch": 0.1280922118480751, + "grad_norm": 376392.125, + "learning_rate": 9.978104490401468e-06, + "loss": 212244.275, + "step": 63410 + }, + { + "epoch": 0.12811241248075889, + "grad_norm": 656995.5625, + "learning_rate": 9.978071846629381e-06, + "loss": 72536.0625, + "step": 63420 + }, + { + "epoch": 0.1281326131134427, + "grad_norm": 386055.34375, + "learning_rate": 9.978039178594774e-06, + "loss": 172177.5375, + "step": 63430 + }, + { + "epoch": 0.12815281374612653, + "grad_norm": 262454.125, + "learning_rate": 9.978006486297808e-06, + "loss": 62513.7125, + "step": 63440 + }, + { + "epoch": 0.12817301437881035, + "grad_norm": 135173.984375, + "learning_rate": 9.977973769738642e-06, + "loss": 58138.85, + "step": 63450 + }, + { + "epoch": 0.12819321501149417, + "grad_norm": 1062924.375, + "learning_rate": 9.977941028917436e-06, + "loss": 142524.975, + "step": 63460 + }, + { + "epoch": 0.12821341564417799, + "grad_norm": 56576.4609375, + "learning_rate": 9.977908263834348e-06, + "loss": 220195.525, + "step": 63470 + }, + { + "epoch": 0.12823361627686178, + "grad_norm": 91272.5078125, + "learning_rate": 9.97787547448954e-06, + "loss": 113793.7375, + "step": 63480 + }, + { + "epoch": 0.1282538169095456, + "grad_norm": 10084.95703125, + "learning_rate": 9.977842660883172e-06, + "loss": 45547.5094, + "step": 63490 + }, + { + "epoch": 0.12827401754222942, + "grad_norm": 104930.21875, + "learning_rate": 9.9778098230154e-06, + "loss": 194685.525, + "step": 63500 + }, + { + "epoch": 0.12829421817491324, + "grad_norm": 318296.25, + "learning_rate": 9.97777696088639e-06, + "loss": 96612.9688, + "step": 63510 + }, + { + "epoch": 0.12831441880759706, + "grad_norm": 314079.1875, + "learning_rate": 9.977744074496297e-06, + "loss": 151779.625, + "step": 63520 + }, + { + "epoch": 0.12833461944028088, + "grad_norm": 20339.8125, + "learning_rate": 9.97771116384528e-06, + "loss": 95444.0625, + "step": 63530 + }, + { + "epoch": 0.1283548200729647, + "grad_norm": 118918.21875, + "learning_rate": 9.977678228933508e-06, + "loss": 196303.4125, + "step": 63540 + }, + { + "epoch": 0.1283750207056485, + "grad_norm": 197985.546875, + "learning_rate": 9.977645269761131e-06, + "loss": 124279.7125, + "step": 63550 + }, + { + "epoch": 0.1283952213383323, + "grad_norm": 65050.3203125, + "learning_rate": 9.977612286328317e-06, + "loss": 80883.6938, + "step": 63560 + }, + { + "epoch": 0.12841542197101613, + "grad_norm": 118413.734375, + "learning_rate": 9.977579278635225e-06, + "loss": 87060.85, + "step": 63570 + }, + { + "epoch": 0.12843562260369995, + "grad_norm": 14441.326171875, + "learning_rate": 9.977546246682015e-06, + "loss": 52976.1188, + "step": 63580 + }, + { + "epoch": 0.12845582323638377, + "grad_norm": 172610.59375, + "learning_rate": 9.977513190468848e-06, + "loss": 269303.55, + "step": 63590 + }, + { + "epoch": 0.1284760238690676, + "grad_norm": 145539.484375, + "learning_rate": 9.977480109995886e-06, + "loss": 41797.8313, + "step": 63600 + }, + { + "epoch": 0.12849622450175138, + "grad_norm": 806.5298461914062, + "learning_rate": 9.977447005263289e-06, + "loss": 148240.5875, + "step": 63610 + }, + { + "epoch": 0.1285164251344352, + "grad_norm": 0.0, + "learning_rate": 9.97741387627122e-06, + "loss": 113132.775, + "step": 63620 + }, + { + "epoch": 0.12853662576711902, + "grad_norm": 74846.8671875, + "learning_rate": 9.977380723019838e-06, + "loss": 89510.7937, + "step": 63630 + }, + { + "epoch": 0.12855682639980284, + "grad_norm": 166976.453125, + "learning_rate": 9.977347545509307e-06, + "loss": 127017.3, + "step": 63640 + }, + { + "epoch": 0.12857702703248666, + "grad_norm": 437399.4375, + "learning_rate": 9.977314343739785e-06, + "loss": 198313.3, + "step": 63650 + }, + { + "epoch": 0.12859722766517048, + "grad_norm": 245820.609375, + "learning_rate": 9.97728111771144e-06, + "loss": 80622.4125, + "step": 63660 + }, + { + "epoch": 0.1286174282978543, + "grad_norm": 1197011.125, + "learning_rate": 9.97724786742443e-06, + "loss": 235488.4, + "step": 63670 + }, + { + "epoch": 0.1286376289305381, + "grad_norm": 233559.296875, + "learning_rate": 9.977214592878917e-06, + "loss": 66070.35, + "step": 63680 + }, + { + "epoch": 0.12865782956322191, + "grad_norm": 204340.0625, + "learning_rate": 9.977181294075063e-06, + "loss": 161788.1125, + "step": 63690 + }, + { + "epoch": 0.12867803019590573, + "grad_norm": 175306.625, + "learning_rate": 9.977147971013033e-06, + "loss": 150477.3375, + "step": 63700 + }, + { + "epoch": 0.12869823082858955, + "grad_norm": 25801.611328125, + "learning_rate": 9.977114623692985e-06, + "loss": 62160.075, + "step": 63710 + }, + { + "epoch": 0.12871843146127337, + "grad_norm": 489599.8125, + "learning_rate": 9.977081252115085e-06, + "loss": 188869.3, + "step": 63720 + }, + { + "epoch": 0.1287386320939572, + "grad_norm": 13146.142578125, + "learning_rate": 9.977047856279496e-06, + "loss": 207041.8625, + "step": 63730 + }, + { + "epoch": 0.128758832726641, + "grad_norm": 78281.0703125, + "learning_rate": 9.977014436186377e-06, + "loss": 113607.55, + "step": 63740 + }, + { + "epoch": 0.1287790333593248, + "grad_norm": 75254.21875, + "learning_rate": 9.976980991835896e-06, + "loss": 226340.925, + "step": 63750 + }, + { + "epoch": 0.12879923399200863, + "grad_norm": 73753.1328125, + "learning_rate": 9.97694752322821e-06, + "loss": 47064.75, + "step": 63760 + }, + { + "epoch": 0.12881943462469245, + "grad_norm": 8096.8818359375, + "learning_rate": 9.976914030363488e-06, + "loss": 75431.025, + "step": 63770 + }, + { + "epoch": 0.12883963525737627, + "grad_norm": 362387.78125, + "learning_rate": 9.976880513241889e-06, + "loss": 117860.5375, + "step": 63780 + }, + { + "epoch": 0.1288598358900601, + "grad_norm": 11331.5029296875, + "learning_rate": 9.976846971863579e-06, + "loss": 107294.025, + "step": 63790 + }, + { + "epoch": 0.12888003652274388, + "grad_norm": 262922.65625, + "learning_rate": 9.97681340622872e-06, + "loss": 91372.0, + "step": 63800 + }, + { + "epoch": 0.1289002371554277, + "grad_norm": 45591.67578125, + "learning_rate": 9.976779816337476e-06, + "loss": 48420.5344, + "step": 63810 + }, + { + "epoch": 0.12892043778811152, + "grad_norm": 30205.142578125, + "learning_rate": 9.976746202190012e-06, + "loss": 68337.4375, + "step": 63820 + }, + { + "epoch": 0.12894063842079534, + "grad_norm": 19500.43359375, + "learning_rate": 9.97671256378649e-06, + "loss": 49576.7562, + "step": 63830 + }, + { + "epoch": 0.12896083905347916, + "grad_norm": 117254.796875, + "learning_rate": 9.976678901127074e-06, + "loss": 120204.5125, + "step": 63840 + }, + { + "epoch": 0.12898103968616298, + "grad_norm": 158472.34375, + "learning_rate": 9.976645214211929e-06, + "loss": 116504.175, + "step": 63850 + }, + { + "epoch": 0.1290012403188468, + "grad_norm": 127535.7109375, + "learning_rate": 9.976611503041218e-06, + "loss": 232871.25, + "step": 63860 + }, + { + "epoch": 0.1290214409515306, + "grad_norm": 336594.0, + "learning_rate": 9.976577767615108e-06, + "loss": 101235.525, + "step": 63870 + }, + { + "epoch": 0.1290416415842144, + "grad_norm": 39231.7421875, + "learning_rate": 9.97654400793376e-06, + "loss": 79491.1687, + "step": 63880 + }, + { + "epoch": 0.12906184221689823, + "grad_norm": 229990.953125, + "learning_rate": 9.97651022399734e-06, + "loss": 136704.675, + "step": 63890 + }, + { + "epoch": 0.12908204284958205, + "grad_norm": 199153.8125, + "learning_rate": 9.976476415806013e-06, + "loss": 117818.1875, + "step": 63900 + }, + { + "epoch": 0.12910224348226587, + "grad_norm": 32581.013671875, + "learning_rate": 9.976442583359944e-06, + "loss": 86692.1, + "step": 63910 + }, + { + "epoch": 0.1291224441149497, + "grad_norm": 112353.4296875, + "learning_rate": 9.976408726659296e-06, + "loss": 43228.6219, + "step": 63920 + }, + { + "epoch": 0.12914264474763348, + "grad_norm": 30218.611328125, + "learning_rate": 9.976374845704238e-06, + "loss": 93977.1562, + "step": 63930 + }, + { + "epoch": 0.1291628453803173, + "grad_norm": 6682.62841796875, + "learning_rate": 9.976340940494931e-06, + "loss": 81054.5688, + "step": 63940 + }, + { + "epoch": 0.12918304601300112, + "grad_norm": 607594.875, + "learning_rate": 9.976307011031542e-06, + "loss": 129823.7625, + "step": 63950 + }, + { + "epoch": 0.12920324664568494, + "grad_norm": 5984.00341796875, + "learning_rate": 9.976273057314236e-06, + "loss": 43775.3625, + "step": 63960 + }, + { + "epoch": 0.12922344727836876, + "grad_norm": 301399.59375, + "learning_rate": 9.97623907934318e-06, + "loss": 74267.2812, + "step": 63970 + }, + { + "epoch": 0.12924364791105258, + "grad_norm": 325373.25, + "learning_rate": 9.976205077118536e-06, + "loss": 182794.575, + "step": 63980 + }, + { + "epoch": 0.12926384854373638, + "grad_norm": 125774.171875, + "learning_rate": 9.976171050640473e-06, + "loss": 118793.35, + "step": 63990 + }, + { + "epoch": 0.1292840491764202, + "grad_norm": 795586.25, + "learning_rate": 9.976136999909156e-06, + "loss": 121433.675, + "step": 64000 + }, + { + "epoch": 0.12930424980910402, + "grad_norm": 56970.47265625, + "learning_rate": 9.976102924924752e-06, + "loss": 64802.925, + "step": 64010 + }, + { + "epoch": 0.12932445044178784, + "grad_norm": 117886.453125, + "learning_rate": 9.976068825687424e-06, + "loss": 247378.75, + "step": 64020 + }, + { + "epoch": 0.12934465107447166, + "grad_norm": 575555.125, + "learning_rate": 9.97603470219734e-06, + "loss": 100946.4812, + "step": 64030 + }, + { + "epoch": 0.12936485170715548, + "grad_norm": 728114.5625, + "learning_rate": 9.976000554454668e-06, + "loss": 175729.55, + "step": 64040 + }, + { + "epoch": 0.1293850523398393, + "grad_norm": 69710.421875, + "learning_rate": 9.975966382459571e-06, + "loss": 119375.1125, + "step": 64050 + }, + { + "epoch": 0.1294052529725231, + "grad_norm": 109257.3046875, + "learning_rate": 9.975932186212217e-06, + "loss": 151021.4625, + "step": 64060 + }, + { + "epoch": 0.1294254536052069, + "grad_norm": 198546.515625, + "learning_rate": 9.975897965712777e-06, + "loss": 183078.8125, + "step": 64070 + }, + { + "epoch": 0.12944565423789073, + "grad_norm": 88825.1484375, + "learning_rate": 9.975863720961411e-06, + "loss": 125399.9375, + "step": 64080 + }, + { + "epoch": 0.12946585487057455, + "grad_norm": 61475.359375, + "learning_rate": 9.975829451958288e-06, + "loss": 111369.775, + "step": 64090 + }, + { + "epoch": 0.12948605550325837, + "grad_norm": 1408709.875, + "learning_rate": 9.975795158703576e-06, + "loss": 100733.5, + "step": 64100 + }, + { + "epoch": 0.1295062561359422, + "grad_norm": 227720.25, + "learning_rate": 9.975760841197443e-06, + "loss": 56194.8625, + "step": 64110 + }, + { + "epoch": 0.12952645676862598, + "grad_norm": 2511977.5, + "learning_rate": 9.975726499440055e-06, + "loss": 301737.45, + "step": 64120 + }, + { + "epoch": 0.1295466574013098, + "grad_norm": 78376.359375, + "learning_rate": 9.975692133431579e-06, + "loss": 135876.6, + "step": 64130 + }, + { + "epoch": 0.12956685803399362, + "grad_norm": 1048679.375, + "learning_rate": 9.975657743172182e-06, + "loss": 188681.3, + "step": 64140 + }, + { + "epoch": 0.12958705866667744, + "grad_norm": 963835.0625, + "learning_rate": 9.975623328662036e-06, + "loss": 220824.925, + "step": 64150 + }, + { + "epoch": 0.12960725929936126, + "grad_norm": 16700.052734375, + "learning_rate": 9.975588889901302e-06, + "loss": 234462.025, + "step": 64160 + }, + { + "epoch": 0.12962745993204508, + "grad_norm": 139837.046875, + "learning_rate": 9.975554426890152e-06, + "loss": 153169.125, + "step": 64170 + }, + { + "epoch": 0.1296476605647289, + "grad_norm": 296183.0, + "learning_rate": 9.975519939628754e-06, + "loss": 87411.7688, + "step": 64180 + }, + { + "epoch": 0.1296678611974127, + "grad_norm": 1564838.0, + "learning_rate": 9.975485428117276e-06, + "loss": 166218.625, + "step": 64190 + }, + { + "epoch": 0.1296880618300965, + "grad_norm": 1145891.5, + "learning_rate": 9.975450892355882e-06, + "loss": 116339.175, + "step": 64200 + }, + { + "epoch": 0.12970826246278033, + "grad_norm": 95440.7578125, + "learning_rate": 9.975416332344747e-06, + "loss": 49853.2125, + "step": 64210 + }, + { + "epoch": 0.12972846309546415, + "grad_norm": 91492.796875, + "learning_rate": 9.975381748084035e-06, + "loss": 135726.6875, + "step": 64220 + }, + { + "epoch": 0.12974866372814797, + "grad_norm": 347993.90625, + "learning_rate": 9.975347139573917e-06, + "loss": 79101.0375, + "step": 64230 + }, + { + "epoch": 0.1297688643608318, + "grad_norm": 480154.5625, + "learning_rate": 9.97531250681456e-06, + "loss": 195745.1, + "step": 64240 + }, + { + "epoch": 0.12978906499351558, + "grad_norm": 10731.68359375, + "learning_rate": 9.975277849806133e-06, + "loss": 103853.825, + "step": 64250 + }, + { + "epoch": 0.1298092656261994, + "grad_norm": 244812.0, + "learning_rate": 9.975243168548804e-06, + "loss": 118989.5375, + "step": 64260 + }, + { + "epoch": 0.12982946625888322, + "grad_norm": 63268.8125, + "learning_rate": 9.975208463042745e-06, + "loss": 101069.8625, + "step": 64270 + }, + { + "epoch": 0.12984966689156704, + "grad_norm": 38818.1640625, + "learning_rate": 9.975173733288122e-06, + "loss": 62447.7875, + "step": 64280 + }, + { + "epoch": 0.12986986752425086, + "grad_norm": 81182.7734375, + "learning_rate": 9.975138979285107e-06, + "loss": 57500.6813, + "step": 64290 + }, + { + "epoch": 0.12989006815693468, + "grad_norm": 17135.84375, + "learning_rate": 9.975104201033868e-06, + "loss": 96899.4625, + "step": 64300 + }, + { + "epoch": 0.12991026878961848, + "grad_norm": 19390.970703125, + "learning_rate": 9.975069398534574e-06, + "loss": 91236.6687, + "step": 64310 + }, + { + "epoch": 0.1299304694223023, + "grad_norm": 180316.0625, + "learning_rate": 9.975034571787394e-06, + "loss": 113477.975, + "step": 64320 + }, + { + "epoch": 0.12995067005498612, + "grad_norm": 3053173.0, + "learning_rate": 9.9749997207925e-06, + "loss": 269427.075, + "step": 64330 + }, + { + "epoch": 0.12997087068766994, + "grad_norm": 6031279.0, + "learning_rate": 9.974964845550062e-06, + "loss": 162411.025, + "step": 64340 + }, + { + "epoch": 0.12999107132035376, + "grad_norm": 36916.0, + "learning_rate": 9.974929946060246e-06, + "loss": 121056.775, + "step": 64350 + }, + { + "epoch": 0.13001127195303758, + "grad_norm": 99021.2265625, + "learning_rate": 9.974895022323226e-06, + "loss": 98736.3875, + "step": 64360 + }, + { + "epoch": 0.1300314725857214, + "grad_norm": 7331.16650390625, + "learning_rate": 9.974860074339173e-06, + "loss": 92980.5375, + "step": 64370 + }, + { + "epoch": 0.1300516732184052, + "grad_norm": 643409.25, + "learning_rate": 9.974825102108251e-06, + "loss": 105131.725, + "step": 64380 + }, + { + "epoch": 0.130071873851089, + "grad_norm": 482250.875, + "learning_rate": 9.974790105630639e-06, + "loss": 122675.425, + "step": 64390 + }, + { + "epoch": 0.13009207448377283, + "grad_norm": 90401.6953125, + "learning_rate": 9.974755084906503e-06, + "loss": 139481.3, + "step": 64400 + }, + { + "epoch": 0.13011227511645665, + "grad_norm": 126333.53125, + "learning_rate": 9.974720039936012e-06, + "loss": 114329.825, + "step": 64410 + }, + { + "epoch": 0.13013247574914047, + "grad_norm": 51887.1875, + "learning_rate": 9.97468497071934e-06, + "loss": 73194.8813, + "step": 64420 + }, + { + "epoch": 0.1301526763818243, + "grad_norm": 559915.875, + "learning_rate": 9.974649877256657e-06, + "loss": 54896.8375, + "step": 64430 + }, + { + "epoch": 0.13017287701450808, + "grad_norm": 476994.5, + "learning_rate": 9.974614759548133e-06, + "loss": 122757.175, + "step": 64440 + }, + { + "epoch": 0.1301930776471919, + "grad_norm": 21219.4296875, + "learning_rate": 9.97457961759394e-06, + "loss": 57498.9375, + "step": 64450 + }, + { + "epoch": 0.13021327827987572, + "grad_norm": 150685.8125, + "learning_rate": 9.97454445139425e-06, + "loss": 57461.5875, + "step": 64460 + }, + { + "epoch": 0.13023347891255954, + "grad_norm": 71390.6015625, + "learning_rate": 9.974509260949233e-06, + "loss": 81615.375, + "step": 64470 + }, + { + "epoch": 0.13025367954524336, + "grad_norm": 103694.8046875, + "learning_rate": 9.97447404625906e-06, + "loss": 159331.4625, + "step": 64480 + }, + { + "epoch": 0.13027388017792718, + "grad_norm": 1154384.375, + "learning_rate": 9.974438807323907e-06, + "loss": 237679.3, + "step": 64490 + }, + { + "epoch": 0.130294080810611, + "grad_norm": 220411.21875, + "learning_rate": 9.974403544143942e-06, + "loss": 104187.1187, + "step": 64500 + }, + { + "epoch": 0.1303142814432948, + "grad_norm": 1412733.875, + "learning_rate": 9.974368256719335e-06, + "loss": 86626.6125, + "step": 64510 + }, + { + "epoch": 0.1303344820759786, + "grad_norm": 1092699.75, + "learning_rate": 9.974332945050263e-06, + "loss": 81380.85, + "step": 64520 + }, + { + "epoch": 0.13035468270866243, + "grad_norm": 100381.65625, + "learning_rate": 9.974297609136895e-06, + "loss": 58619.325, + "step": 64530 + }, + { + "epoch": 0.13037488334134625, + "grad_norm": 1160338.25, + "learning_rate": 9.974262248979402e-06, + "loss": 91750.6875, + "step": 64540 + }, + { + "epoch": 0.13039508397403007, + "grad_norm": 1405109.125, + "learning_rate": 9.97422686457796e-06, + "loss": 100650.5, + "step": 64550 + }, + { + "epoch": 0.1304152846067139, + "grad_norm": 45786.07421875, + "learning_rate": 9.97419145593274e-06, + "loss": 125384.7875, + "step": 64560 + }, + { + "epoch": 0.13043548523939769, + "grad_norm": 11543.4912109375, + "learning_rate": 9.974156023043912e-06, + "loss": 203710.175, + "step": 64570 + }, + { + "epoch": 0.1304556858720815, + "grad_norm": 3051886.25, + "learning_rate": 9.974120565911653e-06, + "loss": 172033.9, + "step": 64580 + }, + { + "epoch": 0.13047588650476533, + "grad_norm": 28174.861328125, + "learning_rate": 9.974085084536132e-06, + "loss": 36446.5125, + "step": 64590 + }, + { + "epoch": 0.13049608713744915, + "grad_norm": 2072.072998046875, + "learning_rate": 9.974049578917524e-06, + "loss": 65282.2375, + "step": 64600 + }, + { + "epoch": 0.13051628777013297, + "grad_norm": 676366.375, + "learning_rate": 9.974014049056003e-06, + "loss": 101978.7563, + "step": 64610 + }, + { + "epoch": 0.13053648840281679, + "grad_norm": 1104579.75, + "learning_rate": 9.973978494951739e-06, + "loss": 126570.85, + "step": 64620 + }, + { + "epoch": 0.13055668903550058, + "grad_norm": 84368.5390625, + "learning_rate": 9.973942916604907e-06, + "loss": 92931.8125, + "step": 64630 + }, + { + "epoch": 0.1305768896681844, + "grad_norm": 42361.9765625, + "learning_rate": 9.973907314015682e-06, + "loss": 147356.5875, + "step": 64640 + }, + { + "epoch": 0.13059709030086822, + "grad_norm": 258712.953125, + "learning_rate": 9.973871687184234e-06, + "loss": 130615.825, + "step": 64650 + }, + { + "epoch": 0.13061729093355204, + "grad_norm": 352953.84375, + "learning_rate": 9.97383603611074e-06, + "loss": 141610.175, + "step": 64660 + }, + { + "epoch": 0.13063749156623586, + "grad_norm": 423663.59375, + "learning_rate": 9.973800360795372e-06, + "loss": 138315.65, + "step": 64670 + }, + { + "epoch": 0.13065769219891968, + "grad_norm": 160334.234375, + "learning_rate": 9.973764661238306e-06, + "loss": 72020.75, + "step": 64680 + }, + { + "epoch": 0.1306778928316035, + "grad_norm": 755688.625, + "learning_rate": 9.973728937439714e-06, + "loss": 36975.5813, + "step": 64690 + }, + { + "epoch": 0.1306980934642873, + "grad_norm": 2107287.5, + "learning_rate": 9.973693189399767e-06, + "loss": 216223.1, + "step": 64700 + }, + { + "epoch": 0.1307182940969711, + "grad_norm": 206190.5625, + "learning_rate": 9.973657417118646e-06, + "loss": 156113.3875, + "step": 64710 + }, + { + "epoch": 0.13073849472965493, + "grad_norm": 0.0, + "learning_rate": 9.97362162059652e-06, + "loss": 277374.15, + "step": 64720 + }, + { + "epoch": 0.13075869536233875, + "grad_norm": 31620.52734375, + "learning_rate": 9.973585799833567e-06, + "loss": 77669.75, + "step": 64730 + }, + { + "epoch": 0.13077889599502257, + "grad_norm": 113916.40625, + "learning_rate": 9.97354995482996e-06, + "loss": 122452.475, + "step": 64740 + }, + { + "epoch": 0.1307990966277064, + "grad_norm": 163665.65625, + "learning_rate": 9.973514085585871e-06, + "loss": 171619.8625, + "step": 64750 + }, + { + "epoch": 0.13081929726039018, + "grad_norm": 204822.640625, + "learning_rate": 9.97347819210148e-06, + "loss": 108113.2375, + "step": 64760 + }, + { + "epoch": 0.130839497893074, + "grad_norm": 197261.375, + "learning_rate": 9.973442274376958e-06, + "loss": 84149.65, + "step": 64770 + }, + { + "epoch": 0.13085969852575782, + "grad_norm": 1022877.75, + "learning_rate": 9.973406332412484e-06, + "loss": 170240.775, + "step": 64780 + }, + { + "epoch": 0.13087989915844164, + "grad_norm": 294421.65625, + "learning_rate": 9.97337036620823e-06, + "loss": 267884.225, + "step": 64790 + }, + { + "epoch": 0.13090009979112546, + "grad_norm": 76547.8828125, + "learning_rate": 9.973334375764372e-06, + "loss": 147739.1, + "step": 64800 + }, + { + "epoch": 0.13092030042380928, + "grad_norm": 491162.71875, + "learning_rate": 9.973298361081083e-06, + "loss": 118284.6125, + "step": 64810 + }, + { + "epoch": 0.1309405010564931, + "grad_norm": 1439774.0, + "learning_rate": 9.973262322158544e-06, + "loss": 173299.95, + "step": 64820 + }, + { + "epoch": 0.1309607016891769, + "grad_norm": 707683.875, + "learning_rate": 9.973226258996926e-06, + "loss": 108611.8, + "step": 64830 + }, + { + "epoch": 0.13098090232186071, + "grad_norm": 273311.4375, + "learning_rate": 9.973190171596407e-06, + "loss": 94696.7375, + "step": 64840 + }, + { + "epoch": 0.13100110295454453, + "grad_norm": 628438.5625, + "learning_rate": 9.973154059957162e-06, + "loss": 111067.325, + "step": 64850 + }, + { + "epoch": 0.13102130358722835, + "grad_norm": 203633.546875, + "learning_rate": 9.973117924079367e-06, + "loss": 185192.15, + "step": 64860 + }, + { + "epoch": 0.13104150421991217, + "grad_norm": 216586.3125, + "learning_rate": 9.973081763963199e-06, + "loss": 120583.85, + "step": 64870 + }, + { + "epoch": 0.131061704852596, + "grad_norm": 260003.90625, + "learning_rate": 9.973045579608834e-06, + "loss": 229010.325, + "step": 64880 + }, + { + "epoch": 0.1310819054852798, + "grad_norm": 184936.1875, + "learning_rate": 9.973009371016447e-06, + "loss": 114599.5875, + "step": 64890 + }, + { + "epoch": 0.1311021061179636, + "grad_norm": 36259.98046875, + "learning_rate": 9.972973138186217e-06, + "loss": 79780.95, + "step": 64900 + }, + { + "epoch": 0.13112230675064743, + "grad_norm": 333074.78125, + "learning_rate": 9.972936881118318e-06, + "loss": 316941.0, + "step": 64910 + }, + { + "epoch": 0.13114250738333125, + "grad_norm": 645815.0, + "learning_rate": 9.972900599812928e-06, + "loss": 187238.0125, + "step": 64920 + }, + { + "epoch": 0.13116270801601507, + "grad_norm": 491729.8125, + "learning_rate": 9.972864294270224e-06, + "loss": 201768.025, + "step": 64930 + }, + { + "epoch": 0.1311829086486989, + "grad_norm": 256029.078125, + "learning_rate": 9.972827964490382e-06, + "loss": 116998.05, + "step": 64940 + }, + { + "epoch": 0.13120310928138268, + "grad_norm": 23070.6953125, + "learning_rate": 9.972791610473578e-06, + "loss": 107293.4125, + "step": 64950 + }, + { + "epoch": 0.1312233099140665, + "grad_norm": 230621.71875, + "learning_rate": 9.972755232219992e-06, + "loss": 109330.3, + "step": 64960 + }, + { + "epoch": 0.13124351054675032, + "grad_norm": 352055.875, + "learning_rate": 9.972718829729802e-06, + "loss": 125655.2875, + "step": 64970 + }, + { + "epoch": 0.13126371117943414, + "grad_norm": 2310907.0, + "learning_rate": 9.972682403003182e-06, + "loss": 176715.175, + "step": 64980 + }, + { + "epoch": 0.13128391181211796, + "grad_norm": 0.0, + "learning_rate": 9.972645952040311e-06, + "loss": 156120.05, + "step": 64990 + }, + { + "epoch": 0.13130411244480178, + "grad_norm": 703257.0625, + "learning_rate": 9.972609476841368e-06, + "loss": 115490.325, + "step": 65000 + }, + { + "epoch": 0.1313243130774856, + "grad_norm": 268133.375, + "learning_rate": 9.972572977406527e-06, + "loss": 62151.925, + "step": 65010 + }, + { + "epoch": 0.1313445137101694, + "grad_norm": 413875.21875, + "learning_rate": 9.97253645373597e-06, + "loss": 350279.525, + "step": 65020 + }, + { + "epoch": 0.1313647143428532, + "grad_norm": 170360.5625, + "learning_rate": 9.972499905829874e-06, + "loss": 122046.475, + "step": 65030 + }, + { + "epoch": 0.13138491497553703, + "grad_norm": 330482.0, + "learning_rate": 9.972463333688416e-06, + "loss": 96947.725, + "step": 65040 + }, + { + "epoch": 0.13140511560822085, + "grad_norm": 336456.25, + "learning_rate": 9.972426737311775e-06, + "loss": 231693.65, + "step": 65050 + }, + { + "epoch": 0.13142531624090467, + "grad_norm": 768995.4375, + "learning_rate": 9.972390116700128e-06, + "loss": 168514.425, + "step": 65060 + }, + { + "epoch": 0.1314455168735885, + "grad_norm": 189391.921875, + "learning_rate": 9.972353471853655e-06, + "loss": 103854.2375, + "step": 65070 + }, + { + "epoch": 0.13146571750627228, + "grad_norm": 152389.625, + "learning_rate": 9.972316802772536e-06, + "loss": 134103.825, + "step": 65080 + }, + { + "epoch": 0.1314859181389561, + "grad_norm": 67763.8515625, + "learning_rate": 9.972280109456946e-06, + "loss": 82811.1187, + "step": 65090 + }, + { + "epoch": 0.13150611877163992, + "grad_norm": 815081.4375, + "learning_rate": 9.972243391907068e-06, + "loss": 101754.0562, + "step": 65100 + }, + { + "epoch": 0.13152631940432374, + "grad_norm": 14914.70703125, + "learning_rate": 9.972206650123077e-06, + "loss": 104565.1125, + "step": 65110 + }, + { + "epoch": 0.13154652003700756, + "grad_norm": 620242.3125, + "learning_rate": 9.972169884105155e-06, + "loss": 237228.225, + "step": 65120 + }, + { + "epoch": 0.13156672066969138, + "grad_norm": 281326.5, + "learning_rate": 9.972133093853477e-06, + "loss": 155377.2875, + "step": 65130 + }, + { + "epoch": 0.1315869213023752, + "grad_norm": 141046.34375, + "learning_rate": 9.972096279368228e-06, + "loss": 73679.1875, + "step": 65140 + }, + { + "epoch": 0.131607121935059, + "grad_norm": 778653.4375, + "learning_rate": 9.972059440649584e-06, + "loss": 68323.5625, + "step": 65150 + }, + { + "epoch": 0.13162732256774282, + "grad_norm": 55854.37890625, + "learning_rate": 9.972022577697726e-06, + "loss": 86161.1187, + "step": 65160 + }, + { + "epoch": 0.13164752320042664, + "grad_norm": 457945.3125, + "learning_rate": 9.971985690512834e-06, + "loss": 302203.3, + "step": 65170 + }, + { + "epoch": 0.13166772383311046, + "grad_norm": 58150.67578125, + "learning_rate": 9.971948779095084e-06, + "loss": 73049.5688, + "step": 65180 + }, + { + "epoch": 0.13168792446579428, + "grad_norm": 189164.125, + "learning_rate": 9.97191184344466e-06, + "loss": 90928.2125, + "step": 65190 + }, + { + "epoch": 0.1317081250984781, + "grad_norm": 391511.84375, + "learning_rate": 9.97187488356174e-06, + "loss": 248526.575, + "step": 65200 + }, + { + "epoch": 0.1317283257311619, + "grad_norm": 143280.890625, + "learning_rate": 9.971837899446505e-06, + "loss": 64862.9062, + "step": 65210 + }, + { + "epoch": 0.1317485263638457, + "grad_norm": 49619.47265625, + "learning_rate": 9.971800891099137e-06, + "loss": 274534.45, + "step": 65220 + }, + { + "epoch": 0.13176872699652953, + "grad_norm": 48035.5546875, + "learning_rate": 9.971763858519812e-06, + "loss": 95315.5938, + "step": 65230 + }, + { + "epoch": 0.13178892762921335, + "grad_norm": 586782.4375, + "learning_rate": 9.971726801708715e-06, + "loss": 178974.4125, + "step": 65240 + }, + { + "epoch": 0.13180912826189717, + "grad_norm": 27330.39453125, + "learning_rate": 9.971689720666024e-06, + "loss": 101643.5437, + "step": 65250 + }, + { + "epoch": 0.131829328894581, + "grad_norm": 30493.4375, + "learning_rate": 9.97165261539192e-06, + "loss": 225844.65, + "step": 65260 + }, + { + "epoch": 0.13184952952726478, + "grad_norm": 240225.640625, + "learning_rate": 9.971615485886583e-06, + "loss": 150262.175, + "step": 65270 + }, + { + "epoch": 0.1318697301599486, + "grad_norm": 19291.060546875, + "learning_rate": 9.971578332150197e-06, + "loss": 211334.5, + "step": 65280 + }, + { + "epoch": 0.13188993079263242, + "grad_norm": 959471.625, + "learning_rate": 9.97154115418294e-06, + "loss": 132152.7, + "step": 65290 + }, + { + "epoch": 0.13191013142531624, + "grad_norm": 168209.625, + "learning_rate": 9.971503951984996e-06, + "loss": 112009.7, + "step": 65300 + }, + { + "epoch": 0.13193033205800006, + "grad_norm": 568261.5, + "learning_rate": 9.971466725556542e-06, + "loss": 122246.85, + "step": 65310 + }, + { + "epoch": 0.13195053269068388, + "grad_norm": 1735981.375, + "learning_rate": 9.971429474897765e-06, + "loss": 226186.85, + "step": 65320 + }, + { + "epoch": 0.1319707333233677, + "grad_norm": 115922.671875, + "learning_rate": 9.971392200008842e-06, + "loss": 46200.3719, + "step": 65330 + }, + { + "epoch": 0.1319909339560515, + "grad_norm": 88104.484375, + "learning_rate": 9.971354900889955e-06, + "loss": 149073.45, + "step": 65340 + }, + { + "epoch": 0.1320111345887353, + "grad_norm": 1445703.25, + "learning_rate": 9.97131757754129e-06, + "loss": 41539.725, + "step": 65350 + }, + { + "epoch": 0.13203133522141913, + "grad_norm": 428890.1875, + "learning_rate": 9.971280229963026e-06, + "loss": 156059.85, + "step": 65360 + }, + { + "epoch": 0.13205153585410295, + "grad_norm": 1763885.0, + "learning_rate": 9.971242858155344e-06, + "loss": 104664.225, + "step": 65370 + }, + { + "epoch": 0.13207173648678677, + "grad_norm": 112791.78125, + "learning_rate": 9.971205462118427e-06, + "loss": 96335.6125, + "step": 65380 + }, + { + "epoch": 0.1320919371194706, + "grad_norm": 79692.1328125, + "learning_rate": 9.971168041852456e-06, + "loss": 142543.15, + "step": 65390 + }, + { + "epoch": 0.13211213775215438, + "grad_norm": 2905240.5, + "learning_rate": 9.971130597357618e-06, + "loss": 139786.35, + "step": 65400 + }, + { + "epoch": 0.1321323383848382, + "grad_norm": 167498.375, + "learning_rate": 9.97109312863409e-06, + "loss": 67975.6687, + "step": 65410 + }, + { + "epoch": 0.13215253901752202, + "grad_norm": 1136204.75, + "learning_rate": 9.971055635682059e-06, + "loss": 276198.375, + "step": 65420 + }, + { + "epoch": 0.13217273965020584, + "grad_norm": 29209.39453125, + "learning_rate": 9.971018118501706e-06, + "loss": 238268.425, + "step": 65430 + }, + { + "epoch": 0.13219294028288966, + "grad_norm": 1710943.875, + "learning_rate": 9.970980577093212e-06, + "loss": 173636.85, + "step": 65440 + }, + { + "epoch": 0.13221314091557348, + "grad_norm": 27089.328125, + "learning_rate": 9.970943011456762e-06, + "loss": 28917.175, + "step": 65450 + }, + { + "epoch": 0.1322333415482573, + "grad_norm": 47204.9453125, + "learning_rate": 9.970905421592538e-06, + "loss": 138600.3875, + "step": 65460 + }, + { + "epoch": 0.1322535421809411, + "grad_norm": 303756.3125, + "learning_rate": 9.970867807500725e-06, + "loss": 72423.2812, + "step": 65470 + }, + { + "epoch": 0.13227374281362492, + "grad_norm": 140765.515625, + "learning_rate": 9.970830169181504e-06, + "loss": 191932.2, + "step": 65480 + }, + { + "epoch": 0.13229394344630874, + "grad_norm": 93403.46875, + "learning_rate": 9.97079250663506e-06, + "loss": 63620.5312, + "step": 65490 + }, + { + "epoch": 0.13231414407899256, + "grad_norm": 273634.125, + "learning_rate": 9.970754819861577e-06, + "loss": 172106.85, + "step": 65500 + }, + { + "epoch": 0.13233434471167638, + "grad_norm": 137596.484375, + "learning_rate": 9.97071710886124e-06, + "loss": 338658.775, + "step": 65510 + }, + { + "epoch": 0.1323545453443602, + "grad_norm": 28995.369140625, + "learning_rate": 9.970679373634227e-06, + "loss": 63374.7312, + "step": 65520 + }, + { + "epoch": 0.132374745977044, + "grad_norm": 155476.5625, + "learning_rate": 9.970641614180727e-06, + "loss": 173207.0875, + "step": 65530 + }, + { + "epoch": 0.1323949466097278, + "grad_norm": 387166.9375, + "learning_rate": 9.970603830500923e-06, + "loss": 78762.7563, + "step": 65540 + }, + { + "epoch": 0.13241514724241163, + "grad_norm": 14344.697265625, + "learning_rate": 9.970566022594996e-06, + "loss": 36076.9563, + "step": 65550 + }, + { + "epoch": 0.13243534787509545, + "grad_norm": 571467.0, + "learning_rate": 9.970528190463136e-06, + "loss": 62434.2875, + "step": 65560 + }, + { + "epoch": 0.13245554850777927, + "grad_norm": 41178.89453125, + "learning_rate": 9.970490334105525e-06, + "loss": 142721.8625, + "step": 65570 + }, + { + "epoch": 0.1324757491404631, + "grad_norm": 1251100.5, + "learning_rate": 9.970452453522344e-06, + "loss": 168810.975, + "step": 65580 + }, + { + "epoch": 0.13249594977314688, + "grad_norm": 78589.546875, + "learning_rate": 9.970414548713783e-06, + "loss": 101415.1375, + "step": 65590 + }, + { + "epoch": 0.1325161504058307, + "grad_norm": 47345.25, + "learning_rate": 9.970376619680024e-06, + "loss": 218556.2, + "step": 65600 + }, + { + "epoch": 0.13253635103851452, + "grad_norm": 799415.625, + "learning_rate": 9.970338666421251e-06, + "loss": 84520.975, + "step": 65610 + }, + { + "epoch": 0.13255655167119834, + "grad_norm": 829490.125, + "learning_rate": 9.970300688937651e-06, + "loss": 169399.35, + "step": 65620 + }, + { + "epoch": 0.13257675230388216, + "grad_norm": 531388.5, + "learning_rate": 9.970262687229409e-06, + "loss": 146217.5875, + "step": 65630 + }, + { + "epoch": 0.13259695293656598, + "grad_norm": 19279.92578125, + "learning_rate": 9.970224661296708e-06, + "loss": 75551.55, + "step": 65640 + }, + { + "epoch": 0.1326171535692498, + "grad_norm": 675708.3125, + "learning_rate": 9.970186611139736e-06, + "loss": 38421.2063, + "step": 65650 + }, + { + "epoch": 0.1326373542019336, + "grad_norm": 23280.177734375, + "learning_rate": 9.970148536758678e-06, + "loss": 221557.925, + "step": 65660 + }, + { + "epoch": 0.1326575548346174, + "grad_norm": 76444.4609375, + "learning_rate": 9.970110438153717e-06, + "loss": 143594.4625, + "step": 65670 + }, + { + "epoch": 0.13267775546730123, + "grad_norm": 190040.71875, + "learning_rate": 9.970072315325041e-06, + "loss": 146698.45, + "step": 65680 + }, + { + "epoch": 0.13269795609998505, + "grad_norm": 368119.9375, + "learning_rate": 9.970034168272835e-06, + "loss": 252733.0, + "step": 65690 + }, + { + "epoch": 0.13271815673266887, + "grad_norm": 74549.484375, + "learning_rate": 9.969995996997285e-06, + "loss": 225786.425, + "step": 65700 + }, + { + "epoch": 0.1327383573653527, + "grad_norm": 266288.78125, + "learning_rate": 9.96995780149858e-06, + "loss": 62221.2375, + "step": 65710 + }, + { + "epoch": 0.13275855799803649, + "grad_norm": 82135.203125, + "learning_rate": 9.969919581776902e-06, + "loss": 202892.975, + "step": 65720 + }, + { + "epoch": 0.1327787586307203, + "grad_norm": 2861755.75, + "learning_rate": 9.969881337832437e-06, + "loss": 269379.2, + "step": 65730 + }, + { + "epoch": 0.13279895926340413, + "grad_norm": 150759.5, + "learning_rate": 9.969843069665375e-06, + "loss": 98926.4125, + "step": 65740 + }, + { + "epoch": 0.13281915989608795, + "grad_norm": 286313.125, + "learning_rate": 9.9698047772759e-06, + "loss": 110974.65, + "step": 65750 + }, + { + "epoch": 0.13283936052877177, + "grad_norm": 491821.0625, + "learning_rate": 9.969766460664199e-06, + "loss": 46628.6469, + "step": 65760 + }, + { + "epoch": 0.13285956116145559, + "grad_norm": 192573.625, + "learning_rate": 9.96972811983046e-06, + "loss": 106619.4375, + "step": 65770 + }, + { + "epoch": 0.1328797617941394, + "grad_norm": 1003719.5625, + "learning_rate": 9.969689754774868e-06, + "loss": 201662.5875, + "step": 65780 + }, + { + "epoch": 0.1328999624268232, + "grad_norm": 16178.5107421875, + "learning_rate": 9.96965136549761e-06, + "loss": 165981.025, + "step": 65790 + }, + { + "epoch": 0.13292016305950702, + "grad_norm": 29063.146484375, + "learning_rate": 9.969612951998874e-06, + "loss": 35206.0031, + "step": 65800 + }, + { + "epoch": 0.13294036369219084, + "grad_norm": 51289.8125, + "learning_rate": 9.96957451427885e-06, + "loss": 57381.7375, + "step": 65810 + }, + { + "epoch": 0.13296056432487466, + "grad_norm": 831895.0625, + "learning_rate": 9.96953605233772e-06, + "loss": 133195.5, + "step": 65820 + }, + { + "epoch": 0.13298076495755848, + "grad_norm": 376037.4375, + "learning_rate": 9.969497566175675e-06, + "loss": 75356.575, + "step": 65830 + }, + { + "epoch": 0.1330009655902423, + "grad_norm": 522393.875, + "learning_rate": 9.969459055792903e-06, + "loss": 209246.675, + "step": 65840 + }, + { + "epoch": 0.1330211662229261, + "grad_norm": 1699957.875, + "learning_rate": 9.969420521189587e-06, + "loss": 266032.6, + "step": 65850 + }, + { + "epoch": 0.1330413668556099, + "grad_norm": 498373.0, + "learning_rate": 9.96938196236592e-06, + "loss": 93203.675, + "step": 65860 + }, + { + "epoch": 0.13306156748829373, + "grad_norm": 391688.5, + "learning_rate": 9.96934337932209e-06, + "loss": 153043.1, + "step": 65870 + }, + { + "epoch": 0.13308176812097755, + "grad_norm": 81833.1484375, + "learning_rate": 9.969304772058279e-06, + "loss": 150415.675, + "step": 65880 + }, + { + "epoch": 0.13310196875366137, + "grad_norm": 196333.5, + "learning_rate": 9.969266140574682e-06, + "loss": 72733.3313, + "step": 65890 + }, + { + "epoch": 0.1331221693863452, + "grad_norm": 43209.87890625, + "learning_rate": 9.969227484871485e-06, + "loss": 77718.5063, + "step": 65900 + }, + { + "epoch": 0.13314237001902898, + "grad_norm": 27018.03125, + "learning_rate": 9.969188804948872e-06, + "loss": 182022.3, + "step": 65910 + }, + { + "epoch": 0.1331625706517128, + "grad_norm": 195529.828125, + "learning_rate": 9.969150100807039e-06, + "loss": 107861.45, + "step": 65920 + }, + { + "epoch": 0.13318277128439662, + "grad_norm": 14746.296875, + "learning_rate": 9.969111372446171e-06, + "loss": 124500.4625, + "step": 65930 + }, + { + "epoch": 0.13320297191708044, + "grad_norm": 25266.5546875, + "learning_rate": 9.969072619866455e-06, + "loss": 96094.05, + "step": 65940 + }, + { + "epoch": 0.13322317254976426, + "grad_norm": 321365.90625, + "learning_rate": 9.969033843068083e-06, + "loss": 69554.1687, + "step": 65950 + }, + { + "epoch": 0.13324337318244808, + "grad_norm": 6679.9599609375, + "learning_rate": 9.968995042051244e-06, + "loss": 158282.7125, + "step": 65960 + }, + { + "epoch": 0.1332635738151319, + "grad_norm": 18952.111328125, + "learning_rate": 9.968956216816123e-06, + "loss": 86275.2063, + "step": 65970 + }, + { + "epoch": 0.1332837744478157, + "grad_norm": 20095.328125, + "learning_rate": 9.968917367362914e-06, + "loss": 68401.8062, + "step": 65980 + }, + { + "epoch": 0.13330397508049951, + "grad_norm": 50211.92578125, + "learning_rate": 9.968878493691803e-06, + "loss": 101314.9937, + "step": 65990 + }, + { + "epoch": 0.13332417571318333, + "grad_norm": 190886.984375, + "learning_rate": 9.968839595802982e-06, + "loss": 222201.175, + "step": 66000 + }, + { + "epoch": 0.13334437634586715, + "grad_norm": 53883.4140625, + "learning_rate": 9.968800673696638e-06, + "loss": 180523.35, + "step": 66010 + }, + { + "epoch": 0.13336457697855097, + "grad_norm": 221872.578125, + "learning_rate": 9.968761727372965e-06, + "loss": 87034.425, + "step": 66020 + }, + { + "epoch": 0.1333847776112348, + "grad_norm": 1052492.625, + "learning_rate": 9.968722756832148e-06, + "loss": 85252.7875, + "step": 66030 + }, + { + "epoch": 0.1334049782439186, + "grad_norm": 40201.53515625, + "learning_rate": 9.96868376207438e-06, + "loss": 99571.4625, + "step": 66040 + }, + { + "epoch": 0.1334251788766024, + "grad_norm": 845035.0, + "learning_rate": 9.968644743099848e-06, + "loss": 52596.6438, + "step": 66050 + }, + { + "epoch": 0.13344537950928623, + "grad_norm": 803195.875, + "learning_rate": 9.968605699908747e-06, + "loss": 89033.5125, + "step": 66060 + }, + { + "epoch": 0.13346558014197005, + "grad_norm": 174373.15625, + "learning_rate": 9.968566632501262e-06, + "loss": 201745.675, + "step": 66070 + }, + { + "epoch": 0.13348578077465387, + "grad_norm": 12314.66015625, + "learning_rate": 9.968527540877586e-06, + "loss": 208499.8125, + "step": 66080 + }, + { + "epoch": 0.1335059814073377, + "grad_norm": 316363.09375, + "learning_rate": 9.96848842503791e-06, + "loss": 176575.175, + "step": 66090 + }, + { + "epoch": 0.1335261820400215, + "grad_norm": 15845.5498046875, + "learning_rate": 9.968449284982424e-06, + "loss": 134993.0625, + "step": 66100 + }, + { + "epoch": 0.1335463826727053, + "grad_norm": 475227.09375, + "learning_rate": 9.968410120711321e-06, + "loss": 100682.6687, + "step": 66110 + }, + { + "epoch": 0.13356658330538912, + "grad_norm": 1710030.75, + "learning_rate": 9.968370932224787e-06, + "loss": 181717.0625, + "step": 66120 + }, + { + "epoch": 0.13358678393807294, + "grad_norm": 661518.0625, + "learning_rate": 9.968331719523015e-06, + "loss": 139571.225, + "step": 66130 + }, + { + "epoch": 0.13360698457075676, + "grad_norm": 333198.09375, + "learning_rate": 9.968292482606199e-06, + "loss": 216882.8, + "step": 66140 + }, + { + "epoch": 0.13362718520344058, + "grad_norm": 1564043.375, + "learning_rate": 9.968253221474527e-06, + "loss": 250229.35, + "step": 66150 + }, + { + "epoch": 0.1336473858361244, + "grad_norm": 674014.1875, + "learning_rate": 9.96821393612819e-06, + "loss": 153545.6875, + "step": 66160 + }, + { + "epoch": 0.1336675864688082, + "grad_norm": 1881.436279296875, + "learning_rate": 9.968174626567382e-06, + "loss": 73236.6438, + "step": 66170 + }, + { + "epoch": 0.133687787101492, + "grad_norm": 64687.28515625, + "learning_rate": 9.968135292792294e-06, + "loss": 238342.475, + "step": 66180 + }, + { + "epoch": 0.13370798773417583, + "grad_norm": 867157.875, + "learning_rate": 9.968095934803116e-06, + "loss": 222422.4, + "step": 66190 + }, + { + "epoch": 0.13372818836685965, + "grad_norm": 79669.3828125, + "learning_rate": 9.968056552600043e-06, + "loss": 181614.5625, + "step": 66200 + }, + { + "epoch": 0.13374838899954347, + "grad_norm": 28597.478515625, + "learning_rate": 9.968017146183263e-06, + "loss": 122493.575, + "step": 66210 + }, + { + "epoch": 0.1337685896322273, + "grad_norm": 54452.20703125, + "learning_rate": 9.967977715552972e-06, + "loss": 112629.975, + "step": 66220 + }, + { + "epoch": 0.13378879026491108, + "grad_norm": 441838.90625, + "learning_rate": 9.967938260709357e-06, + "loss": 142056.5875, + "step": 66230 + }, + { + "epoch": 0.1338089908975949, + "grad_norm": 304200.53125, + "learning_rate": 9.967898781652616e-06, + "loss": 56735.3125, + "step": 66240 + }, + { + "epoch": 0.13382919153027872, + "grad_norm": 1316663.375, + "learning_rate": 9.967859278382939e-06, + "loss": 122155.375, + "step": 66250 + }, + { + "epoch": 0.13384939216296254, + "grad_norm": 3584.92529296875, + "learning_rate": 9.967819750900517e-06, + "loss": 119784.3875, + "step": 66260 + }, + { + "epoch": 0.13386959279564636, + "grad_norm": 32971.44140625, + "learning_rate": 9.967780199205544e-06, + "loss": 158110.0125, + "step": 66270 + }, + { + "epoch": 0.13388979342833018, + "grad_norm": 169440.0625, + "learning_rate": 9.967740623298214e-06, + "loss": 79669.0562, + "step": 66280 + }, + { + "epoch": 0.133909994061014, + "grad_norm": 20991.890625, + "learning_rate": 9.967701023178717e-06, + "loss": 79127.8625, + "step": 66290 + }, + { + "epoch": 0.1339301946936978, + "grad_norm": 801698.75, + "learning_rate": 9.96766139884725e-06, + "loss": 184688.05, + "step": 66300 + }, + { + "epoch": 0.13395039532638162, + "grad_norm": 4787509.5, + "learning_rate": 9.967621750304002e-06, + "loss": 183780.425, + "step": 66310 + }, + { + "epoch": 0.13397059595906544, + "grad_norm": 41274.18359375, + "learning_rate": 9.96758207754917e-06, + "loss": 98209.4625, + "step": 66320 + }, + { + "epoch": 0.13399079659174926, + "grad_norm": 28159.818359375, + "learning_rate": 9.967542380582944e-06, + "loss": 162746.8, + "step": 66330 + }, + { + "epoch": 0.13401099722443308, + "grad_norm": 699540.75, + "learning_rate": 9.96750265940552e-06, + "loss": 122652.2875, + "step": 66340 + }, + { + "epoch": 0.1340311978571169, + "grad_norm": 571930.0625, + "learning_rate": 9.967462914017087e-06, + "loss": 79969.8375, + "step": 66350 + }, + { + "epoch": 0.1340513984898007, + "grad_norm": 156251.5, + "learning_rate": 9.967423144417847e-06, + "loss": 77465.9563, + "step": 66360 + }, + { + "epoch": 0.1340715991224845, + "grad_norm": 178480.078125, + "learning_rate": 9.967383350607986e-06, + "loss": 164861.3, + "step": 66370 + }, + { + "epoch": 0.13409179975516833, + "grad_norm": 307910.5625, + "learning_rate": 9.967343532587701e-06, + "loss": 84667.2688, + "step": 66380 + }, + { + "epoch": 0.13411200038785215, + "grad_norm": 55085.66796875, + "learning_rate": 9.967303690357189e-06, + "loss": 156246.35, + "step": 66390 + }, + { + "epoch": 0.13413220102053597, + "grad_norm": 15062.4697265625, + "learning_rate": 9.967263823916638e-06, + "loss": 101374.6125, + "step": 66400 + }, + { + "epoch": 0.1341524016532198, + "grad_norm": 98526.1796875, + "learning_rate": 9.967223933266247e-06, + "loss": 85761.2312, + "step": 66410 + }, + { + "epoch": 0.1341726022859036, + "grad_norm": 385037.3125, + "learning_rate": 9.96718401840621e-06, + "loss": 159209.825, + "step": 66420 + }, + { + "epoch": 0.1341928029185874, + "grad_norm": 16247.3681640625, + "learning_rate": 9.96714407933672e-06, + "loss": 87070.6875, + "step": 66430 + }, + { + "epoch": 0.13421300355127122, + "grad_norm": 23204.943359375, + "learning_rate": 9.96710411605797e-06, + "loss": 58646.5062, + "step": 66440 + }, + { + "epoch": 0.13423320418395504, + "grad_norm": 222361.75, + "learning_rate": 9.96706412857016e-06, + "loss": 141698.05, + "step": 66450 + }, + { + "epoch": 0.13425340481663886, + "grad_norm": 86887.78125, + "learning_rate": 9.967024116873481e-06, + "loss": 56836.1375, + "step": 66460 + }, + { + "epoch": 0.13427360544932268, + "grad_norm": 504300.625, + "learning_rate": 9.966984080968128e-06, + "loss": 144197.2625, + "step": 66470 + }, + { + "epoch": 0.1342938060820065, + "grad_norm": 282534.1875, + "learning_rate": 9.966944020854297e-06, + "loss": 153241.6125, + "step": 66480 + }, + { + "epoch": 0.1343140067146903, + "grad_norm": 124177.7890625, + "learning_rate": 9.966903936532184e-06, + "loss": 83502.7375, + "step": 66490 + }, + { + "epoch": 0.1343342073473741, + "grad_norm": 738338.25, + "learning_rate": 9.966863828001982e-06, + "loss": 98937.7312, + "step": 66500 + }, + { + "epoch": 0.13435440798005793, + "grad_norm": 20885.21484375, + "learning_rate": 9.96682369526389e-06, + "loss": 57930.9187, + "step": 66510 + }, + { + "epoch": 0.13437460861274175, + "grad_norm": 1397773.75, + "learning_rate": 9.966783538318101e-06, + "loss": 152032.225, + "step": 66520 + }, + { + "epoch": 0.13439480924542557, + "grad_norm": 111810.03125, + "learning_rate": 9.966743357164812e-06, + "loss": 139936.6625, + "step": 66530 + }, + { + "epoch": 0.1344150098781094, + "grad_norm": 255498.953125, + "learning_rate": 9.966703151804219e-06, + "loss": 81011.3562, + "step": 66540 + }, + { + "epoch": 0.13443521051079318, + "grad_norm": 208756.765625, + "learning_rate": 9.966662922236515e-06, + "loss": 165127.15, + "step": 66550 + }, + { + "epoch": 0.134455411143477, + "grad_norm": 114651.75, + "learning_rate": 9.966622668461899e-06, + "loss": 222509.925, + "step": 66560 + }, + { + "epoch": 0.13447561177616082, + "grad_norm": 1007884.125, + "learning_rate": 9.966582390480567e-06, + "loss": 70709.9312, + "step": 66570 + }, + { + "epoch": 0.13449581240884464, + "grad_norm": 115306.5, + "learning_rate": 9.966542088292714e-06, + "loss": 74139.9812, + "step": 66580 + }, + { + "epoch": 0.13451601304152846, + "grad_norm": 134643.9375, + "learning_rate": 9.96650176189854e-06, + "loss": 106285.9375, + "step": 66590 + }, + { + "epoch": 0.13453621367421228, + "grad_norm": 167766.921875, + "learning_rate": 9.966461411298235e-06, + "loss": 158602.675, + "step": 66600 + }, + { + "epoch": 0.1345564143068961, + "grad_norm": 107901.765625, + "learning_rate": 9.966421036492003e-06, + "loss": 91147.2188, + "step": 66610 + }, + { + "epoch": 0.1345766149395799, + "grad_norm": 1202555.0, + "learning_rate": 9.966380637480034e-06, + "loss": 219488.1, + "step": 66620 + }, + { + "epoch": 0.13459681557226372, + "grad_norm": 91616.3359375, + "learning_rate": 9.96634021426253e-06, + "loss": 147875.1375, + "step": 66630 + }, + { + "epoch": 0.13461701620494754, + "grad_norm": 120125.75, + "learning_rate": 9.966299766839685e-06, + "loss": 257301.175, + "step": 66640 + }, + { + "epoch": 0.13463721683763136, + "grad_norm": 371068.34375, + "learning_rate": 9.966259295211698e-06, + "loss": 83897.075, + "step": 66650 + }, + { + "epoch": 0.13465741747031518, + "grad_norm": 7784.23583984375, + "learning_rate": 9.966218799378766e-06, + "loss": 80239.0125, + "step": 66660 + }, + { + "epoch": 0.134677618102999, + "grad_norm": 100973.4609375, + "learning_rate": 9.966178279341084e-06, + "loss": 181774.6, + "step": 66670 + }, + { + "epoch": 0.1346978187356828, + "grad_norm": 198220.515625, + "learning_rate": 9.966137735098853e-06, + "loss": 195954.875, + "step": 66680 + }, + { + "epoch": 0.1347180193683666, + "grad_norm": 96966.0546875, + "learning_rate": 9.966097166652268e-06, + "loss": 147084.9875, + "step": 66690 + }, + { + "epoch": 0.13473822000105043, + "grad_norm": 25627.4375, + "learning_rate": 9.966056574001528e-06, + "loss": 79542.6812, + "step": 66700 + }, + { + "epoch": 0.13475842063373425, + "grad_norm": 24844.685546875, + "learning_rate": 9.966015957146832e-06, + "loss": 156720.9375, + "step": 66710 + }, + { + "epoch": 0.13477862126641807, + "grad_norm": 20096.47265625, + "learning_rate": 9.965975316088377e-06, + "loss": 91086.5625, + "step": 66720 + }, + { + "epoch": 0.1347988218991019, + "grad_norm": 57861.27734375, + "learning_rate": 9.96593465082636e-06, + "loss": 79148.9875, + "step": 66730 + }, + { + "epoch": 0.1348190225317857, + "grad_norm": 8298.0595703125, + "learning_rate": 9.965893961360977e-06, + "loss": 39854.1844, + "step": 66740 + }, + { + "epoch": 0.1348392231644695, + "grad_norm": 26466.072265625, + "learning_rate": 9.965853247692433e-06, + "loss": 77049.8813, + "step": 66750 + }, + { + "epoch": 0.13485942379715332, + "grad_norm": 240324.921875, + "learning_rate": 9.965812509820918e-06, + "loss": 43157.0344, + "step": 66760 + }, + { + "epoch": 0.13487962442983714, + "grad_norm": 853778.25, + "learning_rate": 9.965771747746638e-06, + "loss": 163069.1375, + "step": 66770 + }, + { + "epoch": 0.13489982506252096, + "grad_norm": 85444.84375, + "learning_rate": 9.96573096146979e-06, + "loss": 51197.4719, + "step": 66780 + }, + { + "epoch": 0.13492002569520478, + "grad_norm": 125981.8359375, + "learning_rate": 9.96569015099057e-06, + "loss": 66551.4937, + "step": 66790 + }, + { + "epoch": 0.1349402263278886, + "grad_norm": 343064.53125, + "learning_rate": 9.965649316309178e-06, + "loss": 44119.1781, + "step": 66800 + }, + { + "epoch": 0.1349604269605724, + "grad_norm": 259660.765625, + "learning_rate": 9.965608457425813e-06, + "loss": 188389.9875, + "step": 66810 + }, + { + "epoch": 0.1349806275932562, + "grad_norm": 110213.921875, + "learning_rate": 9.965567574340676e-06, + "loss": 137566.3875, + "step": 66820 + }, + { + "epoch": 0.13500082822594003, + "grad_norm": 461976.46875, + "learning_rate": 9.965526667053964e-06, + "loss": 112677.4625, + "step": 66830 + }, + { + "epoch": 0.13502102885862385, + "grad_norm": 3783103.75, + "learning_rate": 9.965485735565878e-06, + "loss": 158741.6, + "step": 66840 + }, + { + "epoch": 0.13504122949130767, + "grad_norm": 1679771.75, + "learning_rate": 9.965444779876618e-06, + "loss": 163754.0875, + "step": 66850 + }, + { + "epoch": 0.1350614301239915, + "grad_norm": 310036.34375, + "learning_rate": 9.96540379998638e-06, + "loss": 170320.5125, + "step": 66860 + }, + { + "epoch": 0.13508163075667529, + "grad_norm": 15797.541015625, + "learning_rate": 9.965362795895368e-06, + "loss": 155196.1125, + "step": 66870 + }, + { + "epoch": 0.1351018313893591, + "grad_norm": 36598.8359375, + "learning_rate": 9.965321767603778e-06, + "loss": 121574.65, + "step": 66880 + }, + { + "epoch": 0.13512203202204293, + "grad_norm": 104181.859375, + "learning_rate": 9.965280715111814e-06, + "loss": 114771.6, + "step": 66890 + }, + { + "epoch": 0.13514223265472675, + "grad_norm": 106769.0546875, + "learning_rate": 9.965239638419673e-06, + "loss": 228788.35, + "step": 66900 + }, + { + "epoch": 0.13516243328741057, + "grad_norm": 152333.734375, + "learning_rate": 9.965198537527556e-06, + "loss": 138939.5875, + "step": 66910 + }, + { + "epoch": 0.13518263392009439, + "grad_norm": 1237410.0, + "learning_rate": 9.965157412435663e-06, + "loss": 161930.525, + "step": 66920 + }, + { + "epoch": 0.1352028345527782, + "grad_norm": 416093.5, + "learning_rate": 9.965116263144196e-06, + "loss": 126365.5875, + "step": 66930 + }, + { + "epoch": 0.135223035185462, + "grad_norm": 2060239.75, + "learning_rate": 9.965075089653354e-06, + "loss": 237434.9, + "step": 66940 + }, + { + "epoch": 0.13524323581814582, + "grad_norm": 719004.6875, + "learning_rate": 9.965033891963338e-06, + "loss": 104873.75, + "step": 66950 + }, + { + "epoch": 0.13526343645082964, + "grad_norm": 1773.2176513671875, + "learning_rate": 9.96499267007435e-06, + "loss": 95500.1438, + "step": 66960 + }, + { + "epoch": 0.13528363708351346, + "grad_norm": 10993.423828125, + "learning_rate": 9.964951423986588e-06, + "loss": 113097.9125, + "step": 66970 + }, + { + "epoch": 0.13530383771619728, + "grad_norm": 24526.44921875, + "learning_rate": 9.964910153700258e-06, + "loss": 192363.5125, + "step": 66980 + }, + { + "epoch": 0.1353240383488811, + "grad_norm": 1677992.625, + "learning_rate": 9.964868859215555e-06, + "loss": 83599.975, + "step": 66990 + }, + { + "epoch": 0.1353442389815649, + "grad_norm": 137272.4375, + "learning_rate": 9.964827540532685e-06, + "loss": 78944.575, + "step": 67000 + }, + { + "epoch": 0.1353644396142487, + "grad_norm": 70426.484375, + "learning_rate": 9.964786197651848e-06, + "loss": 246379.125, + "step": 67010 + }, + { + "epoch": 0.13538464024693253, + "grad_norm": 110767.921875, + "learning_rate": 9.964744830573245e-06, + "loss": 104748.6, + "step": 67020 + }, + { + "epoch": 0.13540484087961635, + "grad_norm": 29090.978515625, + "learning_rate": 9.964703439297076e-06, + "loss": 173681.9, + "step": 67030 + }, + { + "epoch": 0.13542504151230017, + "grad_norm": 98298.6171875, + "learning_rate": 9.964662023823548e-06, + "loss": 216133.7, + "step": 67040 + }, + { + "epoch": 0.135445242144984, + "grad_norm": 797618.5625, + "learning_rate": 9.964620584152858e-06, + "loss": 128963.225, + "step": 67050 + }, + { + "epoch": 0.13546544277766778, + "grad_norm": 361627.84375, + "learning_rate": 9.964579120285208e-06, + "loss": 98603.1812, + "step": 67060 + }, + { + "epoch": 0.1354856434103516, + "grad_norm": 207240.9375, + "learning_rate": 9.964537632220801e-06, + "loss": 84946.2188, + "step": 67070 + }, + { + "epoch": 0.13550584404303542, + "grad_norm": 380105.59375, + "learning_rate": 9.964496119959842e-06, + "loss": 109904.35, + "step": 67080 + }, + { + "epoch": 0.13552604467571924, + "grad_norm": 449689.28125, + "learning_rate": 9.96445458350253e-06, + "loss": 110202.1, + "step": 67090 + }, + { + "epoch": 0.13554624530840306, + "grad_norm": 59080.453125, + "learning_rate": 9.964413022849069e-06, + "loss": 76568.0, + "step": 67100 + }, + { + "epoch": 0.13556644594108688, + "grad_norm": 27390.013671875, + "learning_rate": 9.964371437999661e-06, + "loss": 147987.6625, + "step": 67110 + }, + { + "epoch": 0.1355866465737707, + "grad_norm": 899491.0625, + "learning_rate": 9.96432982895451e-06, + "loss": 178164.275, + "step": 67120 + }, + { + "epoch": 0.1356068472064545, + "grad_norm": 69254.53125, + "learning_rate": 9.964288195713814e-06, + "loss": 70044.6, + "step": 67130 + }, + { + "epoch": 0.13562704783913831, + "grad_norm": 28499.083984375, + "learning_rate": 9.964246538277782e-06, + "loss": 63251.1438, + "step": 67140 + }, + { + "epoch": 0.13564724847182213, + "grad_norm": 65700.0, + "learning_rate": 9.964204856646613e-06, + "loss": 225786.35, + "step": 67150 + }, + { + "epoch": 0.13566744910450595, + "grad_norm": 439644.40625, + "learning_rate": 9.964163150820512e-06, + "loss": 133831.9875, + "step": 67160 + }, + { + "epoch": 0.13568764973718977, + "grad_norm": 246658.640625, + "learning_rate": 9.964121420799682e-06, + "loss": 120149.7875, + "step": 67170 + }, + { + "epoch": 0.1357078503698736, + "grad_norm": 12647.3330078125, + "learning_rate": 9.964079666584327e-06, + "loss": 103700.3062, + "step": 67180 + }, + { + "epoch": 0.1357280510025574, + "grad_norm": 2702058.5, + "learning_rate": 9.96403788817465e-06, + "loss": 212759.525, + "step": 67190 + }, + { + "epoch": 0.1357482516352412, + "grad_norm": 206340.46875, + "learning_rate": 9.963996085570854e-06, + "loss": 273962.375, + "step": 67200 + }, + { + "epoch": 0.13576845226792503, + "grad_norm": 88453.6953125, + "learning_rate": 9.963954258773143e-06, + "loss": 68988.3375, + "step": 67210 + }, + { + "epoch": 0.13578865290060885, + "grad_norm": 1828465.875, + "learning_rate": 9.963912407781721e-06, + "loss": 232065.2, + "step": 67220 + }, + { + "epoch": 0.13580885353329267, + "grad_norm": 995935.625, + "learning_rate": 9.963870532596791e-06, + "loss": 145069.0, + "step": 67230 + }, + { + "epoch": 0.1358290541659765, + "grad_norm": 653302.4375, + "learning_rate": 9.96382863321856e-06, + "loss": 137729.175, + "step": 67240 + }, + { + "epoch": 0.1358492547986603, + "grad_norm": 76408.625, + "learning_rate": 9.963786709647228e-06, + "loss": 131459.75, + "step": 67250 + }, + { + "epoch": 0.1358694554313441, + "grad_norm": 128398.5546875, + "learning_rate": 9.963744761883003e-06, + "loss": 85097.4812, + "step": 67260 + }, + { + "epoch": 0.13588965606402792, + "grad_norm": 393370.34375, + "learning_rate": 9.963702789926089e-06, + "loss": 121506.1, + "step": 67270 + }, + { + "epoch": 0.13590985669671174, + "grad_norm": 2011079.5, + "learning_rate": 9.963660793776689e-06, + "loss": 296047.575, + "step": 67280 + }, + { + "epoch": 0.13593005732939556, + "grad_norm": 573738.75, + "learning_rate": 9.963618773435006e-06, + "loss": 72388.2125, + "step": 67290 + }, + { + "epoch": 0.13595025796207938, + "grad_norm": 328497.53125, + "learning_rate": 9.96357672890125e-06, + "loss": 180565.275, + "step": 67300 + }, + { + "epoch": 0.1359704585947632, + "grad_norm": 596716.0625, + "learning_rate": 9.963534660175622e-06, + "loss": 85930.7563, + "step": 67310 + }, + { + "epoch": 0.135990659227447, + "grad_norm": 53462.5859375, + "learning_rate": 9.963492567258327e-06, + "loss": 121205.225, + "step": 67320 + }, + { + "epoch": 0.1360108598601308, + "grad_norm": 514252.9375, + "learning_rate": 9.963450450149572e-06, + "loss": 66668.6562, + "step": 67330 + }, + { + "epoch": 0.13603106049281463, + "grad_norm": 751839.25, + "learning_rate": 9.963408308849563e-06, + "loss": 125239.9125, + "step": 67340 + }, + { + "epoch": 0.13605126112549845, + "grad_norm": 552507.3125, + "learning_rate": 9.963366143358502e-06, + "loss": 186669.175, + "step": 67350 + }, + { + "epoch": 0.13607146175818227, + "grad_norm": 834723.0, + "learning_rate": 9.963323953676599e-06, + "loss": 242007.1, + "step": 67360 + }, + { + "epoch": 0.1360916623908661, + "grad_norm": 517.103759765625, + "learning_rate": 9.963281739804054e-06, + "loss": 227319.55, + "step": 67370 + }, + { + "epoch": 0.13611186302354988, + "grad_norm": 40976.55859375, + "learning_rate": 9.963239501741076e-06, + "loss": 123102.6125, + "step": 67380 + }, + { + "epoch": 0.1361320636562337, + "grad_norm": 192180.265625, + "learning_rate": 9.963197239487871e-06, + "loss": 82046.8125, + "step": 67390 + }, + { + "epoch": 0.13615226428891752, + "grad_norm": 58866.16796875, + "learning_rate": 9.963154953044646e-06, + "loss": 163457.5375, + "step": 67400 + }, + { + "epoch": 0.13617246492160134, + "grad_norm": 94407.7578125, + "learning_rate": 9.963112642411606e-06, + "loss": 94287.525, + "step": 67410 + }, + { + "epoch": 0.13619266555428516, + "grad_norm": 114627.8515625, + "learning_rate": 9.963070307588955e-06, + "loss": 94472.9187, + "step": 67420 + }, + { + "epoch": 0.13621286618696898, + "grad_norm": 465063.0625, + "learning_rate": 9.963027948576902e-06, + "loss": 200313.6, + "step": 67430 + }, + { + "epoch": 0.1362330668196528, + "grad_norm": 12017.759765625, + "learning_rate": 9.96298556537565e-06, + "loss": 174485.725, + "step": 67440 + }, + { + "epoch": 0.1362532674523366, + "grad_norm": 445853.625, + "learning_rate": 9.962943157985412e-06, + "loss": 217100.15, + "step": 67450 + }, + { + "epoch": 0.13627346808502042, + "grad_norm": 1245097.0, + "learning_rate": 9.96290072640639e-06, + "loss": 110031.275, + "step": 67460 + }, + { + "epoch": 0.13629366871770424, + "grad_norm": 167322.5625, + "learning_rate": 9.962858270638793e-06, + "loss": 125152.675, + "step": 67470 + }, + { + "epoch": 0.13631386935038806, + "grad_norm": 242659.859375, + "learning_rate": 9.962815790682825e-06, + "loss": 77652.7625, + "step": 67480 + }, + { + "epoch": 0.13633406998307188, + "grad_norm": 7034.1533203125, + "learning_rate": 9.962773286538696e-06, + "loss": 47864.4688, + "step": 67490 + }, + { + "epoch": 0.1363542706157557, + "grad_norm": 70560.34375, + "learning_rate": 9.962730758206612e-06, + "loss": 110925.2625, + "step": 67500 + }, + { + "epoch": 0.1363744712484395, + "grad_norm": 4103.68896484375, + "learning_rate": 9.962688205686778e-06, + "loss": 139157.0625, + "step": 67510 + }, + { + "epoch": 0.1363946718811233, + "grad_norm": 14561.6650390625, + "learning_rate": 9.962645628979406e-06, + "loss": 79984.525, + "step": 67520 + }, + { + "epoch": 0.13641487251380713, + "grad_norm": 374056.53125, + "learning_rate": 9.962603028084699e-06, + "loss": 85967.675, + "step": 67530 + }, + { + "epoch": 0.13643507314649095, + "grad_norm": 584995.625, + "learning_rate": 9.962560403002868e-06, + "loss": 150610.0, + "step": 67540 + }, + { + "epoch": 0.13645527377917477, + "grad_norm": 30632.408203125, + "learning_rate": 9.96251775373412e-06, + "loss": 158004.1, + "step": 67550 + }, + { + "epoch": 0.1364754744118586, + "grad_norm": 143988.328125, + "learning_rate": 9.962475080278662e-06, + "loss": 56562.6062, + "step": 67560 + }, + { + "epoch": 0.1364956750445424, + "grad_norm": 53438.3046875, + "learning_rate": 9.9624323826367e-06, + "loss": 124622.075, + "step": 67570 + }, + { + "epoch": 0.1365158756772262, + "grad_norm": 58576.96875, + "learning_rate": 9.962389660808447e-06, + "loss": 116893.7, + "step": 67580 + }, + { + "epoch": 0.13653607630991002, + "grad_norm": 2090605.125, + "learning_rate": 9.96234691479411e-06, + "loss": 235451.2, + "step": 67590 + }, + { + "epoch": 0.13655627694259384, + "grad_norm": 34791.71484375, + "learning_rate": 9.962304144593893e-06, + "loss": 90150.8813, + "step": 67600 + }, + { + "epoch": 0.13657647757527766, + "grad_norm": 9667695.0, + "learning_rate": 9.962261350208008e-06, + "loss": 164301.275, + "step": 67610 + }, + { + "epoch": 0.13659667820796148, + "grad_norm": 40796.578125, + "learning_rate": 9.962218531636664e-06, + "loss": 311648.875, + "step": 67620 + }, + { + "epoch": 0.1366168788406453, + "grad_norm": 0.0, + "learning_rate": 9.962175688880067e-06, + "loss": 97461.9875, + "step": 67630 + }, + { + "epoch": 0.1366370794733291, + "grad_norm": 85083.2578125, + "learning_rate": 9.96213282193843e-06, + "loss": 89262.0188, + "step": 67640 + }, + { + "epoch": 0.1366572801060129, + "grad_norm": 47430.3671875, + "learning_rate": 9.962089930811959e-06, + "loss": 77746.0938, + "step": 67650 + }, + { + "epoch": 0.13667748073869673, + "grad_norm": 155012.609375, + "learning_rate": 9.962047015500861e-06, + "loss": 197427.85, + "step": 67660 + }, + { + "epoch": 0.13669768137138055, + "grad_norm": 1886496.375, + "learning_rate": 9.96200407600535e-06, + "loss": 185685.725, + "step": 67670 + }, + { + "epoch": 0.13671788200406437, + "grad_norm": 290717.59375, + "learning_rate": 9.961961112325633e-06, + "loss": 118272.725, + "step": 67680 + }, + { + "epoch": 0.1367380826367482, + "grad_norm": 184924.46875, + "learning_rate": 9.961918124461918e-06, + "loss": 79970.2563, + "step": 67690 + }, + { + "epoch": 0.13675828326943198, + "grad_norm": 109027.421875, + "learning_rate": 9.961875112414417e-06, + "loss": 115521.825, + "step": 67700 + }, + { + "epoch": 0.1367784839021158, + "grad_norm": 19629.91015625, + "learning_rate": 9.961832076183337e-06, + "loss": 120401.575, + "step": 67710 + }, + { + "epoch": 0.13679868453479962, + "grad_norm": 1962498.25, + "learning_rate": 9.96178901576889e-06, + "loss": 141497.775, + "step": 67720 + }, + { + "epoch": 0.13681888516748344, + "grad_norm": 509659.4375, + "learning_rate": 9.961745931171288e-06, + "loss": 139078.6125, + "step": 67730 + }, + { + "epoch": 0.13683908580016726, + "grad_norm": 48651.08984375, + "learning_rate": 9.961702822390735e-06, + "loss": 101187.1438, + "step": 67740 + }, + { + "epoch": 0.13685928643285108, + "grad_norm": 12231.0029296875, + "learning_rate": 9.961659689427444e-06, + "loss": 176170.1125, + "step": 67750 + }, + { + "epoch": 0.1368794870655349, + "grad_norm": 60948.15625, + "learning_rate": 9.961616532281626e-06, + "loss": 368183.025, + "step": 67760 + }, + { + "epoch": 0.1368996876982187, + "grad_norm": 62862.47265625, + "learning_rate": 9.961573350953491e-06, + "loss": 137663.175, + "step": 67770 + }, + { + "epoch": 0.13691988833090252, + "grad_norm": 381857.65625, + "learning_rate": 9.96153014544325e-06, + "loss": 246581.6, + "step": 67780 + }, + { + "epoch": 0.13694008896358634, + "grad_norm": 745071.4375, + "learning_rate": 9.961486915751114e-06, + "loss": 150349.2625, + "step": 67790 + }, + { + "epoch": 0.13696028959627016, + "grad_norm": 46378.87890625, + "learning_rate": 9.96144366187729e-06, + "loss": 153722.925, + "step": 67800 + }, + { + "epoch": 0.13698049022895398, + "grad_norm": 77907.6796875, + "learning_rate": 9.961400383821992e-06, + "loss": 77561.475, + "step": 67810 + }, + { + "epoch": 0.1370006908616378, + "grad_norm": 602389.25, + "learning_rate": 9.96135708158543e-06, + "loss": 104642.2, + "step": 67820 + }, + { + "epoch": 0.1370208914943216, + "grad_norm": 201769.046875, + "learning_rate": 9.961313755167816e-06, + "loss": 176727.1625, + "step": 67830 + }, + { + "epoch": 0.1370410921270054, + "grad_norm": 113076.6875, + "learning_rate": 9.961270404569358e-06, + "loss": 214695.3, + "step": 67840 + }, + { + "epoch": 0.13706129275968923, + "grad_norm": 106815.46875, + "learning_rate": 9.961227029790272e-06, + "loss": 91806.0063, + "step": 67850 + }, + { + "epoch": 0.13708149339237305, + "grad_norm": 300532.40625, + "learning_rate": 9.961183630830768e-06, + "loss": 93809.875, + "step": 67860 + }, + { + "epoch": 0.13710169402505687, + "grad_norm": 148883.5, + "learning_rate": 9.961140207691055e-06, + "loss": 118896.9625, + "step": 67870 + }, + { + "epoch": 0.1371218946577407, + "grad_norm": 142163.15625, + "learning_rate": 9.961096760371349e-06, + "loss": 166103.925, + "step": 67880 + }, + { + "epoch": 0.1371420952904245, + "grad_norm": 88260.0625, + "learning_rate": 9.961053288871855e-06, + "loss": 62102.7375, + "step": 67890 + }, + { + "epoch": 0.1371622959231083, + "grad_norm": 46221.50390625, + "learning_rate": 9.961009793192793e-06, + "loss": 53908.1312, + "step": 67900 + }, + { + "epoch": 0.13718249655579212, + "grad_norm": 164147.21875, + "learning_rate": 9.96096627333437e-06, + "loss": 162770.7625, + "step": 67910 + }, + { + "epoch": 0.13720269718847594, + "grad_norm": 30006.37890625, + "learning_rate": 9.960922729296797e-06, + "loss": 111563.45, + "step": 67920 + }, + { + "epoch": 0.13722289782115976, + "grad_norm": 340932.59375, + "learning_rate": 9.96087916108029e-06, + "loss": 88062.0625, + "step": 67930 + }, + { + "epoch": 0.13724309845384358, + "grad_norm": 76585.296875, + "learning_rate": 9.960835568685058e-06, + "loss": 127844.5625, + "step": 67940 + }, + { + "epoch": 0.1372632990865274, + "grad_norm": 1003910.3125, + "learning_rate": 9.960791952111318e-06, + "loss": 155390.8375, + "step": 67950 + }, + { + "epoch": 0.1372834997192112, + "grad_norm": 341798.46875, + "learning_rate": 9.960748311359278e-06, + "loss": 127115.3125, + "step": 67960 + }, + { + "epoch": 0.137303700351895, + "grad_norm": 157991.171875, + "learning_rate": 9.96070464642915e-06, + "loss": 66707.6, + "step": 67970 + }, + { + "epoch": 0.13732390098457883, + "grad_norm": 124400.8046875, + "learning_rate": 9.960660957321153e-06, + "loss": 128456.75, + "step": 67980 + }, + { + "epoch": 0.13734410161726265, + "grad_norm": 76581.3671875, + "learning_rate": 9.960617244035495e-06, + "loss": 63997.0, + "step": 67990 + }, + { + "epoch": 0.13736430224994647, + "grad_norm": 258254.953125, + "learning_rate": 9.960573506572391e-06, + "loss": 169699.075, + "step": 68000 + }, + { + "epoch": 0.1373845028826303, + "grad_norm": 236685.234375, + "learning_rate": 9.960529744932051e-06, + "loss": 116255.3625, + "step": 68010 + }, + { + "epoch": 0.13740470351531409, + "grad_norm": 58171.93359375, + "learning_rate": 9.960485959114693e-06, + "loss": 40798.8406, + "step": 68020 + }, + { + "epoch": 0.1374249041479979, + "grad_norm": 65074.9765625, + "learning_rate": 9.960442149120527e-06, + "loss": 115905.2625, + "step": 68030 + }, + { + "epoch": 0.13744510478068173, + "grad_norm": 27044.529296875, + "learning_rate": 9.960398314949767e-06, + "loss": 160094.6125, + "step": 68040 + }, + { + "epoch": 0.13746530541336555, + "grad_norm": 83624.75, + "learning_rate": 9.960354456602627e-06, + "loss": 54489.4187, + "step": 68050 + }, + { + "epoch": 0.13748550604604937, + "grad_norm": 103873.4609375, + "learning_rate": 9.960310574079324e-06, + "loss": 50168.9406, + "step": 68060 + }, + { + "epoch": 0.13750570667873319, + "grad_norm": 6030.49365234375, + "learning_rate": 9.960266667380065e-06, + "loss": 32492.4813, + "step": 68070 + }, + { + "epoch": 0.137525907311417, + "grad_norm": 728106.1875, + "learning_rate": 9.96022273650507e-06, + "loss": 55213.925, + "step": 68080 + }, + { + "epoch": 0.1375461079441008, + "grad_norm": 300637.84375, + "learning_rate": 9.96017878145455e-06, + "loss": 210180.6, + "step": 68090 + }, + { + "epoch": 0.13756630857678462, + "grad_norm": 5796.1875, + "learning_rate": 9.960134802228722e-06, + "loss": 72057.4875, + "step": 68100 + }, + { + "epoch": 0.13758650920946844, + "grad_norm": 167572.40625, + "learning_rate": 9.960090798827798e-06, + "loss": 86313.475, + "step": 68110 + }, + { + "epoch": 0.13760670984215226, + "grad_norm": 148339.25, + "learning_rate": 9.960046771251991e-06, + "loss": 106071.9625, + "step": 68120 + }, + { + "epoch": 0.13762691047483608, + "grad_norm": 0.0, + "learning_rate": 9.96000271950152e-06, + "loss": 115597.55, + "step": 68130 + }, + { + "epoch": 0.1376471111075199, + "grad_norm": 24161.458984375, + "learning_rate": 9.959958643576597e-06, + "loss": 54251.6312, + "step": 68140 + }, + { + "epoch": 0.1376673117402037, + "grad_norm": 951832.375, + "learning_rate": 9.959914543477436e-06, + "loss": 91667.9625, + "step": 68150 + }, + { + "epoch": 0.1376875123728875, + "grad_norm": 81372.0859375, + "learning_rate": 9.959870419204253e-06, + "loss": 96388.1, + "step": 68160 + }, + { + "epoch": 0.13770771300557133, + "grad_norm": 408172.03125, + "learning_rate": 9.959826270757265e-06, + "loss": 77033.4, + "step": 68170 + }, + { + "epoch": 0.13772791363825515, + "grad_norm": 122400.515625, + "learning_rate": 9.959782098136683e-06, + "loss": 128507.2, + "step": 68180 + }, + { + "epoch": 0.13774811427093897, + "grad_norm": 163815.984375, + "learning_rate": 9.959737901342725e-06, + "loss": 74679.4688, + "step": 68190 + }, + { + "epoch": 0.1377683149036228, + "grad_norm": 263180.25, + "learning_rate": 9.959693680375608e-06, + "loss": 109491.9875, + "step": 68200 + }, + { + "epoch": 0.1377885155363066, + "grad_norm": 23807.23046875, + "learning_rate": 9.959649435235543e-06, + "loss": 106415.8375, + "step": 68210 + }, + { + "epoch": 0.1378087161689904, + "grad_norm": 1496823.375, + "learning_rate": 9.95960516592275e-06, + "loss": 162724.1375, + "step": 68220 + }, + { + "epoch": 0.13782891680167422, + "grad_norm": 7078.15185546875, + "learning_rate": 9.959560872437443e-06, + "loss": 92008.5938, + "step": 68230 + }, + { + "epoch": 0.13784911743435804, + "grad_norm": 3718123.0, + "learning_rate": 9.959516554779838e-06, + "loss": 172005.65, + "step": 68240 + }, + { + "epoch": 0.13786931806704186, + "grad_norm": 104674.1875, + "learning_rate": 9.95947221295015e-06, + "loss": 140931.275, + "step": 68250 + }, + { + "epoch": 0.13788951869972568, + "grad_norm": 381780.59375, + "learning_rate": 9.959427846948595e-06, + "loss": 58836.4125, + "step": 68260 + }, + { + "epoch": 0.1379097193324095, + "grad_norm": 0.0, + "learning_rate": 9.959383456775392e-06, + "loss": 201462.7125, + "step": 68270 + }, + { + "epoch": 0.1379299199650933, + "grad_norm": 0.0, + "learning_rate": 9.959339042430753e-06, + "loss": 89433.775, + "step": 68280 + }, + { + "epoch": 0.13795012059777711, + "grad_norm": 431241.5625, + "learning_rate": 9.9592946039149e-06, + "loss": 151876.475, + "step": 68290 + }, + { + "epoch": 0.13797032123046093, + "grad_norm": 37416.51953125, + "learning_rate": 9.959250141228046e-06, + "loss": 54035.4125, + "step": 68300 + }, + { + "epoch": 0.13799052186314475, + "grad_norm": 2655174.5, + "learning_rate": 9.959205654370406e-06, + "loss": 263507.675, + "step": 68310 + }, + { + "epoch": 0.13801072249582857, + "grad_norm": 163836.0, + "learning_rate": 9.959161143342201e-06, + "loss": 121177.875, + "step": 68320 + }, + { + "epoch": 0.1380309231285124, + "grad_norm": 1931870.75, + "learning_rate": 9.959116608143647e-06, + "loss": 137848.85, + "step": 68330 + }, + { + "epoch": 0.1380511237611962, + "grad_norm": 116002.4375, + "learning_rate": 9.959072048774958e-06, + "loss": 179276.2, + "step": 68340 + }, + { + "epoch": 0.13807132439388, + "grad_norm": 25597.29296875, + "learning_rate": 9.959027465236354e-06, + "loss": 144057.925, + "step": 68350 + }, + { + "epoch": 0.13809152502656383, + "grad_norm": 13882.8251953125, + "learning_rate": 9.958982857528053e-06, + "loss": 86203.2375, + "step": 68360 + }, + { + "epoch": 0.13811172565924765, + "grad_norm": 7127.6513671875, + "learning_rate": 9.958938225650268e-06, + "loss": 238384.15, + "step": 68370 + }, + { + "epoch": 0.13813192629193147, + "grad_norm": 230781.234375, + "learning_rate": 9.958893569603222e-06, + "loss": 143916.075, + "step": 68380 + }, + { + "epoch": 0.1381521269246153, + "grad_norm": 1563079.25, + "learning_rate": 9.958848889387129e-06, + "loss": 143700.025, + "step": 68390 + }, + { + "epoch": 0.1381723275572991, + "grad_norm": 570993.0, + "learning_rate": 9.958804185002209e-06, + "loss": 110843.15, + "step": 68400 + }, + { + "epoch": 0.1381925281899829, + "grad_norm": 296927.59375, + "learning_rate": 9.958759456448677e-06, + "loss": 116039.0625, + "step": 68410 + }, + { + "epoch": 0.13821272882266672, + "grad_norm": 390235.84375, + "learning_rate": 9.958714703726755e-06, + "loss": 219718.9, + "step": 68420 + }, + { + "epoch": 0.13823292945535054, + "grad_norm": 60249.47265625, + "learning_rate": 9.958669926836658e-06, + "loss": 108847.1625, + "step": 68430 + }, + { + "epoch": 0.13825313008803436, + "grad_norm": 231608.453125, + "learning_rate": 9.958625125778606e-06, + "loss": 112294.125, + "step": 68440 + }, + { + "epoch": 0.13827333072071818, + "grad_norm": 7558.67041015625, + "learning_rate": 9.958580300552816e-06, + "loss": 211818.175, + "step": 68450 + }, + { + "epoch": 0.138293531353402, + "grad_norm": 61397.890625, + "learning_rate": 9.958535451159506e-06, + "loss": 96041.2937, + "step": 68460 + }, + { + "epoch": 0.1383137319860858, + "grad_norm": 1028.8887939453125, + "learning_rate": 9.958490577598896e-06, + "loss": 91573.9875, + "step": 68470 + }, + { + "epoch": 0.1383339326187696, + "grad_norm": 70280.578125, + "learning_rate": 9.958445679871204e-06, + "loss": 87100.825, + "step": 68480 + }, + { + "epoch": 0.13835413325145343, + "grad_norm": 1406248.75, + "learning_rate": 9.958400757976651e-06, + "loss": 122531.4125, + "step": 68490 + }, + { + "epoch": 0.13837433388413725, + "grad_norm": 3029615.5, + "learning_rate": 9.958355811915452e-06, + "loss": 159488.1, + "step": 68500 + }, + { + "epoch": 0.13839453451682107, + "grad_norm": 3089607.25, + "learning_rate": 9.95831084168783e-06, + "loss": 190773.35, + "step": 68510 + }, + { + "epoch": 0.1384147351495049, + "grad_norm": 119643.9609375, + "learning_rate": 9.958265847294001e-06, + "loss": 77656.8875, + "step": 68520 + }, + { + "epoch": 0.1384349357821887, + "grad_norm": 84936.8984375, + "learning_rate": 9.958220828734187e-06, + "loss": 179062.725, + "step": 68530 + }, + { + "epoch": 0.1384551364148725, + "grad_norm": 35844.19921875, + "learning_rate": 9.958175786008605e-06, + "loss": 56510.3, + "step": 68540 + }, + { + "epoch": 0.13847533704755632, + "grad_norm": 472862.1875, + "learning_rate": 9.958130719117476e-06, + "loss": 86856.2625, + "step": 68550 + }, + { + "epoch": 0.13849553768024014, + "grad_norm": 8173.7548828125, + "learning_rate": 9.958085628061018e-06, + "loss": 126729.3125, + "step": 68560 + }, + { + "epoch": 0.13851573831292396, + "grad_norm": 56072.046875, + "learning_rate": 9.958040512839453e-06, + "loss": 102961.2937, + "step": 68570 + }, + { + "epoch": 0.13853593894560778, + "grad_norm": 247659.328125, + "learning_rate": 9.957995373453e-06, + "loss": 57972.6687, + "step": 68580 + }, + { + "epoch": 0.1385561395782916, + "grad_norm": 166313.15625, + "learning_rate": 9.95795020990188e-06, + "loss": 145754.3875, + "step": 68590 + }, + { + "epoch": 0.1385763402109754, + "grad_norm": 55431.67578125, + "learning_rate": 9.957905022186309e-06, + "loss": 82964.3813, + "step": 68600 + }, + { + "epoch": 0.13859654084365922, + "grad_norm": 17312.91796875, + "learning_rate": 9.957859810306511e-06, + "loss": 134675.6125, + "step": 68610 + }, + { + "epoch": 0.13861674147634304, + "grad_norm": 1122885.125, + "learning_rate": 9.957814574262707e-06, + "loss": 91330.4563, + "step": 68620 + }, + { + "epoch": 0.13863694210902686, + "grad_norm": 195041.125, + "learning_rate": 9.957769314055117e-06, + "loss": 109430.5625, + "step": 68630 + }, + { + "epoch": 0.13865714274171068, + "grad_norm": 10786.771484375, + "learning_rate": 9.957724029683958e-06, + "loss": 39491.5062, + "step": 68640 + }, + { + "epoch": 0.1386773433743945, + "grad_norm": 1309723.75, + "learning_rate": 9.957678721149454e-06, + "loss": 112639.4, + "step": 68650 + }, + { + "epoch": 0.1386975440070783, + "grad_norm": 1228778.375, + "learning_rate": 9.957633388451827e-06, + "loss": 114808.15, + "step": 68660 + }, + { + "epoch": 0.1387177446397621, + "grad_norm": 292480.125, + "learning_rate": 9.957588031591295e-06, + "loss": 104921.825, + "step": 68670 + }, + { + "epoch": 0.13873794527244593, + "grad_norm": 17609.701171875, + "learning_rate": 9.957542650568079e-06, + "loss": 133587.9875, + "step": 68680 + }, + { + "epoch": 0.13875814590512975, + "grad_norm": 95327.2734375, + "learning_rate": 9.957497245382403e-06, + "loss": 40915.2125, + "step": 68690 + }, + { + "epoch": 0.13877834653781357, + "grad_norm": 14195.7919921875, + "learning_rate": 9.957451816034487e-06, + "loss": 109987.3, + "step": 68700 + }, + { + "epoch": 0.1387985471704974, + "grad_norm": 215658.703125, + "learning_rate": 9.95740636252455e-06, + "loss": 133876.325, + "step": 68710 + }, + { + "epoch": 0.1388187478031812, + "grad_norm": 11653.3193359375, + "learning_rate": 9.957360884852819e-06, + "loss": 85387.975, + "step": 68720 + }, + { + "epoch": 0.138838948435865, + "grad_norm": 961664.9375, + "learning_rate": 9.95731538301951e-06, + "loss": 179117.3125, + "step": 68730 + }, + { + "epoch": 0.13885914906854882, + "grad_norm": 389100.75, + "learning_rate": 9.957269857024847e-06, + "loss": 88626.3125, + "step": 68740 + }, + { + "epoch": 0.13887934970123264, + "grad_norm": 97198.8046875, + "learning_rate": 9.957224306869053e-06, + "loss": 94535.5125, + "step": 68750 + }, + { + "epoch": 0.13889955033391646, + "grad_norm": 262916.53125, + "learning_rate": 9.957178732552348e-06, + "loss": 109638.875, + "step": 68760 + }, + { + "epoch": 0.13891975096660028, + "grad_norm": 293381.71875, + "learning_rate": 9.957133134074955e-06, + "loss": 112421.65, + "step": 68770 + }, + { + "epoch": 0.1389399515992841, + "grad_norm": 20716.396484375, + "learning_rate": 9.957087511437099e-06, + "loss": 229038.625, + "step": 68780 + }, + { + "epoch": 0.1389601522319679, + "grad_norm": 39125.33984375, + "learning_rate": 9.957041864638997e-06, + "loss": 64194.6, + "step": 68790 + }, + { + "epoch": 0.1389803528646517, + "grad_norm": 94687.53125, + "learning_rate": 9.956996193680874e-06, + "loss": 139305.825, + "step": 68800 + }, + { + "epoch": 0.13900055349733553, + "grad_norm": 332421.625, + "learning_rate": 9.956950498562954e-06, + "loss": 85689.8625, + "step": 68810 + }, + { + "epoch": 0.13902075413001935, + "grad_norm": 17447.123046875, + "learning_rate": 9.956904779285457e-06, + "loss": 97079.975, + "step": 68820 + }, + { + "epoch": 0.13904095476270317, + "grad_norm": 866949.375, + "learning_rate": 9.956859035848608e-06, + "loss": 169889.1, + "step": 68830 + }, + { + "epoch": 0.139061155395387, + "grad_norm": 16863.87109375, + "learning_rate": 9.95681326825263e-06, + "loss": 238090.975, + "step": 68840 + }, + { + "epoch": 0.1390813560280708, + "grad_norm": 4176.48828125, + "learning_rate": 9.956767476497745e-06, + "loss": 113217.725, + "step": 68850 + }, + { + "epoch": 0.1391015566607546, + "grad_norm": 126254.921875, + "learning_rate": 9.956721660584175e-06, + "loss": 199211.9, + "step": 68860 + }, + { + "epoch": 0.13912175729343843, + "grad_norm": 288706.09375, + "learning_rate": 9.956675820512146e-06, + "loss": 232717.45, + "step": 68870 + }, + { + "epoch": 0.13914195792612225, + "grad_norm": 797839.9375, + "learning_rate": 9.956629956281881e-06, + "loss": 177105.7375, + "step": 68880 + }, + { + "epoch": 0.13916215855880607, + "grad_norm": 1707255.0, + "learning_rate": 9.956584067893602e-06, + "loss": 57075.8125, + "step": 68890 + }, + { + "epoch": 0.13918235919148988, + "grad_norm": 326171.65625, + "learning_rate": 9.956538155347534e-06, + "loss": 99435.1938, + "step": 68900 + }, + { + "epoch": 0.1392025598241737, + "grad_norm": 203962.203125, + "learning_rate": 9.9564922186439e-06, + "loss": 82617.8625, + "step": 68910 + }, + { + "epoch": 0.1392227604568575, + "grad_norm": 704604.625, + "learning_rate": 9.956446257782923e-06, + "loss": 160792.0875, + "step": 68920 + }, + { + "epoch": 0.13924296108954132, + "grad_norm": 353023.8125, + "learning_rate": 9.95640027276483e-06, + "loss": 189426.25, + "step": 68930 + }, + { + "epoch": 0.13926316172222514, + "grad_norm": 34037.4140625, + "learning_rate": 9.95635426358984e-06, + "loss": 95609.0625, + "step": 68940 + }, + { + "epoch": 0.13928336235490896, + "grad_norm": 776709.25, + "learning_rate": 9.956308230258182e-06, + "loss": 120573.5, + "step": 68950 + }, + { + "epoch": 0.13930356298759278, + "grad_norm": 52361.63671875, + "learning_rate": 9.956262172770082e-06, + "loss": 161287.375, + "step": 68960 + }, + { + "epoch": 0.1393237636202766, + "grad_norm": 9744.19140625, + "learning_rate": 9.956216091125756e-06, + "loss": 147281.4875, + "step": 68970 + }, + { + "epoch": 0.1393439642529604, + "grad_norm": 106935.6484375, + "learning_rate": 9.956169985325438e-06, + "loss": 155644.925, + "step": 68980 + }, + { + "epoch": 0.1393641648856442, + "grad_norm": 257147.03125, + "learning_rate": 9.956123855369346e-06, + "loss": 219514.875, + "step": 68990 + }, + { + "epoch": 0.13938436551832803, + "grad_norm": 30788.67578125, + "learning_rate": 9.95607770125771e-06, + "loss": 65649.0375, + "step": 69000 + }, + { + "epoch": 0.13940456615101185, + "grad_norm": 25714.236328125, + "learning_rate": 9.95603152299075e-06, + "loss": 217668.125, + "step": 69010 + }, + { + "epoch": 0.13942476678369567, + "grad_norm": 18504.5078125, + "learning_rate": 9.955985320568696e-06, + "loss": 56536.8125, + "step": 69020 + }, + { + "epoch": 0.1394449674163795, + "grad_norm": 706589.5625, + "learning_rate": 9.955939093991767e-06, + "loss": 63713.2875, + "step": 69030 + }, + { + "epoch": 0.1394651680490633, + "grad_norm": 91578.890625, + "learning_rate": 9.955892843260195e-06, + "loss": 145556.0375, + "step": 69040 + }, + { + "epoch": 0.1394853686817471, + "grad_norm": 189303.6875, + "learning_rate": 9.955846568374201e-06, + "loss": 89888.5, + "step": 69050 + }, + { + "epoch": 0.13950556931443092, + "grad_norm": 3668226.5, + "learning_rate": 9.955800269334013e-06, + "loss": 203647.2625, + "step": 69060 + }, + { + "epoch": 0.13952576994711474, + "grad_norm": 28943.052734375, + "learning_rate": 9.955753946139855e-06, + "loss": 172515.375, + "step": 69070 + }, + { + "epoch": 0.13954597057979856, + "grad_norm": 62251.1171875, + "learning_rate": 9.955707598791952e-06, + "loss": 100776.7875, + "step": 69080 + }, + { + "epoch": 0.13956617121248238, + "grad_norm": 177192.890625, + "learning_rate": 9.955661227290531e-06, + "loss": 233086.875, + "step": 69090 + }, + { + "epoch": 0.1395863718451662, + "grad_norm": 65506.32421875, + "learning_rate": 9.95561483163582e-06, + "loss": 152276.375, + "step": 69100 + }, + { + "epoch": 0.13960657247785, + "grad_norm": 99388.03125, + "learning_rate": 9.955568411828043e-06, + "loss": 186349.5, + "step": 69110 + }, + { + "epoch": 0.13962677311053381, + "grad_norm": 108064.5, + "learning_rate": 9.955521967867427e-06, + "loss": 210409.625, + "step": 69120 + }, + { + "epoch": 0.13964697374321763, + "grad_norm": 705179.375, + "learning_rate": 9.955475499754197e-06, + "loss": 60417.8125, + "step": 69130 + }, + { + "epoch": 0.13966717437590145, + "grad_norm": 5863.189453125, + "learning_rate": 9.955429007488582e-06, + "loss": 53791.2937, + "step": 69140 + }, + { + "epoch": 0.13968737500858527, + "grad_norm": 239916.375, + "learning_rate": 9.955382491070806e-06, + "loss": 91953.4625, + "step": 69150 + }, + { + "epoch": 0.1397075756412691, + "grad_norm": 70185.1953125, + "learning_rate": 9.955335950501097e-06, + "loss": 52429.3063, + "step": 69160 + }, + { + "epoch": 0.13972777627395291, + "grad_norm": 1161376.125, + "learning_rate": 9.955289385779681e-06, + "loss": 223322.725, + "step": 69170 + }, + { + "epoch": 0.1397479769066367, + "grad_norm": 732547.5, + "learning_rate": 9.955242796906785e-06, + "loss": 74654.675, + "step": 69180 + }, + { + "epoch": 0.13976817753932053, + "grad_norm": 35852.00390625, + "learning_rate": 9.955196183882637e-06, + "loss": 190516.3875, + "step": 69190 + }, + { + "epoch": 0.13978837817200435, + "grad_norm": 148582.328125, + "learning_rate": 9.955149546707465e-06, + "loss": 93607.1875, + "step": 69200 + }, + { + "epoch": 0.13980857880468817, + "grad_norm": 45810.03515625, + "learning_rate": 9.955102885381494e-06, + "loss": 138343.8375, + "step": 69210 + }, + { + "epoch": 0.13982877943737199, + "grad_norm": 563611.625, + "learning_rate": 9.955056199904953e-06, + "loss": 155884.575, + "step": 69220 + }, + { + "epoch": 0.1398489800700558, + "grad_norm": 59306.5703125, + "learning_rate": 9.955009490278069e-06, + "loss": 108869.1625, + "step": 69230 + }, + { + "epoch": 0.1398691807027396, + "grad_norm": 162082.984375, + "learning_rate": 9.95496275650107e-06, + "loss": 160081.775, + "step": 69240 + }, + { + "epoch": 0.13988938133542342, + "grad_norm": 67057.984375, + "learning_rate": 9.954915998574182e-06, + "loss": 39927.4969, + "step": 69250 + }, + { + "epoch": 0.13990958196810724, + "grad_norm": 700.4860229492188, + "learning_rate": 9.954869216497636e-06, + "loss": 126451.5125, + "step": 69260 + }, + { + "epoch": 0.13992978260079106, + "grad_norm": 19282.505859375, + "learning_rate": 9.954822410271657e-06, + "loss": 112949.725, + "step": 69270 + }, + { + "epoch": 0.13994998323347488, + "grad_norm": 491545.125, + "learning_rate": 9.954775579896476e-06, + "loss": 130337.5625, + "step": 69280 + }, + { + "epoch": 0.1399701838661587, + "grad_norm": 1994702.75, + "learning_rate": 9.954728725372319e-06, + "loss": 80915.7188, + "step": 69290 + }, + { + "epoch": 0.1399903844988425, + "grad_norm": 167778.140625, + "learning_rate": 9.954681846699414e-06, + "loss": 55151.0875, + "step": 69300 + }, + { + "epoch": 0.1400105851315263, + "grad_norm": 50510.390625, + "learning_rate": 9.954634943877993e-06, + "loss": 145910.0, + "step": 69310 + }, + { + "epoch": 0.14003078576421013, + "grad_norm": 11614.794921875, + "learning_rate": 9.95458801690828e-06, + "loss": 168930.8, + "step": 69320 + }, + { + "epoch": 0.14005098639689395, + "grad_norm": 63109.8046875, + "learning_rate": 9.954541065790509e-06, + "loss": 110507.8375, + "step": 69330 + }, + { + "epoch": 0.14007118702957777, + "grad_norm": 31061.6875, + "learning_rate": 9.954494090524903e-06, + "loss": 205373.575, + "step": 69340 + }, + { + "epoch": 0.1400913876622616, + "grad_norm": 455383.9375, + "learning_rate": 9.954447091111695e-06, + "loss": 59841.4187, + "step": 69350 + }, + { + "epoch": 0.1401115882949454, + "grad_norm": 489456.5, + "learning_rate": 9.95440006755111e-06, + "loss": 176861.95, + "step": 69360 + }, + { + "epoch": 0.1401317889276292, + "grad_norm": 57580.12109375, + "learning_rate": 9.954353019843384e-06, + "loss": 156379.5625, + "step": 69370 + }, + { + "epoch": 0.14015198956031302, + "grad_norm": 4275088.0, + "learning_rate": 9.95430594798874e-06, + "loss": 142184.55, + "step": 69380 + }, + { + "epoch": 0.14017219019299684, + "grad_norm": 0.0, + "learning_rate": 9.954258851987411e-06, + "loss": 49321.4281, + "step": 69390 + }, + { + "epoch": 0.14019239082568066, + "grad_norm": 451008.0, + "learning_rate": 9.954211731839623e-06, + "loss": 225757.4, + "step": 69400 + }, + { + "epoch": 0.14021259145836448, + "grad_norm": 581739.375, + "learning_rate": 9.95416458754561e-06, + "loss": 106033.8125, + "step": 69410 + }, + { + "epoch": 0.1402327920910483, + "grad_norm": 0.0, + "learning_rate": 9.954117419105599e-06, + "loss": 66532.45, + "step": 69420 + }, + { + "epoch": 0.1402529927237321, + "grad_norm": 22511.640625, + "learning_rate": 9.95407022651982e-06, + "loss": 111216.8625, + "step": 69430 + }, + { + "epoch": 0.14027319335641592, + "grad_norm": 30562.921875, + "learning_rate": 9.954023009788505e-06, + "loss": 42899.0437, + "step": 69440 + }, + { + "epoch": 0.14029339398909974, + "grad_norm": 742363.375, + "learning_rate": 9.953975768911881e-06, + "loss": 179518.6625, + "step": 69450 + }, + { + "epoch": 0.14031359462178356, + "grad_norm": 3015542.75, + "learning_rate": 9.953928503890181e-06, + "loss": 127685.85, + "step": 69460 + }, + { + "epoch": 0.14033379525446738, + "grad_norm": 15254.3564453125, + "learning_rate": 9.953881214723636e-06, + "loss": 80090.6313, + "step": 69470 + }, + { + "epoch": 0.1403539958871512, + "grad_norm": 388855.09375, + "learning_rate": 9.95383390141247e-06, + "loss": 107586.7125, + "step": 69480 + }, + { + "epoch": 0.14037419651983502, + "grad_norm": 4226.671875, + "learning_rate": 9.953786563956923e-06, + "loss": 89414.2063, + "step": 69490 + }, + { + "epoch": 0.1403943971525188, + "grad_norm": 708873.6875, + "learning_rate": 9.953739202357219e-06, + "loss": 235713.175, + "step": 69500 + }, + { + "epoch": 0.14041459778520263, + "grad_norm": 1751049.375, + "learning_rate": 9.953691816613592e-06, + "loss": 156658.85, + "step": 69510 + }, + { + "epoch": 0.14043479841788645, + "grad_norm": 547567.25, + "learning_rate": 9.95364440672627e-06, + "loss": 112675.2375, + "step": 69520 + }, + { + "epoch": 0.14045499905057027, + "grad_norm": 76906.625, + "learning_rate": 9.953596972695487e-06, + "loss": 91356.95, + "step": 69530 + }, + { + "epoch": 0.1404751996832541, + "grad_norm": 259118.28125, + "learning_rate": 9.953549514521474e-06, + "loss": 131822.325, + "step": 69540 + }, + { + "epoch": 0.1404954003159379, + "grad_norm": 19244.53125, + "learning_rate": 9.953502032204461e-06, + "loss": 47119.9437, + "step": 69550 + }, + { + "epoch": 0.1405156009486217, + "grad_norm": 0.0, + "learning_rate": 9.95345452574468e-06, + "loss": 178086.8125, + "step": 69560 + }, + { + "epoch": 0.14053580158130552, + "grad_norm": 123564.203125, + "learning_rate": 9.95340699514236e-06, + "loss": 96621.2188, + "step": 69570 + }, + { + "epoch": 0.14055600221398934, + "grad_norm": 432560.8125, + "learning_rate": 9.953359440397738e-06, + "loss": 142074.3875, + "step": 69580 + }, + { + "epoch": 0.14057620284667316, + "grad_norm": 2509908.5, + "learning_rate": 9.953311861511043e-06, + "loss": 72433.7688, + "step": 69590 + }, + { + "epoch": 0.14059640347935698, + "grad_norm": 144098.53125, + "learning_rate": 9.953264258482505e-06, + "loss": 137315.9, + "step": 69600 + }, + { + "epoch": 0.1406166041120408, + "grad_norm": 264667.78125, + "learning_rate": 9.953216631312358e-06, + "loss": 46225.8375, + "step": 69610 + }, + { + "epoch": 0.1406368047447246, + "grad_norm": 126372.0234375, + "learning_rate": 9.953168980000836e-06, + "loss": 224640.375, + "step": 69620 + }, + { + "epoch": 0.1406570053774084, + "grad_norm": 49618.17578125, + "learning_rate": 9.953121304548167e-06, + "loss": 83389.15, + "step": 69630 + }, + { + "epoch": 0.14067720601009223, + "grad_norm": 274006.90625, + "learning_rate": 9.953073604954586e-06, + "loss": 192188.65, + "step": 69640 + }, + { + "epoch": 0.14069740664277605, + "grad_norm": 12230.53125, + "learning_rate": 9.953025881220325e-06, + "loss": 117820.9375, + "step": 69650 + }, + { + "epoch": 0.14071760727545987, + "grad_norm": 1028670.9375, + "learning_rate": 9.952978133345616e-06, + "loss": 90700.7125, + "step": 69660 + }, + { + "epoch": 0.1407378079081437, + "grad_norm": 24301.05078125, + "learning_rate": 9.952930361330694e-06, + "loss": 118706.325, + "step": 69670 + }, + { + "epoch": 0.1407580085408275, + "grad_norm": 15906.9931640625, + "learning_rate": 9.952882565175788e-06, + "loss": 113286.4875, + "step": 69680 + }, + { + "epoch": 0.1407782091735113, + "grad_norm": 1288912.75, + "learning_rate": 9.952834744881135e-06, + "loss": 107852.45, + "step": 69690 + }, + { + "epoch": 0.14079840980619512, + "grad_norm": 699475.75, + "learning_rate": 9.952786900446964e-06, + "loss": 123073.5, + "step": 69700 + }, + { + "epoch": 0.14081861043887894, + "grad_norm": 0.0, + "learning_rate": 9.952739031873513e-06, + "loss": 52475.85, + "step": 69710 + }, + { + "epoch": 0.14083881107156276, + "grad_norm": 851560.625, + "learning_rate": 9.952691139161012e-06, + "loss": 193589.025, + "step": 69720 + }, + { + "epoch": 0.14085901170424658, + "grad_norm": 473780.625, + "learning_rate": 9.952643222309694e-06, + "loss": 50269.1, + "step": 69730 + }, + { + "epoch": 0.1408792123369304, + "grad_norm": 313422.875, + "learning_rate": 9.952595281319794e-06, + "loss": 83327.25, + "step": 69740 + }, + { + "epoch": 0.1408994129696142, + "grad_norm": 24075.115234375, + "learning_rate": 9.952547316191545e-06, + "loss": 42356.6625, + "step": 69750 + }, + { + "epoch": 0.14091961360229802, + "grad_norm": 1076625.25, + "learning_rate": 9.95249932692518e-06, + "loss": 168510.675, + "step": 69760 + }, + { + "epoch": 0.14093981423498184, + "grad_norm": 88433.9453125, + "learning_rate": 9.952451313520937e-06, + "loss": 226185.725, + "step": 69770 + }, + { + "epoch": 0.14096001486766566, + "grad_norm": 155283.578125, + "learning_rate": 9.952403275979046e-06, + "loss": 171385.8125, + "step": 69780 + }, + { + "epoch": 0.14098021550034948, + "grad_norm": 219787.96875, + "learning_rate": 9.95235521429974e-06, + "loss": 182147.4875, + "step": 69790 + }, + { + "epoch": 0.1410004161330333, + "grad_norm": 97701.484375, + "learning_rate": 9.952307128483257e-06, + "loss": 57730.675, + "step": 69800 + }, + { + "epoch": 0.14102061676571712, + "grad_norm": 139081.3125, + "learning_rate": 9.952259018529829e-06, + "loss": 223751.425, + "step": 69810 + }, + { + "epoch": 0.1410408173984009, + "grad_norm": 453718.40625, + "learning_rate": 9.952210884439693e-06, + "loss": 85252.7125, + "step": 69820 + }, + { + "epoch": 0.14106101803108473, + "grad_norm": 506699.96875, + "learning_rate": 9.95216272621308e-06, + "loss": 51426.4938, + "step": 69830 + }, + { + "epoch": 0.14108121866376855, + "grad_norm": 1273004.25, + "learning_rate": 9.952114543850227e-06, + "loss": 170023.5125, + "step": 69840 + }, + { + "epoch": 0.14110141929645237, + "grad_norm": 155253.671875, + "learning_rate": 9.952066337351367e-06, + "loss": 41251.8156, + "step": 69850 + }, + { + "epoch": 0.1411216199291362, + "grad_norm": 46147.58203125, + "learning_rate": 9.952018106716737e-06, + "loss": 125479.1875, + "step": 69860 + }, + { + "epoch": 0.14114182056182, + "grad_norm": 2869514.5, + "learning_rate": 9.951969851946573e-06, + "loss": 169723.0125, + "step": 69870 + }, + { + "epoch": 0.1411620211945038, + "grad_norm": 3297.5908203125, + "learning_rate": 9.951921573041107e-06, + "loss": 148610.5125, + "step": 69880 + }, + { + "epoch": 0.14118222182718762, + "grad_norm": 588704.625, + "learning_rate": 9.951873270000576e-06, + "loss": 178859.2, + "step": 69890 + }, + { + "epoch": 0.14120242245987144, + "grad_norm": 322772.78125, + "learning_rate": 9.951824942825215e-06, + "loss": 245392.4, + "step": 69900 + }, + { + "epoch": 0.14122262309255526, + "grad_norm": 99982.2734375, + "learning_rate": 9.951776591515262e-06, + "loss": 245324.35, + "step": 69910 + }, + { + "epoch": 0.14124282372523908, + "grad_norm": 0.0, + "learning_rate": 9.951728216070949e-06, + "loss": 61910.6438, + "step": 69920 + }, + { + "epoch": 0.1412630243579229, + "grad_norm": 45631.9765625, + "learning_rate": 9.951679816492513e-06, + "loss": 93199.7188, + "step": 69930 + }, + { + "epoch": 0.1412832249906067, + "grad_norm": 106278.828125, + "learning_rate": 9.951631392780189e-06, + "loss": 145238.0875, + "step": 69940 + }, + { + "epoch": 0.1413034256232905, + "grad_norm": 1655723.5, + "learning_rate": 9.951582944934215e-06, + "loss": 214331.7, + "step": 69950 + }, + { + "epoch": 0.14132362625597433, + "grad_norm": 1428496.0, + "learning_rate": 9.951534472954826e-06, + "loss": 105009.4, + "step": 69960 + }, + { + "epoch": 0.14134382688865815, + "grad_norm": 52928.8046875, + "learning_rate": 9.95148597684226e-06, + "loss": 123143.65, + "step": 69970 + }, + { + "epoch": 0.14136402752134197, + "grad_norm": 51582.1796875, + "learning_rate": 9.951437456596751e-06, + "loss": 44984.0875, + "step": 69980 + }, + { + "epoch": 0.1413842281540258, + "grad_norm": 486771.21875, + "learning_rate": 9.951388912218536e-06, + "loss": 199144.675, + "step": 69990 + }, + { + "epoch": 0.1414044287867096, + "grad_norm": 535623.875, + "learning_rate": 9.951340343707852e-06, + "loss": 58406.5312, + "step": 70000 + }, + { + "epoch": 0.1414246294193934, + "grad_norm": 243288.0, + "learning_rate": 9.951291751064937e-06, + "loss": 103815.475, + "step": 70010 + }, + { + "epoch": 0.14144483005207723, + "grad_norm": 120995.421875, + "learning_rate": 9.951243134290025e-06, + "loss": 55382.8938, + "step": 70020 + }, + { + "epoch": 0.14146503068476105, + "grad_norm": 1118060.875, + "learning_rate": 9.951194493383355e-06, + "loss": 163773.6125, + "step": 70030 + }, + { + "epoch": 0.14148523131744487, + "grad_norm": 690365.9375, + "learning_rate": 9.951145828345163e-06, + "loss": 154112.925, + "step": 70040 + }, + { + "epoch": 0.14150543195012869, + "grad_norm": 907012.5625, + "learning_rate": 9.951097139175688e-06, + "loss": 116776.4, + "step": 70050 + }, + { + "epoch": 0.1415256325828125, + "grad_norm": 806510.125, + "learning_rate": 9.951048425875165e-06, + "loss": 96423.1125, + "step": 70060 + }, + { + "epoch": 0.1415458332154963, + "grad_norm": 345990.8125, + "learning_rate": 9.950999688443833e-06, + "loss": 84572.5875, + "step": 70070 + }, + { + "epoch": 0.14156603384818012, + "grad_norm": 1623701.125, + "learning_rate": 9.950950926881928e-06, + "loss": 213036.5, + "step": 70080 + }, + { + "epoch": 0.14158623448086394, + "grad_norm": 3696401.75, + "learning_rate": 9.950902141189691e-06, + "loss": 184700.7, + "step": 70090 + }, + { + "epoch": 0.14160643511354776, + "grad_norm": 134687.828125, + "learning_rate": 9.950853331367356e-06, + "loss": 238073.55, + "step": 70100 + }, + { + "epoch": 0.14162663574623158, + "grad_norm": 7006.99560546875, + "learning_rate": 9.95080449741516e-06, + "loss": 156907.2375, + "step": 70110 + }, + { + "epoch": 0.1416468363789154, + "grad_norm": 1648742.875, + "learning_rate": 9.950755639333347e-06, + "loss": 203201.025, + "step": 70120 + }, + { + "epoch": 0.1416670370115992, + "grad_norm": 752049.9375, + "learning_rate": 9.95070675712215e-06, + "loss": 197113.825, + "step": 70130 + }, + { + "epoch": 0.141687237644283, + "grad_norm": 746315.1875, + "learning_rate": 9.950657850781809e-06, + "loss": 64721.1062, + "step": 70140 + }, + { + "epoch": 0.14170743827696683, + "grad_norm": 53727.6953125, + "learning_rate": 9.95060892031256e-06, + "loss": 146163.5375, + "step": 70150 + }, + { + "epoch": 0.14172763890965065, + "grad_norm": 172882.296875, + "learning_rate": 9.950559965714647e-06, + "loss": 242644.85, + "step": 70160 + }, + { + "epoch": 0.14174783954233447, + "grad_norm": 7875.47802734375, + "learning_rate": 9.950510986988304e-06, + "loss": 76314.2125, + "step": 70170 + }, + { + "epoch": 0.1417680401750183, + "grad_norm": 219769.640625, + "learning_rate": 9.95046198413377e-06, + "loss": 95426.7875, + "step": 70180 + }, + { + "epoch": 0.1417882408077021, + "grad_norm": 55165.09375, + "learning_rate": 9.950412957151286e-06, + "loss": 129886.9875, + "step": 70190 + }, + { + "epoch": 0.1418084414403859, + "grad_norm": 0.0, + "learning_rate": 9.950363906041089e-06, + "loss": 160963.4375, + "step": 70200 + }, + { + "epoch": 0.14182864207306972, + "grad_norm": 191825.109375, + "learning_rate": 9.950314830803418e-06, + "loss": 71035.6875, + "step": 70210 + }, + { + "epoch": 0.14184884270575354, + "grad_norm": 9065067.0, + "learning_rate": 9.950265731438513e-06, + "loss": 228538.6, + "step": 70220 + }, + { + "epoch": 0.14186904333843736, + "grad_norm": 263983.90625, + "learning_rate": 9.950216607946614e-06, + "loss": 219122.8, + "step": 70230 + }, + { + "epoch": 0.14188924397112118, + "grad_norm": 929750.5625, + "learning_rate": 9.95016746032796e-06, + "loss": 146275.275, + "step": 70240 + }, + { + "epoch": 0.141909444603805, + "grad_norm": 4818.89208984375, + "learning_rate": 9.95011828858279e-06, + "loss": 206530.6625, + "step": 70250 + }, + { + "epoch": 0.1419296452364888, + "grad_norm": 67780.234375, + "learning_rate": 9.950069092711342e-06, + "loss": 110450.9, + "step": 70260 + }, + { + "epoch": 0.14194984586917261, + "grad_norm": 2251.358154296875, + "learning_rate": 9.950019872713858e-06, + "loss": 174274.5625, + "step": 70270 + }, + { + "epoch": 0.14197004650185643, + "grad_norm": 32819.125, + "learning_rate": 9.94997062859058e-06, + "loss": 33330.8469, + "step": 70280 + }, + { + "epoch": 0.14199024713454025, + "grad_norm": 737241.5, + "learning_rate": 9.949921360341743e-06, + "loss": 90894.15, + "step": 70290 + }, + { + "epoch": 0.14201044776722407, + "grad_norm": 9499.416015625, + "learning_rate": 9.94987206796759e-06, + "loss": 53341.35, + "step": 70300 + }, + { + "epoch": 0.1420306483999079, + "grad_norm": 37947.16015625, + "learning_rate": 9.949822751468364e-06, + "loss": 68763.1375, + "step": 70310 + }, + { + "epoch": 0.14205084903259171, + "grad_norm": 110150.46875, + "learning_rate": 9.949773410844299e-06, + "loss": 198515.0625, + "step": 70320 + }, + { + "epoch": 0.1420710496652755, + "grad_norm": 55040.41796875, + "learning_rate": 9.94972404609564e-06, + "loss": 121435.025, + "step": 70330 + }, + { + "epoch": 0.14209125029795933, + "grad_norm": 200770.921875, + "learning_rate": 9.949674657222624e-06, + "loss": 101871.3562, + "step": 70340 + }, + { + "epoch": 0.14211145093064315, + "grad_norm": 3052881.25, + "learning_rate": 9.949625244225496e-06, + "loss": 157894.95, + "step": 70350 + }, + { + "epoch": 0.14213165156332697, + "grad_norm": 629836.0625, + "learning_rate": 9.949575807104494e-06, + "loss": 50515.9563, + "step": 70360 + }, + { + "epoch": 0.1421518521960108, + "grad_norm": 707088.8125, + "learning_rate": 9.94952634585986e-06, + "loss": 141699.375, + "step": 70370 + }, + { + "epoch": 0.1421720528286946, + "grad_norm": 8971.9287109375, + "learning_rate": 9.949476860491836e-06, + "loss": 169536.175, + "step": 70380 + }, + { + "epoch": 0.1421922534613784, + "grad_norm": 167884.046875, + "learning_rate": 9.949427351000662e-06, + "loss": 100787.6, + "step": 70390 + }, + { + "epoch": 0.14221245409406222, + "grad_norm": 140660.203125, + "learning_rate": 9.94937781738658e-06, + "loss": 95366.8562, + "step": 70400 + }, + { + "epoch": 0.14223265472674604, + "grad_norm": 393188.65625, + "learning_rate": 9.949328259649828e-06, + "loss": 33004.3875, + "step": 70410 + }, + { + "epoch": 0.14225285535942986, + "grad_norm": 11683.609375, + "learning_rate": 9.949278677790653e-06, + "loss": 52917.5625, + "step": 70420 + }, + { + "epoch": 0.14227305599211368, + "grad_norm": 75924.3828125, + "learning_rate": 9.949229071809294e-06, + "loss": 74928.3687, + "step": 70430 + }, + { + "epoch": 0.1422932566247975, + "grad_norm": 272535.21875, + "learning_rate": 9.949179441705992e-06, + "loss": 169417.2875, + "step": 70440 + }, + { + "epoch": 0.1423134572574813, + "grad_norm": 8263.5986328125, + "learning_rate": 9.949129787480988e-06, + "loss": 64410.8938, + "step": 70450 + }, + { + "epoch": 0.1423336578901651, + "grad_norm": 71779.2421875, + "learning_rate": 9.949080109134528e-06, + "loss": 133852.6625, + "step": 70460 + }, + { + "epoch": 0.14235385852284893, + "grad_norm": 175576.296875, + "learning_rate": 9.949030406666852e-06, + "loss": 59895.4437, + "step": 70470 + }, + { + "epoch": 0.14237405915553275, + "grad_norm": 32503.126953125, + "learning_rate": 9.948980680078199e-06, + "loss": 147185.725, + "step": 70480 + }, + { + "epoch": 0.14239425978821657, + "grad_norm": 31075.58984375, + "learning_rate": 9.948930929368818e-06, + "loss": 103534.55, + "step": 70490 + }, + { + "epoch": 0.1424144604209004, + "grad_norm": 1222670.875, + "learning_rate": 9.948881154538946e-06, + "loss": 144159.4125, + "step": 70500 + }, + { + "epoch": 0.1424346610535842, + "grad_norm": 644992.875, + "learning_rate": 9.948831355588828e-06, + "loss": 156298.8125, + "step": 70510 + }, + { + "epoch": 0.142454861686268, + "grad_norm": 119990.0, + "learning_rate": 9.948781532518706e-06, + "loss": 154966.2375, + "step": 70520 + }, + { + "epoch": 0.14247506231895182, + "grad_norm": 108017.96875, + "learning_rate": 9.948731685328823e-06, + "loss": 136595.6375, + "step": 70530 + }, + { + "epoch": 0.14249526295163564, + "grad_norm": 104017.1875, + "learning_rate": 9.948681814019421e-06, + "loss": 130914.475, + "step": 70540 + }, + { + "epoch": 0.14251546358431946, + "grad_norm": 14639.193359375, + "learning_rate": 9.948631918590746e-06, + "loss": 130409.2125, + "step": 70550 + }, + { + "epoch": 0.14253566421700328, + "grad_norm": 984406.4375, + "learning_rate": 9.948581999043038e-06, + "loss": 237543.25, + "step": 70560 + }, + { + "epoch": 0.1425558648496871, + "grad_norm": 3576140.25, + "learning_rate": 9.948532055376541e-06, + "loss": 250770.45, + "step": 70570 + }, + { + "epoch": 0.1425760654823709, + "grad_norm": 1265394.375, + "learning_rate": 9.9484820875915e-06, + "loss": 108715.85, + "step": 70580 + }, + { + "epoch": 0.14259626611505472, + "grad_norm": 19507.017578125, + "learning_rate": 9.948432095688157e-06, + "loss": 74623.6, + "step": 70590 + }, + { + "epoch": 0.14261646674773854, + "grad_norm": 245879.640625, + "learning_rate": 9.948382079666756e-06, + "loss": 89728.825, + "step": 70600 + }, + { + "epoch": 0.14263666738042236, + "grad_norm": 65439.8828125, + "learning_rate": 9.948332039527541e-06, + "loss": 190812.7875, + "step": 70610 + }, + { + "epoch": 0.14265686801310618, + "grad_norm": 97049.3671875, + "learning_rate": 9.948281975270758e-06, + "loss": 76002.0125, + "step": 70620 + }, + { + "epoch": 0.14267706864579, + "grad_norm": 34364.3203125, + "learning_rate": 9.948231886896646e-06, + "loss": 39557.5531, + "step": 70630 + }, + { + "epoch": 0.14269726927847382, + "grad_norm": 1213589.75, + "learning_rate": 9.948181774405453e-06, + "loss": 138670.825, + "step": 70640 + }, + { + "epoch": 0.1427174699111576, + "grad_norm": 121773.8125, + "learning_rate": 9.94813163779742e-06, + "loss": 137940.3625, + "step": 70650 + }, + { + "epoch": 0.14273767054384143, + "grad_norm": 266259.59375, + "learning_rate": 9.948081477072797e-06, + "loss": 106334.2, + "step": 70660 + }, + { + "epoch": 0.14275787117652525, + "grad_norm": 279760.90625, + "learning_rate": 9.948031292231823e-06, + "loss": 223034.875, + "step": 70670 + }, + { + "epoch": 0.14277807180920907, + "grad_norm": 45697.6953125, + "learning_rate": 9.947981083274747e-06, + "loss": 55526.2688, + "step": 70680 + }, + { + "epoch": 0.1427982724418929, + "grad_norm": 74454.625, + "learning_rate": 9.947930850201808e-06, + "loss": 96239.6187, + "step": 70690 + }, + { + "epoch": 0.1428184730745767, + "grad_norm": 475982.40625, + "learning_rate": 9.947880593013256e-06, + "loss": 90355.0875, + "step": 70700 + }, + { + "epoch": 0.1428386737072605, + "grad_norm": 90500.359375, + "learning_rate": 9.947830311709333e-06, + "loss": 83638.7875, + "step": 70710 + }, + { + "epoch": 0.14285887433994432, + "grad_norm": 791981.6875, + "learning_rate": 9.947780006290287e-06, + "loss": 199250.15, + "step": 70720 + }, + { + "epoch": 0.14287907497262814, + "grad_norm": 66063.09375, + "learning_rate": 9.947729676756359e-06, + "loss": 65202.35, + "step": 70730 + }, + { + "epoch": 0.14289927560531196, + "grad_norm": 397290.125, + "learning_rate": 9.947679323107798e-06, + "loss": 166086.3375, + "step": 70740 + }, + { + "epoch": 0.14291947623799578, + "grad_norm": 166087.46875, + "learning_rate": 9.947628945344849e-06, + "loss": 199366.125, + "step": 70750 + }, + { + "epoch": 0.1429396768706796, + "grad_norm": 167357.859375, + "learning_rate": 9.947578543467755e-06, + "loss": 108106.175, + "step": 70760 + }, + { + "epoch": 0.1429598775033634, + "grad_norm": 462502.0625, + "learning_rate": 9.947528117476764e-06, + "loss": 173664.775, + "step": 70770 + }, + { + "epoch": 0.1429800781360472, + "grad_norm": 1348252.375, + "learning_rate": 9.94747766737212e-06, + "loss": 239619.8, + "step": 70780 + }, + { + "epoch": 0.14300027876873103, + "grad_norm": 0.0, + "learning_rate": 9.94742719315407e-06, + "loss": 164368.225, + "step": 70790 + }, + { + "epoch": 0.14302047940141485, + "grad_norm": 8327.72265625, + "learning_rate": 9.947376694822861e-06, + "loss": 103489.175, + "step": 70800 + }, + { + "epoch": 0.14304068003409867, + "grad_norm": 255406.8125, + "learning_rate": 9.947326172378736e-06, + "loss": 104604.4937, + "step": 70810 + }, + { + "epoch": 0.1430608806667825, + "grad_norm": 947223.5625, + "learning_rate": 9.947275625821947e-06, + "loss": 170236.0125, + "step": 70820 + }, + { + "epoch": 0.1430810812994663, + "grad_norm": 22230.8828125, + "learning_rate": 9.947225055152735e-06, + "loss": 111400.75, + "step": 70830 + }, + { + "epoch": 0.1431012819321501, + "grad_norm": 319828.59375, + "learning_rate": 9.947174460371347e-06, + "loss": 178601.9, + "step": 70840 + }, + { + "epoch": 0.14312148256483392, + "grad_norm": 48213.65625, + "learning_rate": 9.947123841478032e-06, + "loss": 152725.0125, + "step": 70850 + }, + { + "epoch": 0.14314168319751774, + "grad_norm": 467159.84375, + "learning_rate": 9.947073198473034e-06, + "loss": 103806.525, + "step": 70860 + }, + { + "epoch": 0.14316188383020156, + "grad_norm": 138883.734375, + "learning_rate": 9.947022531356602e-06, + "loss": 123052.2375, + "step": 70870 + }, + { + "epoch": 0.14318208446288538, + "grad_norm": 15965.115234375, + "learning_rate": 9.946971840128982e-06, + "loss": 105730.2125, + "step": 70880 + }, + { + "epoch": 0.1432022850955692, + "grad_norm": 267536.21875, + "learning_rate": 9.94692112479042e-06, + "loss": 125844.525, + "step": 70890 + }, + { + "epoch": 0.143222485728253, + "grad_norm": 663017.0625, + "learning_rate": 9.946870385341167e-06, + "loss": 78113.675, + "step": 70900 + }, + { + "epoch": 0.14324268636093682, + "grad_norm": 281857.21875, + "learning_rate": 9.946819621781467e-06, + "loss": 75798.1938, + "step": 70910 + }, + { + "epoch": 0.14326288699362064, + "grad_norm": 0.0, + "learning_rate": 9.946768834111568e-06, + "loss": 102867.1438, + "step": 70920 + }, + { + "epoch": 0.14328308762630446, + "grad_norm": 126985.1484375, + "learning_rate": 9.946718022331715e-06, + "loss": 175323.3625, + "step": 70930 + }, + { + "epoch": 0.14330328825898828, + "grad_norm": 176829.40625, + "learning_rate": 9.946667186442162e-06, + "loss": 98159.0562, + "step": 70940 + }, + { + "epoch": 0.1433234888916721, + "grad_norm": 2356852.5, + "learning_rate": 9.946616326443153e-06, + "loss": 234495.25, + "step": 70950 + }, + { + "epoch": 0.14334368952435592, + "grad_norm": 236632.125, + "learning_rate": 9.946565442334935e-06, + "loss": 120210.8, + "step": 70960 + }, + { + "epoch": 0.1433638901570397, + "grad_norm": 15827.505859375, + "learning_rate": 9.946514534117755e-06, + "loss": 79450.4688, + "step": 70970 + }, + { + "epoch": 0.14338409078972353, + "grad_norm": 190793.890625, + "learning_rate": 9.946463601791865e-06, + "loss": 201626.9375, + "step": 70980 + }, + { + "epoch": 0.14340429142240735, + "grad_norm": 298073.5625, + "learning_rate": 9.94641264535751e-06, + "loss": 132993.8625, + "step": 70990 + }, + { + "epoch": 0.14342449205509117, + "grad_norm": 33988.453125, + "learning_rate": 9.946361664814942e-06, + "loss": 169579.3875, + "step": 71000 + }, + { + "epoch": 0.143444692687775, + "grad_norm": 27877.216796875, + "learning_rate": 9.946310660164407e-06, + "loss": 43631.6062, + "step": 71010 + }, + { + "epoch": 0.1434648933204588, + "grad_norm": 297021.46875, + "learning_rate": 9.946259631406153e-06, + "loss": 58214.0125, + "step": 71020 + }, + { + "epoch": 0.1434850939531426, + "grad_norm": 687277.1875, + "learning_rate": 9.946208578540428e-06, + "loss": 147826.7625, + "step": 71030 + }, + { + "epoch": 0.14350529458582642, + "grad_norm": 600244.5, + "learning_rate": 9.946157501567484e-06, + "loss": 49616.85, + "step": 71040 + }, + { + "epoch": 0.14352549521851024, + "grad_norm": 187090.890625, + "learning_rate": 9.946106400487568e-06, + "loss": 78130.525, + "step": 71050 + }, + { + "epoch": 0.14354569585119406, + "grad_norm": 133255.328125, + "learning_rate": 9.946055275300929e-06, + "loss": 87500.025, + "step": 71060 + }, + { + "epoch": 0.14356589648387788, + "grad_norm": 2572881.5, + "learning_rate": 9.946004126007817e-06, + "loss": 114909.8, + "step": 71070 + }, + { + "epoch": 0.1435860971165617, + "grad_norm": 697478.3125, + "learning_rate": 9.94595295260848e-06, + "loss": 141981.1, + "step": 71080 + }, + { + "epoch": 0.1436062977492455, + "grad_norm": 0.0, + "learning_rate": 9.945901755103169e-06, + "loss": 146351.4, + "step": 71090 + }, + { + "epoch": 0.1436264983819293, + "grad_norm": 294994.84375, + "learning_rate": 9.945850533492132e-06, + "loss": 117662.5375, + "step": 71100 + }, + { + "epoch": 0.14364669901461313, + "grad_norm": 288175.125, + "learning_rate": 9.94579928777562e-06, + "loss": 134449.675, + "step": 71110 + }, + { + "epoch": 0.14366689964729695, + "grad_norm": 85720.078125, + "learning_rate": 9.94574801795388e-06, + "loss": 151423.25, + "step": 71120 + }, + { + "epoch": 0.14368710027998077, + "grad_norm": 26869.58203125, + "learning_rate": 9.945696724027166e-06, + "loss": 200357.925, + "step": 71130 + }, + { + "epoch": 0.1437073009126646, + "grad_norm": 206584.53125, + "learning_rate": 9.945645405995726e-06, + "loss": 90632.65, + "step": 71140 + }, + { + "epoch": 0.1437275015453484, + "grad_norm": 209464.71875, + "learning_rate": 9.94559406385981e-06, + "loss": 46121.8438, + "step": 71150 + }, + { + "epoch": 0.1437477021780322, + "grad_norm": 290629.40625, + "learning_rate": 9.945542697619667e-06, + "loss": 104142.85, + "step": 71160 + }, + { + "epoch": 0.14376790281071603, + "grad_norm": 272458.125, + "learning_rate": 9.94549130727555e-06, + "loss": 145884.075, + "step": 71170 + }, + { + "epoch": 0.14378810344339985, + "grad_norm": 19979.330078125, + "learning_rate": 9.945439892827709e-06, + "loss": 71024.2937, + "step": 71180 + }, + { + "epoch": 0.14380830407608367, + "grad_norm": 68036.734375, + "learning_rate": 9.945388454276392e-06, + "loss": 88124.4, + "step": 71190 + }, + { + "epoch": 0.14382850470876749, + "grad_norm": 64501.95703125, + "learning_rate": 9.945336991621854e-06, + "loss": 326187.3, + "step": 71200 + }, + { + "epoch": 0.1438487053414513, + "grad_norm": 331383.59375, + "learning_rate": 9.945285504864342e-06, + "loss": 115320.4, + "step": 71210 + }, + { + "epoch": 0.1438689059741351, + "grad_norm": 122503.9765625, + "learning_rate": 9.945233994004107e-06, + "loss": 92508.4, + "step": 71220 + }, + { + "epoch": 0.14388910660681892, + "grad_norm": 12269.8017578125, + "learning_rate": 9.945182459041403e-06, + "loss": 119655.625, + "step": 71230 + }, + { + "epoch": 0.14390930723950274, + "grad_norm": 83112.4765625, + "learning_rate": 9.945130899976477e-06, + "loss": 156081.2, + "step": 71240 + }, + { + "epoch": 0.14392950787218656, + "grad_norm": 21309.24609375, + "learning_rate": 9.945079316809585e-06, + "loss": 36777.2375, + "step": 71250 + }, + { + "epoch": 0.14394970850487038, + "grad_norm": 247092.34375, + "learning_rate": 9.945027709540975e-06, + "loss": 116718.325, + "step": 71260 + }, + { + "epoch": 0.1439699091375542, + "grad_norm": 237657.9375, + "learning_rate": 9.9449760781709e-06, + "loss": 148716.7375, + "step": 71270 + }, + { + "epoch": 0.14399010977023802, + "grad_norm": 11813.0791015625, + "learning_rate": 9.944924422699613e-06, + "loss": 79382.3625, + "step": 71280 + }, + { + "epoch": 0.1440103104029218, + "grad_norm": 48140.3828125, + "learning_rate": 9.944872743127363e-06, + "loss": 214997.575, + "step": 71290 + }, + { + "epoch": 0.14403051103560563, + "grad_norm": 225638.890625, + "learning_rate": 9.944821039454403e-06, + "loss": 152457.6875, + "step": 71300 + }, + { + "epoch": 0.14405071166828945, + "grad_norm": 133195.203125, + "learning_rate": 9.944769311680984e-06, + "loss": 231636.375, + "step": 71310 + }, + { + "epoch": 0.14407091230097327, + "grad_norm": 120436.3125, + "learning_rate": 9.94471755980736e-06, + "loss": 84137.125, + "step": 71320 + }, + { + "epoch": 0.1440911129336571, + "grad_norm": 634733.375, + "learning_rate": 9.944665783833782e-06, + "loss": 82231.975, + "step": 71330 + }, + { + "epoch": 0.1441113135663409, + "grad_norm": 552183.5, + "learning_rate": 9.944613983760503e-06, + "loss": 127974.25, + "step": 71340 + }, + { + "epoch": 0.1441315141990247, + "grad_norm": 246018.625, + "learning_rate": 9.944562159587774e-06, + "loss": 151791.775, + "step": 71350 + }, + { + "epoch": 0.14415171483170852, + "grad_norm": 12697.712890625, + "learning_rate": 9.94451031131585e-06, + "loss": 81337.4875, + "step": 71360 + }, + { + "epoch": 0.14417191546439234, + "grad_norm": 65381.76953125, + "learning_rate": 9.944458438944983e-06, + "loss": 96139.375, + "step": 71370 + }, + { + "epoch": 0.14419211609707616, + "grad_norm": 31077.173828125, + "learning_rate": 9.944406542475425e-06, + "loss": 132266.3875, + "step": 71380 + }, + { + "epoch": 0.14421231672975998, + "grad_norm": 1800510.375, + "learning_rate": 9.944354621907428e-06, + "loss": 74599.2437, + "step": 71390 + }, + { + "epoch": 0.1442325173624438, + "grad_norm": 150278.328125, + "learning_rate": 9.944302677241247e-06, + "loss": 101919.2188, + "step": 71400 + }, + { + "epoch": 0.1442527179951276, + "grad_norm": 535383.375, + "learning_rate": 9.944250708477135e-06, + "loss": 153907.4375, + "step": 71410 + }, + { + "epoch": 0.14427291862781141, + "grad_norm": 941580.625, + "learning_rate": 9.944198715615343e-06, + "loss": 253625.1, + "step": 71420 + }, + { + "epoch": 0.14429311926049523, + "grad_norm": 206171.53125, + "learning_rate": 9.944146698656127e-06, + "loss": 137001.6125, + "step": 71430 + }, + { + "epoch": 0.14431331989317905, + "grad_norm": 68935.3046875, + "learning_rate": 9.94409465759974e-06, + "loss": 66314.3375, + "step": 71440 + }, + { + "epoch": 0.14433352052586287, + "grad_norm": 144439.3125, + "learning_rate": 9.944042592446434e-06, + "loss": 44869.8625, + "step": 71450 + }, + { + "epoch": 0.1443537211585467, + "grad_norm": 39348.72265625, + "learning_rate": 9.943990503196466e-06, + "loss": 163949.55, + "step": 71460 + }, + { + "epoch": 0.14437392179123051, + "grad_norm": 379455.4375, + "learning_rate": 9.943938389850087e-06, + "loss": 107502.325, + "step": 71470 + }, + { + "epoch": 0.1443941224239143, + "grad_norm": 46873.5625, + "learning_rate": 9.943886252407551e-06, + "loss": 177761.475, + "step": 71480 + }, + { + "epoch": 0.14441432305659813, + "grad_norm": 105712.3359375, + "learning_rate": 9.943834090869116e-06, + "loss": 91608.825, + "step": 71490 + }, + { + "epoch": 0.14443452368928195, + "grad_norm": 25525.6875, + "learning_rate": 9.94378190523503e-06, + "loss": 107744.2875, + "step": 71500 + }, + { + "epoch": 0.14445472432196577, + "grad_norm": 50047.56640625, + "learning_rate": 9.943729695505552e-06, + "loss": 88437.5938, + "step": 71510 + }, + { + "epoch": 0.1444749249546496, + "grad_norm": 166264.453125, + "learning_rate": 9.943677461680935e-06, + "loss": 242553.85, + "step": 71520 + }, + { + "epoch": 0.1444951255873334, + "grad_norm": 235732.140625, + "learning_rate": 9.943625203761434e-06, + "loss": 67950.9563, + "step": 71530 + }, + { + "epoch": 0.1445153262200172, + "grad_norm": 20432.20703125, + "learning_rate": 9.943572921747302e-06, + "loss": 136388.7375, + "step": 71540 + }, + { + "epoch": 0.14453552685270102, + "grad_norm": 399233.65625, + "learning_rate": 9.943520615638796e-06, + "loss": 237186.275, + "step": 71550 + }, + { + "epoch": 0.14455572748538484, + "grad_norm": 30325.47265625, + "learning_rate": 9.943468285436171e-06, + "loss": 139118.675, + "step": 71560 + }, + { + "epoch": 0.14457592811806866, + "grad_norm": 374125.625, + "learning_rate": 9.94341593113968e-06, + "loss": 142978.9125, + "step": 71570 + }, + { + "epoch": 0.14459612875075248, + "grad_norm": 14512.78515625, + "learning_rate": 9.943363552749579e-06, + "loss": 55554.6875, + "step": 71580 + }, + { + "epoch": 0.1446163293834363, + "grad_norm": 1097593.5, + "learning_rate": 9.943311150266124e-06, + "loss": 65858.1625, + "step": 71590 + }, + { + "epoch": 0.14463653001612012, + "grad_norm": 538962.1875, + "learning_rate": 9.94325872368957e-06, + "loss": 192735.0, + "step": 71600 + }, + { + "epoch": 0.1446567306488039, + "grad_norm": 255164.421875, + "learning_rate": 9.943206273020174e-06, + "loss": 290107.45, + "step": 71610 + }, + { + "epoch": 0.14467693128148773, + "grad_norm": 439210.28125, + "learning_rate": 9.943153798258188e-06, + "loss": 117155.7, + "step": 71620 + }, + { + "epoch": 0.14469713191417155, + "grad_norm": 752925.9375, + "learning_rate": 9.94310129940387e-06, + "loss": 93406.0625, + "step": 71630 + }, + { + "epoch": 0.14471733254685537, + "grad_norm": 19147.3671875, + "learning_rate": 9.943048776457479e-06, + "loss": 73738.1187, + "step": 71640 + }, + { + "epoch": 0.1447375331795392, + "grad_norm": 52865.85546875, + "learning_rate": 9.942996229419264e-06, + "loss": 83580.1562, + "step": 71650 + }, + { + "epoch": 0.144757733812223, + "grad_norm": 199097.15625, + "learning_rate": 9.942943658289487e-06, + "loss": 57969.5312, + "step": 71660 + }, + { + "epoch": 0.1447779344449068, + "grad_norm": 209228.828125, + "learning_rate": 9.942891063068401e-06, + "loss": 68128.4312, + "step": 71670 + }, + { + "epoch": 0.14479813507759062, + "grad_norm": 1265456.25, + "learning_rate": 9.942838443756265e-06, + "loss": 206301.35, + "step": 71680 + }, + { + "epoch": 0.14481833571027444, + "grad_norm": 134207.140625, + "learning_rate": 9.942785800353332e-06, + "loss": 114249.0125, + "step": 71690 + }, + { + "epoch": 0.14483853634295826, + "grad_norm": 2187520.0, + "learning_rate": 9.942733132859861e-06, + "loss": 216160.075, + "step": 71700 + }, + { + "epoch": 0.14485873697564208, + "grad_norm": 432830.9375, + "learning_rate": 9.94268044127611e-06, + "loss": 78453.4375, + "step": 71710 + }, + { + "epoch": 0.1448789376083259, + "grad_norm": 45847.90234375, + "learning_rate": 9.942627725602332e-06, + "loss": 104898.8875, + "step": 71720 + }, + { + "epoch": 0.1448991382410097, + "grad_norm": 311835.625, + "learning_rate": 9.942574985838785e-06, + "loss": 94777.5437, + "step": 71730 + }, + { + "epoch": 0.14491933887369352, + "grad_norm": 56873.94140625, + "learning_rate": 9.942522221985728e-06, + "loss": 93977.3438, + "step": 71740 + }, + { + "epoch": 0.14493953950637734, + "grad_norm": 2919015.25, + "learning_rate": 9.942469434043418e-06, + "loss": 200808.3375, + "step": 71750 + }, + { + "epoch": 0.14495974013906116, + "grad_norm": 2059011.875, + "learning_rate": 9.942416622012113e-06, + "loss": 262334.05, + "step": 71760 + }, + { + "epoch": 0.14497994077174498, + "grad_norm": 158676.15625, + "learning_rate": 9.942363785892065e-06, + "loss": 89011.3062, + "step": 71770 + }, + { + "epoch": 0.1450001414044288, + "grad_norm": 8152995.5, + "learning_rate": 9.942310925683538e-06, + "loss": 188493.25, + "step": 71780 + }, + { + "epoch": 0.14502034203711262, + "grad_norm": 911395.3125, + "learning_rate": 9.942258041386785e-06, + "loss": 100095.0375, + "step": 71790 + }, + { + "epoch": 0.1450405426697964, + "grad_norm": 938078.875, + "learning_rate": 9.942205133002067e-06, + "loss": 70842.85, + "step": 71800 + }, + { + "epoch": 0.14506074330248023, + "grad_norm": 9742.0576171875, + "learning_rate": 9.94215220052964e-06, + "loss": 59833.6125, + "step": 71810 + }, + { + "epoch": 0.14508094393516405, + "grad_norm": 200750.734375, + "learning_rate": 9.942099243969765e-06, + "loss": 102295.7063, + "step": 71820 + }, + { + "epoch": 0.14510114456784787, + "grad_norm": 397876.5625, + "learning_rate": 9.942046263322694e-06, + "loss": 105842.9625, + "step": 71830 + }, + { + "epoch": 0.1451213452005317, + "grad_norm": 159132.484375, + "learning_rate": 9.941993258588691e-06, + "loss": 71863.5375, + "step": 71840 + }, + { + "epoch": 0.1451415458332155, + "grad_norm": 398958.84375, + "learning_rate": 9.941940229768012e-06, + "loss": 97860.7188, + "step": 71850 + }, + { + "epoch": 0.1451617464658993, + "grad_norm": 1745429.75, + "learning_rate": 9.941887176860916e-06, + "loss": 203037.7625, + "step": 71860 + }, + { + "epoch": 0.14518194709858312, + "grad_norm": 118269.8984375, + "learning_rate": 9.94183409986766e-06, + "loss": 66384.425, + "step": 71870 + }, + { + "epoch": 0.14520214773126694, + "grad_norm": 53743.66796875, + "learning_rate": 9.941780998788506e-06, + "loss": 154363.825, + "step": 71880 + }, + { + "epoch": 0.14522234836395076, + "grad_norm": 828546.375, + "learning_rate": 9.941727873623709e-06, + "loss": 150953.1, + "step": 71890 + }, + { + "epoch": 0.14524254899663458, + "grad_norm": 502576.84375, + "learning_rate": 9.94167472437353e-06, + "loss": 67173.5312, + "step": 71900 + }, + { + "epoch": 0.1452627496293184, + "grad_norm": 41078.80859375, + "learning_rate": 9.941621551038228e-06, + "loss": 57137.9, + "step": 71910 + }, + { + "epoch": 0.14528295026200222, + "grad_norm": 25154.228515625, + "learning_rate": 9.941568353618064e-06, + "loss": 73336.3188, + "step": 71920 + }, + { + "epoch": 0.145303150894686, + "grad_norm": 1290831.875, + "learning_rate": 9.941515132113291e-06, + "loss": 155249.45, + "step": 71930 + }, + { + "epoch": 0.14532335152736983, + "grad_norm": 238509.375, + "learning_rate": 9.941461886524176e-06, + "loss": 89540.0063, + "step": 71940 + }, + { + "epoch": 0.14534355216005365, + "grad_norm": 53127.06640625, + "learning_rate": 9.941408616850974e-06, + "loss": 89791.35, + "step": 71950 + }, + { + "epoch": 0.14536375279273747, + "grad_norm": 132947.0, + "learning_rate": 9.941355323093944e-06, + "loss": 66700.8125, + "step": 71960 + }, + { + "epoch": 0.1453839534254213, + "grad_norm": 41887.609375, + "learning_rate": 9.94130200525335e-06, + "loss": 179631.9375, + "step": 71970 + }, + { + "epoch": 0.1454041540581051, + "grad_norm": 284372.25, + "learning_rate": 9.941248663329448e-06, + "loss": 75648.3188, + "step": 71980 + }, + { + "epoch": 0.1454243546907889, + "grad_norm": 176632.9375, + "learning_rate": 9.941195297322498e-06, + "loss": 228697.3, + "step": 71990 + }, + { + "epoch": 0.14544455532347272, + "grad_norm": 432153.25, + "learning_rate": 9.941141907232766e-06, + "loss": 267960.975, + "step": 72000 + }, + { + "epoch": 0.14546475595615654, + "grad_norm": 198702.484375, + "learning_rate": 9.941088493060504e-06, + "loss": 41618.7781, + "step": 72010 + }, + { + "epoch": 0.14548495658884036, + "grad_norm": 152492.78125, + "learning_rate": 9.941035054805977e-06, + "loss": 59453.8313, + "step": 72020 + }, + { + "epoch": 0.14550515722152418, + "grad_norm": 441475.25, + "learning_rate": 9.940981592469443e-06, + "loss": 90241.2125, + "step": 72030 + }, + { + "epoch": 0.145525357854208, + "grad_norm": 35229.97265625, + "learning_rate": 9.940928106051166e-06, + "loss": 147257.925, + "step": 72040 + }, + { + "epoch": 0.1455455584868918, + "grad_norm": 1353823.375, + "learning_rate": 9.940874595551403e-06, + "loss": 149708.95, + "step": 72050 + }, + { + "epoch": 0.14556575911957562, + "grad_norm": 3369.8525390625, + "learning_rate": 9.940821060970418e-06, + "loss": 71636.1875, + "step": 72060 + }, + { + "epoch": 0.14558595975225944, + "grad_norm": 1623997.5, + "learning_rate": 9.940767502308469e-06, + "loss": 336464.05, + "step": 72070 + }, + { + "epoch": 0.14560616038494326, + "grad_norm": 67692.96875, + "learning_rate": 9.940713919565819e-06, + "loss": 103036.9625, + "step": 72080 + }, + { + "epoch": 0.14562636101762708, + "grad_norm": 41952.98828125, + "learning_rate": 9.94066031274273e-06, + "loss": 108543.425, + "step": 72090 + }, + { + "epoch": 0.1456465616503109, + "grad_norm": 99410.2421875, + "learning_rate": 9.94060668183946e-06, + "loss": 154719.2125, + "step": 72100 + }, + { + "epoch": 0.14566676228299472, + "grad_norm": 1575550.25, + "learning_rate": 9.940553026856273e-06, + "loss": 247820.25, + "step": 72110 + }, + { + "epoch": 0.1456869629156785, + "grad_norm": 1115397.5, + "learning_rate": 9.940499347793429e-06, + "loss": 159925.15, + "step": 72120 + }, + { + "epoch": 0.14570716354836233, + "grad_norm": 2078018.0, + "learning_rate": 9.940445644651191e-06, + "loss": 239874.9, + "step": 72130 + }, + { + "epoch": 0.14572736418104615, + "grad_norm": 31973.3359375, + "learning_rate": 9.94039191742982e-06, + "loss": 72640.8875, + "step": 72140 + }, + { + "epoch": 0.14574756481372997, + "grad_norm": 21450.49609375, + "learning_rate": 9.940338166129578e-06, + "loss": 77211.375, + "step": 72150 + }, + { + "epoch": 0.1457677654464138, + "grad_norm": 58665.1015625, + "learning_rate": 9.940284390750727e-06, + "loss": 137084.25, + "step": 72160 + }, + { + "epoch": 0.1457879660790976, + "grad_norm": 64736.98046875, + "learning_rate": 9.94023059129353e-06, + "loss": 138280.0375, + "step": 72170 + }, + { + "epoch": 0.1458081667117814, + "grad_norm": 72224.7578125, + "learning_rate": 9.940176767758247e-06, + "loss": 197463.9375, + "step": 72180 + }, + { + "epoch": 0.14582836734446522, + "grad_norm": 286364.1875, + "learning_rate": 9.940122920145142e-06, + "loss": 67872.2, + "step": 72190 + }, + { + "epoch": 0.14584856797714904, + "grad_norm": 850770.375, + "learning_rate": 9.940069048454478e-06, + "loss": 181052.6, + "step": 72200 + }, + { + "epoch": 0.14586876860983286, + "grad_norm": 114135.7265625, + "learning_rate": 9.940015152686514e-06, + "loss": 81863.8188, + "step": 72210 + }, + { + "epoch": 0.14588896924251668, + "grad_norm": 601993.3125, + "learning_rate": 9.939961232841517e-06, + "loss": 87016.9375, + "step": 72220 + }, + { + "epoch": 0.1459091698752005, + "grad_norm": 441895.96875, + "learning_rate": 9.939907288919749e-06, + "loss": 344356.125, + "step": 72230 + }, + { + "epoch": 0.14592937050788432, + "grad_norm": 12994.6005859375, + "learning_rate": 9.93985332092147e-06, + "loss": 162401.45, + "step": 72240 + }, + { + "epoch": 0.1459495711405681, + "grad_norm": 78435.734375, + "learning_rate": 9.939799328846947e-06, + "loss": 193603.2375, + "step": 72250 + }, + { + "epoch": 0.14596977177325193, + "grad_norm": 1316764.75, + "learning_rate": 9.93974531269644e-06, + "loss": 272913.375, + "step": 72260 + }, + { + "epoch": 0.14598997240593575, + "grad_norm": 66961.3359375, + "learning_rate": 9.939691272470214e-06, + "loss": 78117.6313, + "step": 72270 + }, + { + "epoch": 0.14601017303861957, + "grad_norm": 691634.5, + "learning_rate": 9.939637208168532e-06, + "loss": 115679.3, + "step": 72280 + }, + { + "epoch": 0.1460303736713034, + "grad_norm": 315936.1875, + "learning_rate": 9.939583119791656e-06, + "loss": 46491.7625, + "step": 72290 + }, + { + "epoch": 0.1460505743039872, + "grad_norm": 464016.09375, + "learning_rate": 9.939529007339852e-06, + "loss": 60258.5875, + "step": 72300 + }, + { + "epoch": 0.146070774936671, + "grad_norm": 89321.6953125, + "learning_rate": 9.939474870813383e-06, + "loss": 71546.7937, + "step": 72310 + }, + { + "epoch": 0.14609097556935483, + "grad_norm": 50029.546875, + "learning_rate": 9.939420710212511e-06, + "loss": 158523.2125, + "step": 72320 + }, + { + "epoch": 0.14611117620203865, + "grad_norm": 4078.58984375, + "learning_rate": 9.939366525537503e-06, + "loss": 188405.825, + "step": 72330 + }, + { + "epoch": 0.14613137683472247, + "grad_norm": 978509.875, + "learning_rate": 9.939312316788622e-06, + "loss": 239247.25, + "step": 72340 + }, + { + "epoch": 0.14615157746740629, + "grad_norm": 278221.21875, + "learning_rate": 9.93925808396613e-06, + "loss": 172829.5, + "step": 72350 + }, + { + "epoch": 0.1461717781000901, + "grad_norm": 256234.390625, + "learning_rate": 9.939203827070296e-06, + "loss": 237154.125, + "step": 72360 + }, + { + "epoch": 0.1461919787327739, + "grad_norm": 2797107.5, + "learning_rate": 9.939149546101379e-06, + "loss": 181859.9, + "step": 72370 + }, + { + "epoch": 0.14621217936545772, + "grad_norm": 41259.953125, + "learning_rate": 9.939095241059648e-06, + "loss": 126921.7125, + "step": 72380 + }, + { + "epoch": 0.14623237999814154, + "grad_norm": 598630.375, + "learning_rate": 9.939040911945365e-06, + "loss": 81585.9187, + "step": 72390 + }, + { + "epoch": 0.14625258063082536, + "grad_norm": 2108091.0, + "learning_rate": 9.938986558758795e-06, + "loss": 119335.775, + "step": 72400 + }, + { + "epoch": 0.14627278126350918, + "grad_norm": 96916.53125, + "learning_rate": 9.938932181500206e-06, + "loss": 67869.1938, + "step": 72410 + }, + { + "epoch": 0.146292981896193, + "grad_norm": 177160.734375, + "learning_rate": 9.938877780169858e-06, + "loss": 174369.425, + "step": 72420 + }, + { + "epoch": 0.14631318252887682, + "grad_norm": 101332.90625, + "learning_rate": 9.938823354768019e-06, + "loss": 114433.8125, + "step": 72430 + }, + { + "epoch": 0.1463333831615606, + "grad_norm": 2163923.5, + "learning_rate": 9.938768905294954e-06, + "loss": 215728.95, + "step": 72440 + }, + { + "epoch": 0.14635358379424443, + "grad_norm": 554447.1875, + "learning_rate": 9.938714431750928e-06, + "loss": 63768.1125, + "step": 72450 + }, + { + "epoch": 0.14637378442692825, + "grad_norm": 23486.11328125, + "learning_rate": 9.938659934136208e-06, + "loss": 189607.5, + "step": 72460 + }, + { + "epoch": 0.14639398505961207, + "grad_norm": 11200.181640625, + "learning_rate": 9.93860541245106e-06, + "loss": 116993.575, + "step": 72470 + }, + { + "epoch": 0.1464141856922959, + "grad_norm": 449424.0, + "learning_rate": 9.938550866695745e-06, + "loss": 133260.4125, + "step": 72480 + }, + { + "epoch": 0.1464343863249797, + "grad_norm": 373546.0625, + "learning_rate": 9.938496296870532e-06, + "loss": 50278.0219, + "step": 72490 + }, + { + "epoch": 0.1464545869576635, + "grad_norm": 58243.48046875, + "learning_rate": 9.938441702975689e-06, + "loss": 123272.0125, + "step": 72500 + }, + { + "epoch": 0.14647478759034732, + "grad_norm": 167012.90625, + "learning_rate": 9.93838708501148e-06, + "loss": 130042.4125, + "step": 72510 + }, + { + "epoch": 0.14649498822303114, + "grad_norm": 244125.109375, + "learning_rate": 9.93833244297817e-06, + "loss": 107041.05, + "step": 72520 + }, + { + "epoch": 0.14651518885571496, + "grad_norm": 415125.34375, + "learning_rate": 9.938277776876029e-06, + "loss": 313850.95, + "step": 72530 + }, + { + "epoch": 0.14653538948839878, + "grad_norm": 1039586.0, + "learning_rate": 9.938223086705318e-06, + "loss": 371873.525, + "step": 72540 + }, + { + "epoch": 0.1465555901210826, + "grad_norm": 44121.671875, + "learning_rate": 9.938168372466308e-06, + "loss": 88488.0625, + "step": 72550 + }, + { + "epoch": 0.14657579075376642, + "grad_norm": 2832070.0, + "learning_rate": 9.938113634159266e-06, + "loss": 97154.1625, + "step": 72560 + }, + { + "epoch": 0.14659599138645021, + "grad_norm": 3556.123291015625, + "learning_rate": 9.938058871784453e-06, + "loss": 63989.3438, + "step": 72570 + }, + { + "epoch": 0.14661619201913403, + "grad_norm": 347184.90625, + "learning_rate": 9.938004085342144e-06, + "loss": 159709.775, + "step": 72580 + }, + { + "epoch": 0.14663639265181785, + "grad_norm": 520943.96875, + "learning_rate": 9.9379492748326e-06, + "loss": 71795.0688, + "step": 72590 + }, + { + "epoch": 0.14665659328450167, + "grad_norm": 341752.9375, + "learning_rate": 9.937894440256091e-06, + "loss": 135177.75, + "step": 72600 + }, + { + "epoch": 0.1466767939171855, + "grad_norm": 125839.0234375, + "learning_rate": 9.937839581612883e-06, + "loss": 86451.575, + "step": 72610 + }, + { + "epoch": 0.14669699454986931, + "grad_norm": 1272923.5, + "learning_rate": 9.937784698903244e-06, + "loss": 81985.25, + "step": 72620 + }, + { + "epoch": 0.1467171951825531, + "grad_norm": 136577.34375, + "learning_rate": 9.937729792127439e-06, + "loss": 75671.8125, + "step": 72630 + }, + { + "epoch": 0.14673739581523693, + "grad_norm": 68929.921875, + "learning_rate": 9.93767486128574e-06, + "loss": 143138.525, + "step": 72640 + }, + { + "epoch": 0.14675759644792075, + "grad_norm": 9896.1552734375, + "learning_rate": 9.937619906378413e-06, + "loss": 86033.0, + "step": 72650 + }, + { + "epoch": 0.14677779708060457, + "grad_norm": 861214.375, + "learning_rate": 9.937564927405724e-06, + "loss": 189754.1625, + "step": 72660 + }, + { + "epoch": 0.1467979977132884, + "grad_norm": 99163.8203125, + "learning_rate": 9.937509924367944e-06, + "loss": 48418.325, + "step": 72670 + }, + { + "epoch": 0.1468181983459722, + "grad_norm": 464633.84375, + "learning_rate": 9.937454897265338e-06, + "loss": 209007.1125, + "step": 72680 + }, + { + "epoch": 0.146838398978656, + "grad_norm": 1126340.375, + "learning_rate": 9.937399846098177e-06, + "loss": 112155.425, + "step": 72690 + }, + { + "epoch": 0.14685859961133982, + "grad_norm": 36871.625, + "learning_rate": 9.937344770866727e-06, + "loss": 156711.725, + "step": 72700 + }, + { + "epoch": 0.14687880024402364, + "grad_norm": 0.0, + "learning_rate": 9.937289671571257e-06, + "loss": 44603.0687, + "step": 72710 + }, + { + "epoch": 0.14689900087670746, + "grad_norm": 62561.06640625, + "learning_rate": 9.937234548212038e-06, + "loss": 179568.75, + "step": 72720 + }, + { + "epoch": 0.14691920150939128, + "grad_norm": 552964.0625, + "learning_rate": 9.937179400789336e-06, + "loss": 135947.125, + "step": 72730 + }, + { + "epoch": 0.1469394021420751, + "grad_norm": 4631875.5, + "learning_rate": 9.937124229303419e-06, + "loss": 201663.4125, + "step": 72740 + }, + { + "epoch": 0.14695960277475892, + "grad_norm": 40201.46875, + "learning_rate": 9.937069033754558e-06, + "loss": 110894.75, + "step": 72750 + }, + { + "epoch": 0.1469798034074427, + "grad_norm": 79202.9765625, + "learning_rate": 9.937013814143021e-06, + "loss": 70793.2312, + "step": 72760 + }, + { + "epoch": 0.14700000404012653, + "grad_norm": 1211046.875, + "learning_rate": 9.936958570469077e-06, + "loss": 118118.0, + "step": 72770 + }, + { + "epoch": 0.14702020467281035, + "grad_norm": 29946.90625, + "learning_rate": 9.936903302732997e-06, + "loss": 164743.425, + "step": 72780 + }, + { + "epoch": 0.14704040530549417, + "grad_norm": 24342.54296875, + "learning_rate": 9.936848010935049e-06, + "loss": 116898.3125, + "step": 72790 + }, + { + "epoch": 0.147060605938178, + "grad_norm": 100092.421875, + "learning_rate": 9.936792695075502e-06, + "loss": 22042.8781, + "step": 72800 + }, + { + "epoch": 0.1470808065708618, + "grad_norm": 763073.25, + "learning_rate": 9.936737355154627e-06, + "loss": 93657.05, + "step": 72810 + }, + { + "epoch": 0.1471010072035456, + "grad_norm": 23030.42578125, + "learning_rate": 9.936681991172692e-06, + "loss": 130981.475, + "step": 72820 + }, + { + "epoch": 0.14712120783622942, + "grad_norm": 14379.5927734375, + "learning_rate": 9.936626603129968e-06, + "loss": 100975.0312, + "step": 72830 + }, + { + "epoch": 0.14714140846891324, + "grad_norm": 2769.40087890625, + "learning_rate": 9.936571191026726e-06, + "loss": 62686.8187, + "step": 72840 + }, + { + "epoch": 0.14716160910159706, + "grad_norm": 313312.375, + "learning_rate": 9.936515754863231e-06, + "loss": 68728.5125, + "step": 72850 + }, + { + "epoch": 0.14718180973428088, + "grad_norm": 85906.7421875, + "learning_rate": 9.93646029463976e-06, + "loss": 335680.975, + "step": 72860 + }, + { + "epoch": 0.1472020103669647, + "grad_norm": 67711.640625, + "learning_rate": 9.93640481035658e-06, + "loss": 70645.6562, + "step": 72870 + }, + { + "epoch": 0.14722221099964852, + "grad_norm": 552845.125, + "learning_rate": 9.936349302013962e-06, + "loss": 84374.85, + "step": 72880 + }, + { + "epoch": 0.14724241163233232, + "grad_norm": 964645.3125, + "learning_rate": 9.936293769612175e-06, + "loss": 115088.8, + "step": 72890 + }, + { + "epoch": 0.14726261226501614, + "grad_norm": 452909.0625, + "learning_rate": 9.936238213151491e-06, + "loss": 122139.9875, + "step": 72900 + }, + { + "epoch": 0.14728281289769996, + "grad_norm": 202997.90625, + "learning_rate": 9.93618263263218e-06, + "loss": 131867.725, + "step": 72910 + }, + { + "epoch": 0.14730301353038378, + "grad_norm": 417369.09375, + "learning_rate": 9.936127028054516e-06, + "loss": 101981.3188, + "step": 72920 + }, + { + "epoch": 0.1473232141630676, + "grad_norm": 28441.216796875, + "learning_rate": 9.936071399418764e-06, + "loss": 63927.65, + "step": 72930 + }, + { + "epoch": 0.14734341479575142, + "grad_norm": 198061.1875, + "learning_rate": 9.936015746725202e-06, + "loss": 44345.025, + "step": 72940 + }, + { + "epoch": 0.1473636154284352, + "grad_norm": 96693.4453125, + "learning_rate": 9.935960069974096e-06, + "loss": 44007.4156, + "step": 72950 + }, + { + "epoch": 0.14738381606111903, + "grad_norm": 758495.1875, + "learning_rate": 9.93590436916572e-06, + "loss": 118572.2625, + "step": 72960 + }, + { + "epoch": 0.14740401669380285, + "grad_norm": 28640.6015625, + "learning_rate": 9.935848644300345e-06, + "loss": 40998.575, + "step": 72970 + }, + { + "epoch": 0.14742421732648667, + "grad_norm": 0.0, + "learning_rate": 9.935792895378243e-06, + "loss": 148107.675, + "step": 72980 + }, + { + "epoch": 0.1474444179591705, + "grad_norm": 2092.55908203125, + "learning_rate": 9.935737122399683e-06, + "loss": 134634.3125, + "step": 72990 + }, + { + "epoch": 0.1474646185918543, + "grad_norm": 63562.05078125, + "learning_rate": 9.93568132536494e-06, + "loss": 45151.6687, + "step": 73000 + }, + { + "epoch": 0.1474848192245381, + "grad_norm": 105314.7109375, + "learning_rate": 9.935625504274284e-06, + "loss": 42887.4625, + "step": 73010 + }, + { + "epoch": 0.14750501985722192, + "grad_norm": 332469.375, + "learning_rate": 9.93556965912799e-06, + "loss": 129940.575, + "step": 73020 + }, + { + "epoch": 0.14752522048990574, + "grad_norm": 28338.5390625, + "learning_rate": 9.935513789926327e-06, + "loss": 74977.6812, + "step": 73030 + }, + { + "epoch": 0.14754542112258956, + "grad_norm": 374774.34375, + "learning_rate": 9.935457896669568e-06, + "loss": 164543.225, + "step": 73040 + }, + { + "epoch": 0.14756562175527338, + "grad_norm": 177745.0625, + "learning_rate": 9.935401979357985e-06, + "loss": 30261.0563, + "step": 73050 + }, + { + "epoch": 0.1475858223879572, + "grad_norm": 395701.15625, + "learning_rate": 9.935346037991854e-06, + "loss": 118594.4375, + "step": 73060 + }, + { + "epoch": 0.14760602302064102, + "grad_norm": 630697.375, + "learning_rate": 9.935290072571442e-06, + "loss": 103165.0, + "step": 73070 + }, + { + "epoch": 0.1476262236533248, + "grad_norm": 26098.74609375, + "learning_rate": 9.935234083097028e-06, + "loss": 157862.075, + "step": 73080 + }, + { + "epoch": 0.14764642428600863, + "grad_norm": 248678.453125, + "learning_rate": 9.935178069568878e-06, + "loss": 153982.0125, + "step": 73090 + }, + { + "epoch": 0.14766662491869245, + "grad_norm": 44965.7265625, + "learning_rate": 9.93512203198727e-06, + "loss": 142552.7125, + "step": 73100 + }, + { + "epoch": 0.14768682555137627, + "grad_norm": 166191.9375, + "learning_rate": 9.935065970352477e-06, + "loss": 57568.7375, + "step": 73110 + }, + { + "epoch": 0.1477070261840601, + "grad_norm": 89414.4375, + "learning_rate": 9.93500988466477e-06, + "loss": 185883.475, + "step": 73120 + }, + { + "epoch": 0.1477272268167439, + "grad_norm": 39345.61328125, + "learning_rate": 9.934953774924425e-06, + "loss": 139905.225, + "step": 73130 + }, + { + "epoch": 0.1477474274494277, + "grad_norm": 244645.71875, + "learning_rate": 9.934897641131712e-06, + "loss": 72542.7563, + "step": 73140 + }, + { + "epoch": 0.14776762808211152, + "grad_norm": 512703.1875, + "learning_rate": 9.934841483286907e-06, + "loss": 173262.3875, + "step": 73150 + }, + { + "epoch": 0.14778782871479534, + "grad_norm": 8582.7841796875, + "learning_rate": 9.934785301390282e-06, + "loss": 114404.2125, + "step": 73160 + }, + { + "epoch": 0.14780802934747916, + "grad_norm": 1062115.5, + "learning_rate": 9.934729095442113e-06, + "loss": 240061.95, + "step": 73170 + }, + { + "epoch": 0.14782822998016298, + "grad_norm": 153117.328125, + "learning_rate": 9.934672865442673e-06, + "loss": 65500.8375, + "step": 73180 + }, + { + "epoch": 0.1478484306128468, + "grad_norm": 136076.65625, + "learning_rate": 9.934616611392235e-06, + "loss": 91667.675, + "step": 73190 + }, + { + "epoch": 0.1478686312455306, + "grad_norm": 1445598.75, + "learning_rate": 9.934560333291077e-06, + "loss": 230509.95, + "step": 73200 + }, + { + "epoch": 0.14788883187821442, + "grad_norm": 75220.140625, + "learning_rate": 9.934504031139468e-06, + "loss": 100543.1625, + "step": 73210 + }, + { + "epoch": 0.14790903251089824, + "grad_norm": 87365.5390625, + "learning_rate": 9.934447704937684e-06, + "loss": 144734.0875, + "step": 73220 + }, + { + "epoch": 0.14792923314358206, + "grad_norm": 0.0, + "learning_rate": 9.934391354686002e-06, + "loss": 103323.2875, + "step": 73230 + }, + { + "epoch": 0.14794943377626588, + "grad_norm": 26754.146484375, + "learning_rate": 9.934334980384694e-06, + "loss": 62398.3625, + "step": 73240 + }, + { + "epoch": 0.1479696344089497, + "grad_norm": 14568.1376953125, + "learning_rate": 9.934278582034037e-06, + "loss": 144329.2125, + "step": 73250 + }, + { + "epoch": 0.14798983504163352, + "grad_norm": 447703.28125, + "learning_rate": 9.934222159634303e-06, + "loss": 112409.5875, + "step": 73260 + }, + { + "epoch": 0.1480100356743173, + "grad_norm": 270827.15625, + "learning_rate": 9.93416571318577e-06, + "loss": 44152.2063, + "step": 73270 + }, + { + "epoch": 0.14803023630700113, + "grad_norm": 459746.6875, + "learning_rate": 9.934109242688712e-06, + "loss": 208860.85, + "step": 73280 + }, + { + "epoch": 0.14805043693968495, + "grad_norm": 146252.046875, + "learning_rate": 9.934052748143403e-06, + "loss": 142814.9, + "step": 73290 + }, + { + "epoch": 0.14807063757236877, + "grad_norm": 490941.09375, + "learning_rate": 9.93399622955012e-06, + "loss": 112550.3625, + "step": 73300 + }, + { + "epoch": 0.1480908382050526, + "grad_norm": 215531.578125, + "learning_rate": 9.933939686909137e-06, + "loss": 152872.65, + "step": 73310 + }, + { + "epoch": 0.1481110388377364, + "grad_norm": 2060299.125, + "learning_rate": 9.933883120220731e-06, + "loss": 244800.225, + "step": 73320 + }, + { + "epoch": 0.1481312394704202, + "grad_norm": 89874.1484375, + "learning_rate": 9.933826529485178e-06, + "loss": 145331.9125, + "step": 73330 + }, + { + "epoch": 0.14815144010310402, + "grad_norm": 6084.28173828125, + "learning_rate": 9.933769914702751e-06, + "loss": 94134.4312, + "step": 73340 + }, + { + "epoch": 0.14817164073578784, + "grad_norm": 164398.640625, + "learning_rate": 9.933713275873728e-06, + "loss": 176909.05, + "step": 73350 + }, + { + "epoch": 0.14819184136847166, + "grad_norm": 486049.59375, + "learning_rate": 9.933656612998387e-06, + "loss": 99083.5437, + "step": 73360 + }, + { + "epoch": 0.14821204200115548, + "grad_norm": 151783.09375, + "learning_rate": 9.933599926077e-06, + "loss": 52204.0938, + "step": 73370 + }, + { + "epoch": 0.1482322426338393, + "grad_norm": 21651.283203125, + "learning_rate": 9.933543215109846e-06, + "loss": 157991.6375, + "step": 73380 + }, + { + "epoch": 0.14825244326652312, + "grad_norm": 262239.8125, + "learning_rate": 9.933486480097201e-06, + "loss": 113967.575, + "step": 73390 + }, + { + "epoch": 0.1482726438992069, + "grad_norm": 1440.56103515625, + "learning_rate": 9.93342972103934e-06, + "loss": 70246.5312, + "step": 73400 + }, + { + "epoch": 0.14829284453189073, + "grad_norm": 131303.28125, + "learning_rate": 9.933372937936542e-06, + "loss": 222107.25, + "step": 73410 + }, + { + "epoch": 0.14831304516457455, + "grad_norm": 3373151.5, + "learning_rate": 9.933316130789084e-06, + "loss": 130212.475, + "step": 73420 + }, + { + "epoch": 0.14833324579725837, + "grad_norm": 176411.03125, + "learning_rate": 9.93325929959724e-06, + "loss": 151850.6875, + "step": 73430 + }, + { + "epoch": 0.1483534464299422, + "grad_norm": 19668.64453125, + "learning_rate": 9.933202444361288e-06, + "loss": 70599.7937, + "step": 73440 + }, + { + "epoch": 0.148373647062626, + "grad_norm": 1013615.875, + "learning_rate": 9.933145565081506e-06, + "loss": 124944.525, + "step": 73450 + }, + { + "epoch": 0.1483938476953098, + "grad_norm": 687325.9375, + "learning_rate": 9.933088661758172e-06, + "loss": 124422.4375, + "step": 73460 + }, + { + "epoch": 0.14841404832799363, + "grad_norm": 588001.8125, + "learning_rate": 9.933031734391561e-06, + "loss": 87395.65, + "step": 73470 + }, + { + "epoch": 0.14843424896067745, + "grad_norm": 141526.296875, + "learning_rate": 9.93297478298195e-06, + "loss": 63621.6937, + "step": 73480 + }, + { + "epoch": 0.14845444959336127, + "grad_norm": 111867.7421875, + "learning_rate": 9.93291780752962e-06, + "loss": 26880.4813, + "step": 73490 + }, + { + "epoch": 0.14847465022604509, + "grad_norm": 172619.234375, + "learning_rate": 9.932860808034847e-06, + "loss": 161066.575, + "step": 73500 + }, + { + "epoch": 0.1484948508587289, + "grad_norm": 314864.1875, + "learning_rate": 9.93280378449791e-06, + "loss": 154890.4375, + "step": 73510 + }, + { + "epoch": 0.1485150514914127, + "grad_norm": 216581.296875, + "learning_rate": 9.932746736919084e-06, + "loss": 162893.6375, + "step": 73520 + }, + { + "epoch": 0.14853525212409652, + "grad_norm": 264834.375, + "learning_rate": 9.93268966529865e-06, + "loss": 56996.05, + "step": 73530 + }, + { + "epoch": 0.14855545275678034, + "grad_norm": 7801.56201171875, + "learning_rate": 9.932632569636882e-06, + "loss": 79271.4438, + "step": 73540 + }, + { + "epoch": 0.14857565338946416, + "grad_norm": 202933.171875, + "learning_rate": 9.932575449934063e-06, + "loss": 29349.5719, + "step": 73550 + }, + { + "epoch": 0.14859585402214798, + "grad_norm": 0.0, + "learning_rate": 9.93251830619047e-06, + "loss": 148187.875, + "step": 73560 + }, + { + "epoch": 0.1486160546548318, + "grad_norm": 180254.671875, + "learning_rate": 9.93246113840638e-06, + "loss": 104707.825, + "step": 73570 + }, + { + "epoch": 0.14863625528751562, + "grad_norm": 346513.71875, + "learning_rate": 9.932403946582071e-06, + "loss": 155748.6375, + "step": 73580 + }, + { + "epoch": 0.1486564559201994, + "grad_norm": 496060.15625, + "learning_rate": 9.932346730717828e-06, + "loss": 74639.5938, + "step": 73590 + }, + { + "epoch": 0.14867665655288323, + "grad_norm": 73615.4140625, + "learning_rate": 9.932289490813922e-06, + "loss": 113061.3125, + "step": 73600 + }, + { + "epoch": 0.14869685718556705, + "grad_norm": 390302.0625, + "learning_rate": 9.932232226870635e-06, + "loss": 59870.0625, + "step": 73610 + }, + { + "epoch": 0.14871705781825087, + "grad_norm": 62445.453125, + "learning_rate": 9.932174938888248e-06, + "loss": 109132.125, + "step": 73620 + }, + { + "epoch": 0.1487372584509347, + "grad_norm": 372841.0625, + "learning_rate": 9.932117626867037e-06, + "loss": 59819.1562, + "step": 73630 + }, + { + "epoch": 0.1487574590836185, + "grad_norm": 1926705.75, + "learning_rate": 9.932060290807283e-06, + "loss": 171254.625, + "step": 73640 + }, + { + "epoch": 0.1487776597163023, + "grad_norm": 1351430.75, + "learning_rate": 9.932002930709268e-06, + "loss": 98551.375, + "step": 73650 + }, + { + "epoch": 0.14879786034898612, + "grad_norm": 1616787.75, + "learning_rate": 9.931945546573266e-06, + "loss": 119523.175, + "step": 73660 + }, + { + "epoch": 0.14881806098166994, + "grad_norm": 587397.125, + "learning_rate": 9.931888138399562e-06, + "loss": 120104.8375, + "step": 73670 + }, + { + "epoch": 0.14883826161435376, + "grad_norm": 278011.125, + "learning_rate": 9.93183070618843e-06, + "loss": 125084.65, + "step": 73680 + }, + { + "epoch": 0.14885846224703758, + "grad_norm": 672854.9375, + "learning_rate": 9.931773249940156e-06, + "loss": 39849.7063, + "step": 73690 + }, + { + "epoch": 0.1488786628797214, + "grad_norm": 185038.734375, + "learning_rate": 9.931715769655017e-06, + "loss": 158611.1875, + "step": 73700 + }, + { + "epoch": 0.14889886351240522, + "grad_norm": 612694.25, + "learning_rate": 9.931658265333293e-06, + "loss": 74619.1, + "step": 73710 + }, + { + "epoch": 0.14891906414508901, + "grad_norm": 9671.087890625, + "learning_rate": 9.931600736975264e-06, + "loss": 143536.7625, + "step": 73720 + }, + { + "epoch": 0.14893926477777283, + "grad_norm": 2254856.25, + "learning_rate": 9.93154318458121e-06, + "loss": 140425.1375, + "step": 73730 + }, + { + "epoch": 0.14895946541045665, + "grad_norm": 39393.01953125, + "learning_rate": 9.931485608151416e-06, + "loss": 134431.675, + "step": 73740 + }, + { + "epoch": 0.14897966604314047, + "grad_norm": 40034.9296875, + "learning_rate": 9.931428007686158e-06, + "loss": 128398.8625, + "step": 73750 + }, + { + "epoch": 0.1489998666758243, + "grad_norm": 158029.3125, + "learning_rate": 9.931370383185717e-06, + "loss": 83613.2, + "step": 73760 + }, + { + "epoch": 0.14902006730850811, + "grad_norm": 6902867.0, + "learning_rate": 9.931312734650376e-06, + "loss": 237773.875, + "step": 73770 + }, + { + "epoch": 0.1490402679411919, + "grad_norm": 71400.6953125, + "learning_rate": 9.931255062080415e-06, + "loss": 132556.575, + "step": 73780 + }, + { + "epoch": 0.14906046857387573, + "grad_norm": 21281.650390625, + "learning_rate": 9.931197365476113e-06, + "loss": 101277.4563, + "step": 73790 + }, + { + "epoch": 0.14908066920655955, + "grad_norm": 92356.390625, + "learning_rate": 9.931139644837755e-06, + "loss": 56379.85, + "step": 73800 + }, + { + "epoch": 0.14910086983924337, + "grad_norm": 2240867.25, + "learning_rate": 9.93108190016562e-06, + "loss": 155662.3, + "step": 73810 + }, + { + "epoch": 0.1491210704719272, + "grad_norm": 341513.9375, + "learning_rate": 9.93102413145999e-06, + "loss": 63734.6562, + "step": 73820 + }, + { + "epoch": 0.149141271104611, + "grad_norm": 27638.712890625, + "learning_rate": 9.930966338721146e-06, + "loss": 27824.5719, + "step": 73830 + }, + { + "epoch": 0.1491614717372948, + "grad_norm": 60889.671875, + "learning_rate": 9.930908521949371e-06, + "loss": 109940.6125, + "step": 73840 + }, + { + "epoch": 0.14918167236997862, + "grad_norm": 260753.03125, + "learning_rate": 9.930850681144946e-06, + "loss": 135582.8, + "step": 73850 + }, + { + "epoch": 0.14920187300266244, + "grad_norm": 52926.30078125, + "learning_rate": 9.930792816308151e-06, + "loss": 273761.525, + "step": 73860 + }, + { + "epoch": 0.14922207363534626, + "grad_norm": 318761.78125, + "learning_rate": 9.930734927439272e-06, + "loss": 103335.5813, + "step": 73870 + }, + { + "epoch": 0.14924227426803008, + "grad_norm": 601615.875, + "learning_rate": 9.930677014538587e-06, + "loss": 198839.1625, + "step": 73880 + }, + { + "epoch": 0.1492624749007139, + "grad_norm": 712975.125, + "learning_rate": 9.93061907760638e-06, + "loss": 126285.425, + "step": 73890 + }, + { + "epoch": 0.14928267553339772, + "grad_norm": 256812.734375, + "learning_rate": 9.930561116642936e-06, + "loss": 144816.4375, + "step": 73900 + }, + { + "epoch": 0.1493028761660815, + "grad_norm": 1048016.0, + "learning_rate": 9.930503131648535e-06, + "loss": 278659.675, + "step": 73910 + }, + { + "epoch": 0.14932307679876533, + "grad_norm": 318752.75, + "learning_rate": 9.930445122623458e-06, + "loss": 150451.85, + "step": 73920 + }, + { + "epoch": 0.14934327743144915, + "grad_norm": 143665.28125, + "learning_rate": 9.93038708956799e-06, + "loss": 113139.725, + "step": 73930 + }, + { + "epoch": 0.14936347806413297, + "grad_norm": 111512.78125, + "learning_rate": 9.930329032482412e-06, + "loss": 162840.175, + "step": 73940 + }, + { + "epoch": 0.1493836786968168, + "grad_norm": 1041160.6875, + "learning_rate": 9.930270951367012e-06, + "loss": 198977.15, + "step": 73950 + }, + { + "epoch": 0.1494038793295006, + "grad_norm": 23301.62890625, + "learning_rate": 9.930212846222065e-06, + "loss": 66323.7937, + "step": 73960 + }, + { + "epoch": 0.1494240799621844, + "grad_norm": 366832.6875, + "learning_rate": 9.930154717047862e-06, + "loss": 57321.65, + "step": 73970 + }, + { + "epoch": 0.14944428059486822, + "grad_norm": 693548.6875, + "learning_rate": 9.930096563844682e-06, + "loss": 152558.7125, + "step": 73980 + }, + { + "epoch": 0.14946448122755204, + "grad_norm": 1138209.625, + "learning_rate": 9.930038386612809e-06, + "loss": 133124.2875, + "step": 73990 + }, + { + "epoch": 0.14948468186023586, + "grad_norm": 34571.89453125, + "learning_rate": 9.929980185352525e-06, + "loss": 96272.6187, + "step": 74000 + }, + { + "epoch": 0.14950488249291968, + "grad_norm": 101243.234375, + "learning_rate": 9.929921960064117e-06, + "loss": 24362.4688, + "step": 74010 + }, + { + "epoch": 0.1495250831256035, + "grad_norm": 657751.0625, + "learning_rate": 9.929863710747869e-06, + "loss": 174213.65, + "step": 74020 + }, + { + "epoch": 0.14954528375828732, + "grad_norm": 159182.609375, + "learning_rate": 9.929805437404061e-06, + "loss": 126544.8, + "step": 74030 + }, + { + "epoch": 0.14956548439097112, + "grad_norm": 88173.2109375, + "learning_rate": 9.929747140032979e-06, + "loss": 112725.025, + "step": 74040 + }, + { + "epoch": 0.14958568502365494, + "grad_norm": 407872.1875, + "learning_rate": 9.929688818634909e-06, + "loss": 118210.1, + "step": 74050 + }, + { + "epoch": 0.14960588565633876, + "grad_norm": 221310.28125, + "learning_rate": 9.929630473210132e-06, + "loss": 95288.7375, + "step": 74060 + }, + { + "epoch": 0.14962608628902258, + "grad_norm": 1287163.25, + "learning_rate": 9.929572103758935e-06, + "loss": 94815.7375, + "step": 74070 + }, + { + "epoch": 0.1496462869217064, + "grad_norm": 163545.0, + "learning_rate": 9.929513710281602e-06, + "loss": 132261.3, + "step": 74080 + }, + { + "epoch": 0.14966648755439022, + "grad_norm": 69567.9296875, + "learning_rate": 9.929455292778416e-06, + "loss": 97945.1375, + "step": 74090 + }, + { + "epoch": 0.149686688187074, + "grad_norm": 307739.40625, + "learning_rate": 9.929396851249661e-06, + "loss": 130051.75, + "step": 74100 + }, + { + "epoch": 0.14970688881975783, + "grad_norm": 426152.125, + "learning_rate": 9.929338385695626e-06, + "loss": 148969.0625, + "step": 74110 + }, + { + "epoch": 0.14972708945244165, + "grad_norm": 95217.03125, + "learning_rate": 9.929279896116595e-06, + "loss": 73377.0312, + "step": 74120 + }, + { + "epoch": 0.14974729008512547, + "grad_norm": 712227.3125, + "learning_rate": 9.92922138251285e-06, + "loss": 128698.925, + "step": 74130 + }, + { + "epoch": 0.1497674907178093, + "grad_norm": 19650.3984375, + "learning_rate": 9.929162844884676e-06, + "loss": 74776.4125, + "step": 74140 + }, + { + "epoch": 0.1497876913504931, + "grad_norm": 207884.28125, + "learning_rate": 9.929104283232363e-06, + "loss": 115165.4875, + "step": 74150 + }, + { + "epoch": 0.1498078919831769, + "grad_norm": 60253.171875, + "learning_rate": 9.929045697556192e-06, + "loss": 36988.6188, + "step": 74160 + }, + { + "epoch": 0.14982809261586072, + "grad_norm": 34997.55859375, + "learning_rate": 9.92898708785645e-06, + "loss": 123211.2875, + "step": 74170 + }, + { + "epoch": 0.14984829324854454, + "grad_norm": 123241.34375, + "learning_rate": 9.928928454133424e-06, + "loss": 32266.2062, + "step": 74180 + }, + { + "epoch": 0.14986849388122836, + "grad_norm": 278926.09375, + "learning_rate": 9.928869796387396e-06, + "loss": 48488.9906, + "step": 74190 + }, + { + "epoch": 0.14988869451391218, + "grad_norm": 20673.228515625, + "learning_rate": 9.928811114618658e-06, + "loss": 117806.4375, + "step": 74200 + }, + { + "epoch": 0.149908895146596, + "grad_norm": 897898.8125, + "learning_rate": 9.92875240882749e-06, + "loss": 60918.8812, + "step": 74210 + }, + { + "epoch": 0.14992909577927982, + "grad_norm": 1280051.625, + "learning_rate": 9.92869367901418e-06, + "loss": 98681.775, + "step": 74220 + }, + { + "epoch": 0.1499492964119636, + "grad_norm": 54947.91015625, + "learning_rate": 9.928634925179018e-06, + "loss": 182394.875, + "step": 74230 + }, + { + "epoch": 0.14996949704464743, + "grad_norm": 1079128.625, + "learning_rate": 9.928576147322283e-06, + "loss": 115012.1125, + "step": 74240 + }, + { + "epoch": 0.14998969767733125, + "grad_norm": 439863.125, + "learning_rate": 9.92851734544427e-06, + "loss": 93947.85, + "step": 74250 + }, + { + "epoch": 0.15000989831001507, + "grad_norm": 359976.9375, + "learning_rate": 9.928458519545258e-06, + "loss": 160820.4625, + "step": 74260 + }, + { + "epoch": 0.1500300989426989, + "grad_norm": 102867.7421875, + "learning_rate": 9.928399669625537e-06, + "loss": 79749.475, + "step": 74270 + }, + { + "epoch": 0.1500502995753827, + "grad_norm": 2009043.0, + "learning_rate": 9.928340795685396e-06, + "loss": 178921.55, + "step": 74280 + }, + { + "epoch": 0.1500705002080665, + "grad_norm": 52912.21875, + "learning_rate": 9.928281897725117e-06, + "loss": 138362.75, + "step": 74290 + }, + { + "epoch": 0.15009070084075032, + "grad_norm": 10412.0419921875, + "learning_rate": 9.928222975744992e-06, + "loss": 94129.8, + "step": 74300 + }, + { + "epoch": 0.15011090147343414, + "grad_norm": 60256.24609375, + "learning_rate": 9.928164029745304e-06, + "loss": 106529.1, + "step": 74310 + }, + { + "epoch": 0.15013110210611796, + "grad_norm": 1043776.875, + "learning_rate": 9.928105059726342e-06, + "loss": 96091.0875, + "step": 74320 + }, + { + "epoch": 0.15015130273880178, + "grad_norm": 339154.8125, + "learning_rate": 9.928046065688396e-06, + "loss": 172381.2875, + "step": 74330 + }, + { + "epoch": 0.1501715033714856, + "grad_norm": 280242.78125, + "learning_rate": 9.927987047631749e-06, + "loss": 123850.125, + "step": 74340 + }, + { + "epoch": 0.15019170400416942, + "grad_norm": 1606414.0, + "learning_rate": 9.927928005556692e-06, + "loss": 150482.7625, + "step": 74350 + }, + { + "epoch": 0.15021190463685322, + "grad_norm": 1563423.0, + "learning_rate": 9.927868939463511e-06, + "loss": 117642.9, + "step": 74360 + }, + { + "epoch": 0.15023210526953704, + "grad_norm": 30381.802734375, + "learning_rate": 9.927809849352496e-06, + "loss": 102902.0312, + "step": 74370 + }, + { + "epoch": 0.15025230590222086, + "grad_norm": 902447.4375, + "learning_rate": 9.927750735223932e-06, + "loss": 133994.5625, + "step": 74380 + }, + { + "epoch": 0.15027250653490468, + "grad_norm": 116156.25, + "learning_rate": 9.927691597078109e-06, + "loss": 182663.4, + "step": 74390 + }, + { + "epoch": 0.1502927071675885, + "grad_norm": 353664.21875, + "learning_rate": 9.927632434915315e-06, + "loss": 150047.8375, + "step": 74400 + }, + { + "epoch": 0.15031290780027232, + "grad_norm": 1817601.0, + "learning_rate": 9.927573248735839e-06, + "loss": 136348.625, + "step": 74410 + }, + { + "epoch": 0.1503331084329561, + "grad_norm": 82412.09375, + "learning_rate": 9.927514038539966e-06, + "loss": 149739.0125, + "step": 74420 + }, + { + "epoch": 0.15035330906563993, + "grad_norm": 81053.3671875, + "learning_rate": 9.927454804327989e-06, + "loss": 150020.6875, + "step": 74430 + }, + { + "epoch": 0.15037350969832375, + "grad_norm": 402836.0625, + "learning_rate": 9.927395546100195e-06, + "loss": 69844.275, + "step": 74440 + }, + { + "epoch": 0.15039371033100757, + "grad_norm": 62500.3828125, + "learning_rate": 9.927336263856873e-06, + "loss": 50593.675, + "step": 74450 + }, + { + "epoch": 0.1504139109636914, + "grad_norm": 384060.59375, + "learning_rate": 9.92727695759831e-06, + "loss": 43233.725, + "step": 74460 + }, + { + "epoch": 0.1504341115963752, + "grad_norm": 54802.828125, + "learning_rate": 9.927217627324798e-06, + "loss": 125667.8875, + "step": 74470 + }, + { + "epoch": 0.150454312229059, + "grad_norm": 343447.0, + "learning_rate": 9.927158273036624e-06, + "loss": 51950.0687, + "step": 74480 + }, + { + "epoch": 0.15047451286174282, + "grad_norm": 879098.25, + "learning_rate": 9.92709889473408e-06, + "loss": 143523.075, + "step": 74490 + }, + { + "epoch": 0.15049471349442664, + "grad_norm": 1016631.625, + "learning_rate": 9.927039492417452e-06, + "loss": 159982.1125, + "step": 74500 + }, + { + "epoch": 0.15051491412711046, + "grad_norm": 74317.953125, + "learning_rate": 9.92698006608703e-06, + "loss": 55394.8688, + "step": 74510 + }, + { + "epoch": 0.15053511475979428, + "grad_norm": 147310.65625, + "learning_rate": 9.926920615743108e-06, + "loss": 107089.55, + "step": 74520 + }, + { + "epoch": 0.1505553153924781, + "grad_norm": 1579159.25, + "learning_rate": 9.92686114138597e-06, + "loss": 177449.375, + "step": 74530 + }, + { + "epoch": 0.15057551602516192, + "grad_norm": 35613.02734375, + "learning_rate": 9.926801643015908e-06, + "loss": 88202.6938, + "step": 74540 + }, + { + "epoch": 0.1505957166578457, + "grad_norm": 8074.17578125, + "learning_rate": 9.926742120633215e-06, + "loss": 81105.8062, + "step": 74550 + }, + { + "epoch": 0.15061591729052953, + "grad_norm": 19521.935546875, + "learning_rate": 9.926682574238175e-06, + "loss": 145012.1125, + "step": 74560 + }, + { + "epoch": 0.15063611792321335, + "grad_norm": 36077.421875, + "learning_rate": 9.926623003831085e-06, + "loss": 25255.0687, + "step": 74570 + }, + { + "epoch": 0.15065631855589717, + "grad_norm": 581795.125, + "learning_rate": 9.92656340941223e-06, + "loss": 143961.5, + "step": 74580 + }, + { + "epoch": 0.150676519188581, + "grad_norm": 1837607.625, + "learning_rate": 9.926503790981903e-06, + "loss": 56877.8313, + "step": 74590 + }, + { + "epoch": 0.1506967198212648, + "grad_norm": 2073301.5, + "learning_rate": 9.926444148540394e-06, + "loss": 101074.35, + "step": 74600 + }, + { + "epoch": 0.1507169204539486, + "grad_norm": 886437.75, + "learning_rate": 9.926384482087994e-06, + "loss": 49565.425, + "step": 74610 + }, + { + "epoch": 0.15073712108663243, + "grad_norm": 76659.421875, + "learning_rate": 9.926324791624993e-06, + "loss": 74338.4563, + "step": 74620 + }, + { + "epoch": 0.15075732171931625, + "grad_norm": 44732.58984375, + "learning_rate": 9.926265077151682e-06, + "loss": 57219.6188, + "step": 74630 + }, + { + "epoch": 0.15077752235200007, + "grad_norm": 43107.88671875, + "learning_rate": 9.926205338668353e-06, + "loss": 38724.1125, + "step": 74640 + }, + { + "epoch": 0.15079772298468389, + "grad_norm": 81383.484375, + "learning_rate": 9.926145576175297e-06, + "loss": 132896.05, + "step": 74650 + }, + { + "epoch": 0.1508179236173677, + "grad_norm": 62001.86328125, + "learning_rate": 9.926085789672806e-06, + "loss": 135555.325, + "step": 74660 + }, + { + "epoch": 0.15083812425005153, + "grad_norm": 28989.6796875, + "learning_rate": 9.926025979161169e-06, + "loss": 50966.475, + "step": 74670 + }, + { + "epoch": 0.15085832488273532, + "grad_norm": 563229.3125, + "learning_rate": 9.925966144640677e-06, + "loss": 48518.8844, + "step": 74680 + }, + { + "epoch": 0.15087852551541914, + "grad_norm": 146616.6875, + "learning_rate": 9.925906286111627e-06, + "loss": 43543.0813, + "step": 74690 + }, + { + "epoch": 0.15089872614810296, + "grad_norm": 386072.5625, + "learning_rate": 9.925846403574306e-06, + "loss": 53834.7125, + "step": 74700 + }, + { + "epoch": 0.15091892678078678, + "grad_norm": 79760.2265625, + "learning_rate": 9.925786497029007e-06, + "loss": 56922.3875, + "step": 74710 + }, + { + "epoch": 0.1509391274134706, + "grad_norm": 292453.875, + "learning_rate": 9.925726566476021e-06, + "loss": 187647.6125, + "step": 74720 + }, + { + "epoch": 0.15095932804615442, + "grad_norm": 187170.96875, + "learning_rate": 9.925666611915642e-06, + "loss": 143365.475, + "step": 74730 + }, + { + "epoch": 0.1509795286788382, + "grad_norm": 49383.453125, + "learning_rate": 9.925606633348161e-06, + "loss": 48242.475, + "step": 74740 + }, + { + "epoch": 0.15099972931152203, + "grad_norm": 80738.203125, + "learning_rate": 9.92554663077387e-06, + "loss": 58599.3562, + "step": 74750 + }, + { + "epoch": 0.15101992994420585, + "grad_norm": 2336117.5, + "learning_rate": 9.925486604193064e-06, + "loss": 188633.9, + "step": 74760 + }, + { + "epoch": 0.15104013057688967, + "grad_norm": 202696.25, + "learning_rate": 9.925426553606033e-06, + "loss": 69280.8188, + "step": 74770 + }, + { + "epoch": 0.1510603312095735, + "grad_norm": 239602.296875, + "learning_rate": 9.92536647901307e-06, + "loss": 143412.9625, + "step": 74780 + }, + { + "epoch": 0.1510805318422573, + "grad_norm": 106833.171875, + "learning_rate": 9.925306380414468e-06, + "loss": 79966.6187, + "step": 74790 + }, + { + "epoch": 0.1511007324749411, + "grad_norm": 596992.0625, + "learning_rate": 9.925246257810519e-06, + "loss": 93480.0625, + "step": 74800 + }, + { + "epoch": 0.15112093310762492, + "grad_norm": 153223.90625, + "learning_rate": 9.925186111201519e-06, + "loss": 221224.35, + "step": 74810 + }, + { + "epoch": 0.15114113374030874, + "grad_norm": 331260.875, + "learning_rate": 9.92512594058776e-06, + "loss": 92479.6438, + "step": 74820 + }, + { + "epoch": 0.15116133437299256, + "grad_norm": 91458.734375, + "learning_rate": 9.925065745969531e-06, + "loss": 221947.4, + "step": 74830 + }, + { + "epoch": 0.15118153500567638, + "grad_norm": 1697780.875, + "learning_rate": 9.925005527347132e-06, + "loss": 127544.3, + "step": 74840 + }, + { + "epoch": 0.1512017356383602, + "grad_norm": 71837.8203125, + "learning_rate": 9.924945284720852e-06, + "loss": 64151.9062, + "step": 74850 + }, + { + "epoch": 0.15122193627104402, + "grad_norm": 417433.03125, + "learning_rate": 9.924885018090987e-06, + "loss": 226157.825, + "step": 74860 + }, + { + "epoch": 0.15124213690372781, + "grad_norm": 209533.734375, + "learning_rate": 9.924824727457829e-06, + "loss": 43321.1062, + "step": 74870 + }, + { + "epoch": 0.15126233753641163, + "grad_norm": 193447.1875, + "learning_rate": 9.924764412821673e-06, + "loss": 88741.4875, + "step": 74880 + }, + { + "epoch": 0.15128253816909545, + "grad_norm": 786613.5, + "learning_rate": 9.924704074182811e-06, + "loss": 151848.975, + "step": 74890 + }, + { + "epoch": 0.15130273880177927, + "grad_norm": 352860.1875, + "learning_rate": 9.92464371154154e-06, + "loss": 42285.175, + "step": 74900 + }, + { + "epoch": 0.1513229394344631, + "grad_norm": 360970.40625, + "learning_rate": 9.924583324898152e-06, + "loss": 143786.3375, + "step": 74910 + }, + { + "epoch": 0.15134314006714691, + "grad_norm": 1319207.125, + "learning_rate": 9.924522914252943e-06, + "loss": 212663.1, + "step": 74920 + }, + { + "epoch": 0.1513633406998307, + "grad_norm": 283654.71875, + "learning_rate": 9.924462479606207e-06, + "loss": 117924.85, + "step": 74930 + }, + { + "epoch": 0.15138354133251453, + "grad_norm": 138132.859375, + "learning_rate": 9.924402020958238e-06, + "loss": 88336.625, + "step": 74940 + }, + { + "epoch": 0.15140374196519835, + "grad_norm": 123579.0234375, + "learning_rate": 9.92434153830933e-06, + "loss": 56021.4875, + "step": 74950 + }, + { + "epoch": 0.15142394259788217, + "grad_norm": 76196.1015625, + "learning_rate": 9.92428103165978e-06, + "loss": 119769.25, + "step": 74960 + }, + { + "epoch": 0.151444143230566, + "grad_norm": 10306.5947265625, + "learning_rate": 9.92422050100988e-06, + "loss": 102036.8813, + "step": 74970 + }, + { + "epoch": 0.1514643438632498, + "grad_norm": 38264.40625, + "learning_rate": 9.924159946359927e-06, + "loss": 88953.9312, + "step": 74980 + }, + { + "epoch": 0.15148454449593363, + "grad_norm": 25597.177734375, + "learning_rate": 9.924099367710215e-06, + "loss": 113607.725, + "step": 74990 + }, + { + "epoch": 0.15150474512861742, + "grad_norm": 352029.40625, + "learning_rate": 9.924038765061042e-06, + "loss": 150546.3, + "step": 75000 + }, + { + "epoch": 0.15152494576130124, + "grad_norm": 807081.75, + "learning_rate": 9.923978138412698e-06, + "loss": 121259.275, + "step": 75010 + }, + { + "epoch": 0.15154514639398506, + "grad_norm": 251592.96875, + "learning_rate": 9.923917487765484e-06, + "loss": 133544.45, + "step": 75020 + }, + { + "epoch": 0.15156534702666888, + "grad_norm": 161267.125, + "learning_rate": 9.923856813119694e-06, + "loss": 58803.6687, + "step": 75030 + }, + { + "epoch": 0.1515855476593527, + "grad_norm": 229765.875, + "learning_rate": 9.92379611447562e-06, + "loss": 62651.2937, + "step": 75040 + }, + { + "epoch": 0.15160574829203652, + "grad_norm": 998161.875, + "learning_rate": 9.923735391833564e-06, + "loss": 209028.2875, + "step": 75050 + }, + { + "epoch": 0.1516259489247203, + "grad_norm": 235788.390625, + "learning_rate": 9.923674645193819e-06, + "loss": 50624.7125, + "step": 75060 + }, + { + "epoch": 0.15164614955740413, + "grad_norm": 198747.46875, + "learning_rate": 9.92361387455668e-06, + "loss": 102894.0625, + "step": 75070 + }, + { + "epoch": 0.15166635019008795, + "grad_norm": 110325.4296875, + "learning_rate": 9.923553079922443e-06, + "loss": 106651.975, + "step": 75080 + }, + { + "epoch": 0.15168655082277177, + "grad_norm": 342402.1875, + "learning_rate": 9.923492261291406e-06, + "loss": 82370.4, + "step": 75090 + }, + { + "epoch": 0.1517067514554556, + "grad_norm": 1745549.875, + "learning_rate": 9.923431418663866e-06, + "loss": 196564.4875, + "step": 75100 + }, + { + "epoch": 0.1517269520881394, + "grad_norm": 54156.7109375, + "learning_rate": 9.923370552040117e-06, + "loss": 73089.9563, + "step": 75110 + }, + { + "epoch": 0.1517471527208232, + "grad_norm": 2020124.25, + "learning_rate": 9.923309661420458e-06, + "loss": 153567.95, + "step": 75120 + }, + { + "epoch": 0.15176735335350702, + "grad_norm": 2026408.625, + "learning_rate": 9.923248746805185e-06, + "loss": 71291.7375, + "step": 75130 + }, + { + "epoch": 0.15178755398619084, + "grad_norm": 22890.458984375, + "learning_rate": 9.923187808194594e-06, + "loss": 109790.85, + "step": 75140 + }, + { + "epoch": 0.15180775461887466, + "grad_norm": 26492.533203125, + "learning_rate": 9.923126845588982e-06, + "loss": 92389.1187, + "step": 75150 + }, + { + "epoch": 0.15182795525155848, + "grad_norm": 15883.1728515625, + "learning_rate": 9.92306585898865e-06, + "loss": 58577.1438, + "step": 75160 + }, + { + "epoch": 0.1518481558842423, + "grad_norm": 6780.25732421875, + "learning_rate": 9.92300484839389e-06, + "loss": 53075.325, + "step": 75170 + }, + { + "epoch": 0.15186835651692612, + "grad_norm": 853228.3125, + "learning_rate": 9.922943813805e-06, + "loss": 64093.2063, + "step": 75180 + }, + { + "epoch": 0.15188855714960992, + "grad_norm": 936318.125, + "learning_rate": 9.92288275522228e-06, + "loss": 131386.8, + "step": 75190 + }, + { + "epoch": 0.15190875778229374, + "grad_norm": 0.0, + "learning_rate": 9.922821672646028e-06, + "loss": 100958.7375, + "step": 75200 + }, + { + "epoch": 0.15192895841497756, + "grad_norm": 1599859.375, + "learning_rate": 9.922760566076538e-06, + "loss": 94671.9625, + "step": 75210 + }, + { + "epoch": 0.15194915904766138, + "grad_norm": 873282.375, + "learning_rate": 9.922699435514112e-06, + "loss": 193609.4, + "step": 75220 + }, + { + "epoch": 0.1519693596803452, + "grad_norm": 2254415.25, + "learning_rate": 9.922638280959044e-06, + "loss": 257169.1, + "step": 75230 + }, + { + "epoch": 0.15198956031302902, + "grad_norm": 368729.28125, + "learning_rate": 9.922577102411638e-06, + "loss": 90581.2375, + "step": 75240 + }, + { + "epoch": 0.1520097609457128, + "grad_norm": 842464.875, + "learning_rate": 9.922515899872184e-06, + "loss": 136421.475, + "step": 75250 + }, + { + "epoch": 0.15202996157839663, + "grad_norm": 497553.5625, + "learning_rate": 9.922454673340987e-06, + "loss": 196127.7875, + "step": 75260 + }, + { + "epoch": 0.15205016221108045, + "grad_norm": 432961.78125, + "learning_rate": 9.922393422818342e-06, + "loss": 110453.7375, + "step": 75270 + }, + { + "epoch": 0.15207036284376427, + "grad_norm": 2406899.75, + "learning_rate": 9.922332148304548e-06, + "loss": 99452.275, + "step": 75280 + }, + { + "epoch": 0.1520905634764481, + "grad_norm": 395568.6875, + "learning_rate": 9.922270849799903e-06, + "loss": 141090.2125, + "step": 75290 + }, + { + "epoch": 0.1521107641091319, + "grad_norm": 124376.5859375, + "learning_rate": 9.922209527304709e-06, + "loss": 149896.25, + "step": 75300 + }, + { + "epoch": 0.15213096474181573, + "grad_norm": 125749.828125, + "learning_rate": 9.922148180819261e-06, + "loss": 69360.7937, + "step": 75310 + }, + { + "epoch": 0.15215116537449952, + "grad_norm": 325048.15625, + "learning_rate": 9.922086810343862e-06, + "loss": 62179.3125, + "step": 75320 + }, + { + "epoch": 0.15217136600718334, + "grad_norm": 455073.6875, + "learning_rate": 9.922025415878809e-06, + "loss": 45754.9938, + "step": 75330 + }, + { + "epoch": 0.15219156663986716, + "grad_norm": 205048.953125, + "learning_rate": 9.9219639974244e-06, + "loss": 173292.0, + "step": 75340 + }, + { + "epoch": 0.15221176727255098, + "grad_norm": 559638.125, + "learning_rate": 9.921902554980935e-06, + "loss": 169049.625, + "step": 75350 + }, + { + "epoch": 0.1522319679052348, + "grad_norm": 296884.5, + "learning_rate": 9.921841088548713e-06, + "loss": 150430.3375, + "step": 75360 + }, + { + "epoch": 0.15225216853791862, + "grad_norm": 2231917.25, + "learning_rate": 9.921779598128036e-06, + "loss": 92926.1375, + "step": 75370 + }, + { + "epoch": 0.1522723691706024, + "grad_norm": 269309.25, + "learning_rate": 9.921718083719203e-06, + "loss": 70876.775, + "step": 75380 + }, + { + "epoch": 0.15229256980328623, + "grad_norm": 763945.875, + "learning_rate": 9.921656545322512e-06, + "loss": 176934.7375, + "step": 75390 + }, + { + "epoch": 0.15231277043597005, + "grad_norm": 454170.90625, + "learning_rate": 9.921594982938262e-06, + "loss": 80038.025, + "step": 75400 + }, + { + "epoch": 0.15233297106865387, + "grad_norm": 915518.5, + "learning_rate": 9.921533396566758e-06, + "loss": 124992.1, + "step": 75410 + }, + { + "epoch": 0.1523531717013377, + "grad_norm": 194566.3125, + "learning_rate": 9.921471786208296e-06, + "loss": 82004.8188, + "step": 75420 + }, + { + "epoch": 0.1523733723340215, + "grad_norm": 1260443.125, + "learning_rate": 9.921410151863177e-06, + "loss": 160225.3625, + "step": 75430 + }, + { + "epoch": 0.1523935729667053, + "grad_norm": 80427.078125, + "learning_rate": 9.921348493531701e-06, + "loss": 141750.5125, + "step": 75440 + }, + { + "epoch": 0.15241377359938912, + "grad_norm": 104178.65625, + "learning_rate": 9.921286811214173e-06, + "loss": 17265.5687, + "step": 75450 + }, + { + "epoch": 0.15243397423207294, + "grad_norm": 209732.671875, + "learning_rate": 9.921225104910886e-06, + "loss": 25459.0141, + "step": 75460 + }, + { + "epoch": 0.15245417486475676, + "grad_norm": 514938.78125, + "learning_rate": 9.921163374622147e-06, + "loss": 91409.7312, + "step": 75470 + }, + { + "epoch": 0.15247437549744058, + "grad_norm": 3233021.75, + "learning_rate": 9.921101620348252e-06, + "loss": 196855.5375, + "step": 75480 + }, + { + "epoch": 0.1524945761301244, + "grad_norm": 501331.25, + "learning_rate": 9.921039842089508e-06, + "loss": 115752.2875, + "step": 75490 + }, + { + "epoch": 0.15251477676280822, + "grad_norm": 316025.25, + "learning_rate": 9.92097803984621e-06, + "loss": 36749.1406, + "step": 75500 + }, + { + "epoch": 0.15253497739549202, + "grad_norm": 118678.734375, + "learning_rate": 9.920916213618664e-06, + "loss": 81285.9125, + "step": 75510 + }, + { + "epoch": 0.15255517802817584, + "grad_norm": 483935.40625, + "learning_rate": 9.920854363407168e-06, + "loss": 256884.35, + "step": 75520 + }, + { + "epoch": 0.15257537866085966, + "grad_norm": 740572.5, + "learning_rate": 9.920792489212023e-06, + "loss": 174157.3, + "step": 75530 + }, + { + "epoch": 0.15259557929354348, + "grad_norm": 54057.51171875, + "learning_rate": 9.920730591033534e-06, + "loss": 200410.75, + "step": 75540 + }, + { + "epoch": 0.1526157799262273, + "grad_norm": 68993.8671875, + "learning_rate": 9.920668668872002e-06, + "loss": 72022.4563, + "step": 75550 + }, + { + "epoch": 0.15263598055891112, + "grad_norm": 181320.125, + "learning_rate": 9.920606722727726e-06, + "loss": 104102.6812, + "step": 75560 + }, + { + "epoch": 0.1526561811915949, + "grad_norm": 238424.296875, + "learning_rate": 9.920544752601011e-06, + "loss": 168293.5625, + "step": 75570 + }, + { + "epoch": 0.15267638182427873, + "grad_norm": 547338.125, + "learning_rate": 9.920482758492156e-06, + "loss": 75981.2625, + "step": 75580 + }, + { + "epoch": 0.15269658245696255, + "grad_norm": 40408.94921875, + "learning_rate": 9.920420740401466e-06, + "loss": 107955.375, + "step": 75590 + }, + { + "epoch": 0.15271678308964637, + "grad_norm": 76948.2578125, + "learning_rate": 9.920358698329242e-06, + "loss": 42288.6906, + "step": 75600 + }, + { + "epoch": 0.1527369837223302, + "grad_norm": 35424.78125, + "learning_rate": 9.920296632275785e-06, + "loss": 87621.3625, + "step": 75610 + }, + { + "epoch": 0.152757184355014, + "grad_norm": 409145.1875, + "learning_rate": 9.9202345422414e-06, + "loss": 153932.525, + "step": 75620 + }, + { + "epoch": 0.15277738498769783, + "grad_norm": 106076.5234375, + "learning_rate": 9.92017242822639e-06, + "loss": 77861.35, + "step": 75630 + }, + { + "epoch": 0.15279758562038162, + "grad_norm": 659653.625, + "learning_rate": 9.920110290231056e-06, + "loss": 243861.9, + "step": 75640 + }, + { + "epoch": 0.15281778625306544, + "grad_norm": 240537.734375, + "learning_rate": 9.920048128255699e-06, + "loss": 244372.025, + "step": 75650 + }, + { + "epoch": 0.15283798688574926, + "grad_norm": 225474.609375, + "learning_rate": 9.919985942300625e-06, + "loss": 116444.5875, + "step": 75660 + }, + { + "epoch": 0.15285818751843308, + "grad_norm": 22452.849609375, + "learning_rate": 9.919923732366137e-06, + "loss": 133599.4625, + "step": 75670 + }, + { + "epoch": 0.1528783881511169, + "grad_norm": 1603282.5, + "learning_rate": 9.919861498452538e-06, + "loss": 69663.125, + "step": 75680 + }, + { + "epoch": 0.15289858878380072, + "grad_norm": 178093.5625, + "learning_rate": 9.91979924056013e-06, + "loss": 132252.5375, + "step": 75690 + }, + { + "epoch": 0.1529187894164845, + "grad_norm": 1099562.125, + "learning_rate": 9.919736958689216e-06, + "loss": 166982.6875, + "step": 75700 + }, + { + "epoch": 0.15293899004916833, + "grad_norm": 381917.9375, + "learning_rate": 9.919674652840103e-06, + "loss": 200497.825, + "step": 75710 + }, + { + "epoch": 0.15295919068185215, + "grad_norm": 154749.84375, + "learning_rate": 9.91961232301309e-06, + "loss": 29666.9906, + "step": 75720 + }, + { + "epoch": 0.15297939131453597, + "grad_norm": 208917.875, + "learning_rate": 9.919549969208486e-06, + "loss": 134769.4625, + "step": 75730 + }, + { + "epoch": 0.1529995919472198, + "grad_norm": 33928.19921875, + "learning_rate": 9.919487591426591e-06, + "loss": 85960.4, + "step": 75740 + }, + { + "epoch": 0.1530197925799036, + "grad_norm": 1470976.25, + "learning_rate": 9.91942518966771e-06, + "loss": 174680.8875, + "step": 75750 + }, + { + "epoch": 0.1530399932125874, + "grad_norm": 257826.765625, + "learning_rate": 9.919362763932145e-06, + "loss": 187809.65, + "step": 75760 + }, + { + "epoch": 0.15306019384527123, + "grad_norm": 737452.375, + "learning_rate": 9.919300314220206e-06, + "loss": 127052.25, + "step": 75770 + }, + { + "epoch": 0.15308039447795505, + "grad_norm": 339597.0, + "learning_rate": 9.919237840532192e-06, + "loss": 85306.9937, + "step": 75780 + }, + { + "epoch": 0.15310059511063887, + "grad_norm": 132585.90625, + "learning_rate": 9.91917534286841e-06, + "loss": 89133.1062, + "step": 75790 + }, + { + "epoch": 0.15312079574332269, + "grad_norm": 1320223.375, + "learning_rate": 9.919112821229165e-06, + "loss": 60561.55, + "step": 75800 + }, + { + "epoch": 0.1531409963760065, + "grad_norm": 55887.00390625, + "learning_rate": 9.91905027561476e-06, + "loss": 147462.6625, + "step": 75810 + }, + { + "epoch": 0.15316119700869033, + "grad_norm": 231342.15625, + "learning_rate": 9.918987706025498e-06, + "loss": 97403.1938, + "step": 75820 + }, + { + "epoch": 0.15318139764137412, + "grad_norm": 1005863.8125, + "learning_rate": 9.918925112461688e-06, + "loss": 74130.7875, + "step": 75830 + }, + { + "epoch": 0.15320159827405794, + "grad_norm": 8880.2158203125, + "learning_rate": 9.918862494923635e-06, + "loss": 47543.8594, + "step": 75840 + }, + { + "epoch": 0.15322179890674176, + "grad_norm": 858145.125, + "learning_rate": 9.918799853411642e-06, + "loss": 86252.5875, + "step": 75850 + }, + { + "epoch": 0.15324199953942558, + "grad_norm": 270009.28125, + "learning_rate": 9.918737187926014e-06, + "loss": 234779.575, + "step": 75860 + }, + { + "epoch": 0.1532622001721094, + "grad_norm": 499486.3125, + "learning_rate": 9.91867449846706e-06, + "loss": 50615.7406, + "step": 75870 + }, + { + "epoch": 0.15328240080479322, + "grad_norm": 83590.859375, + "learning_rate": 9.91861178503508e-06, + "loss": 235033.1, + "step": 75880 + }, + { + "epoch": 0.153302601437477, + "grad_norm": 2141611.25, + "learning_rate": 9.918549047630386e-06, + "loss": 126918.325, + "step": 75890 + }, + { + "epoch": 0.15332280207016083, + "grad_norm": 46492.609375, + "learning_rate": 9.918486286253279e-06, + "loss": 67738.95, + "step": 75900 + }, + { + "epoch": 0.15334300270284465, + "grad_norm": 138706.515625, + "learning_rate": 9.918423500904066e-06, + "loss": 127261.2, + "step": 75910 + }, + { + "epoch": 0.15336320333552847, + "grad_norm": 38186.140625, + "learning_rate": 9.918360691583056e-06, + "loss": 70201.6938, + "step": 75920 + }, + { + "epoch": 0.1533834039682123, + "grad_norm": 88318.59375, + "learning_rate": 9.918297858290548e-06, + "loss": 103061.6313, + "step": 75930 + }, + { + "epoch": 0.1534036046008961, + "grad_norm": 153855.0, + "learning_rate": 9.918235001026856e-06, + "loss": 184889.0875, + "step": 75940 + }, + { + "epoch": 0.1534238052335799, + "grad_norm": 520011.84375, + "learning_rate": 9.918172119792283e-06, + "loss": 190199.2, + "step": 75950 + }, + { + "epoch": 0.15344400586626372, + "grad_norm": 370037.40625, + "learning_rate": 9.918109214587134e-06, + "loss": 90560.0437, + "step": 75960 + }, + { + "epoch": 0.15346420649894754, + "grad_norm": 40712.22265625, + "learning_rate": 9.918046285411717e-06, + "loss": 228314.3, + "step": 75970 + }, + { + "epoch": 0.15348440713163136, + "grad_norm": 130804.8046875, + "learning_rate": 9.917983332266342e-06, + "loss": 197139.225, + "step": 75980 + }, + { + "epoch": 0.15350460776431518, + "grad_norm": 16699.775390625, + "learning_rate": 9.91792035515131e-06, + "loss": 47511.3219, + "step": 75990 + }, + { + "epoch": 0.153524808396999, + "grad_norm": 0.0, + "learning_rate": 9.91785735406693e-06, + "loss": 67781.4312, + "step": 76000 + }, + { + "epoch": 0.15354500902968282, + "grad_norm": 96037.1015625, + "learning_rate": 9.917794329013511e-06, + "loss": 131015.2375, + "step": 76010 + }, + { + "epoch": 0.15356520966236661, + "grad_norm": 1042610.9375, + "learning_rate": 9.917731279991358e-06, + "loss": 168492.55, + "step": 76020 + }, + { + "epoch": 0.15358541029505043, + "grad_norm": 333273.5, + "learning_rate": 9.91766820700078e-06, + "loss": 299623.35, + "step": 76030 + }, + { + "epoch": 0.15360561092773425, + "grad_norm": 92062.96875, + "learning_rate": 9.917605110042084e-06, + "loss": 69860.7625, + "step": 76040 + }, + { + "epoch": 0.15362581156041807, + "grad_norm": 26556.5078125, + "learning_rate": 9.917541989115579e-06, + "loss": 78304.8875, + "step": 76050 + }, + { + "epoch": 0.1536460121931019, + "grad_norm": 56467.60546875, + "learning_rate": 9.917478844221566e-06, + "loss": 80524.3188, + "step": 76060 + }, + { + "epoch": 0.15366621282578571, + "grad_norm": 454618.3125, + "learning_rate": 9.91741567536036e-06, + "loss": 45155.0625, + "step": 76070 + }, + { + "epoch": 0.1536864134584695, + "grad_norm": 1100001.5, + "learning_rate": 9.917352482532267e-06, + "loss": 142774.525, + "step": 76080 + }, + { + "epoch": 0.15370661409115333, + "grad_norm": 35389.24609375, + "learning_rate": 9.917289265737594e-06, + "loss": 162067.6375, + "step": 76090 + }, + { + "epoch": 0.15372681472383715, + "grad_norm": 51324.92578125, + "learning_rate": 9.91722602497665e-06, + "loss": 42813.5469, + "step": 76100 + }, + { + "epoch": 0.15374701535652097, + "grad_norm": 430393.0625, + "learning_rate": 9.917162760249741e-06, + "loss": 72074.2937, + "step": 76110 + }, + { + "epoch": 0.1537672159892048, + "grad_norm": 1279112.625, + "learning_rate": 9.91709947155718e-06, + "loss": 228910.725, + "step": 76120 + }, + { + "epoch": 0.1537874166218886, + "grad_norm": 1779864.5, + "learning_rate": 9.91703615889927e-06, + "loss": 62228.3187, + "step": 76130 + }, + { + "epoch": 0.15380761725457243, + "grad_norm": 0.0, + "learning_rate": 9.916972822276322e-06, + "loss": 68641.5, + "step": 76140 + }, + { + "epoch": 0.15382781788725622, + "grad_norm": 2255809.5, + "learning_rate": 9.916909461688646e-06, + "loss": 144539.375, + "step": 76150 + }, + { + "epoch": 0.15384801851994004, + "grad_norm": 290993.3125, + "learning_rate": 9.916846077136548e-06, + "loss": 61711.7125, + "step": 76160 + }, + { + "epoch": 0.15386821915262386, + "grad_norm": 517359.78125, + "learning_rate": 9.916782668620341e-06, + "loss": 71889.325, + "step": 76170 + }, + { + "epoch": 0.15388841978530768, + "grad_norm": 13635.4970703125, + "learning_rate": 9.91671923614033e-06, + "loss": 118473.075, + "step": 76180 + }, + { + "epoch": 0.1539086204179915, + "grad_norm": 2322805.75, + "learning_rate": 9.916655779696826e-06, + "loss": 113644.3375, + "step": 76190 + }, + { + "epoch": 0.15392882105067532, + "grad_norm": 509851.40625, + "learning_rate": 9.91659229929014e-06, + "loss": 137225.8125, + "step": 76200 + }, + { + "epoch": 0.1539490216833591, + "grad_norm": 645042.6875, + "learning_rate": 9.916528794920577e-06, + "loss": 65249.0625, + "step": 76210 + }, + { + "epoch": 0.15396922231604293, + "grad_norm": 474752.53125, + "learning_rate": 9.916465266588448e-06, + "loss": 148291.4, + "step": 76220 + }, + { + "epoch": 0.15398942294872675, + "grad_norm": 860559.8125, + "learning_rate": 9.916401714294067e-06, + "loss": 189008.55, + "step": 76230 + }, + { + "epoch": 0.15400962358141057, + "grad_norm": 49123.94140625, + "learning_rate": 9.916338138037738e-06, + "loss": 77468.8375, + "step": 76240 + }, + { + "epoch": 0.1540298242140944, + "grad_norm": 44029.28125, + "learning_rate": 9.916274537819774e-06, + "loss": 102466.0875, + "step": 76250 + }, + { + "epoch": 0.1540500248467782, + "grad_norm": 1700959.875, + "learning_rate": 9.916210913640483e-06, + "loss": 142437.75, + "step": 76260 + }, + { + "epoch": 0.154070225479462, + "grad_norm": 16855.75, + "learning_rate": 9.916147265500179e-06, + "loss": 178356.5875, + "step": 76270 + }, + { + "epoch": 0.15409042611214582, + "grad_norm": 255537.4375, + "learning_rate": 9.916083593399167e-06, + "loss": 93876.2875, + "step": 76280 + }, + { + "epoch": 0.15411062674482964, + "grad_norm": 137374.953125, + "learning_rate": 9.916019897337761e-06, + "loss": 154296.1125, + "step": 76290 + }, + { + "epoch": 0.15413082737751346, + "grad_norm": 10588.6044921875, + "learning_rate": 9.915956177316269e-06, + "loss": 43102.3969, + "step": 76300 + }, + { + "epoch": 0.15415102801019728, + "grad_norm": 17541.4921875, + "learning_rate": 9.915892433335004e-06, + "loss": 60571.0375, + "step": 76310 + }, + { + "epoch": 0.1541712286428811, + "grad_norm": 126791.8671875, + "learning_rate": 9.915828665394274e-06, + "loss": 149433.5625, + "step": 76320 + }, + { + "epoch": 0.15419142927556492, + "grad_norm": 544288.0, + "learning_rate": 9.915764873494393e-06, + "loss": 118384.8625, + "step": 76330 + }, + { + "epoch": 0.15421162990824872, + "grad_norm": 11531.0693359375, + "learning_rate": 9.915701057635669e-06, + "loss": 43743.75, + "step": 76340 + }, + { + "epoch": 0.15423183054093254, + "grad_norm": 180313.3125, + "learning_rate": 9.915637217818415e-06, + "loss": 68292.9125, + "step": 76350 + }, + { + "epoch": 0.15425203117361636, + "grad_norm": 634415.3125, + "learning_rate": 9.915573354042943e-06, + "loss": 211495.3, + "step": 76360 + }, + { + "epoch": 0.15427223180630018, + "grad_norm": 392160.40625, + "learning_rate": 9.91550946630956e-06, + "loss": 71018.5063, + "step": 76370 + }, + { + "epoch": 0.154292432438984, + "grad_norm": 1655104.375, + "learning_rate": 9.915445554618581e-06, + "loss": 102649.7125, + "step": 76380 + }, + { + "epoch": 0.15431263307166782, + "grad_norm": 17010.548828125, + "learning_rate": 9.915381618970317e-06, + "loss": 50556.4344, + "step": 76390 + }, + { + "epoch": 0.1543328337043516, + "grad_norm": 205334.0625, + "learning_rate": 9.915317659365078e-06, + "loss": 101299.6313, + "step": 76400 + }, + { + "epoch": 0.15435303433703543, + "grad_norm": 605298.3125, + "learning_rate": 9.915253675803178e-06, + "loss": 121561.5375, + "step": 76410 + }, + { + "epoch": 0.15437323496971925, + "grad_norm": 345595.0, + "learning_rate": 9.915189668284927e-06, + "loss": 115066.7625, + "step": 76420 + }, + { + "epoch": 0.15439343560240307, + "grad_norm": 29567.42578125, + "learning_rate": 9.915125636810638e-06, + "loss": 148387.0125, + "step": 76430 + }, + { + "epoch": 0.1544136362350869, + "grad_norm": 389237.25, + "learning_rate": 9.915061581380622e-06, + "loss": 85296.3938, + "step": 76440 + }, + { + "epoch": 0.1544338368677707, + "grad_norm": 440942.09375, + "learning_rate": 9.914997501995193e-06, + "loss": 248982.575, + "step": 76450 + }, + { + "epoch": 0.15445403750045453, + "grad_norm": 986211.5, + "learning_rate": 9.914933398654663e-06, + "loss": 256006.025, + "step": 76460 + }, + { + "epoch": 0.15447423813313832, + "grad_norm": 2199887.75, + "learning_rate": 9.914869271359342e-06, + "loss": 181546.15, + "step": 76470 + }, + { + "epoch": 0.15449443876582214, + "grad_norm": 1948407.625, + "learning_rate": 9.914805120109545e-06, + "loss": 217123.55, + "step": 76480 + }, + { + "epoch": 0.15451463939850596, + "grad_norm": 846742.25, + "learning_rate": 9.914740944905585e-06, + "loss": 200468.125, + "step": 76490 + }, + { + "epoch": 0.15453484003118978, + "grad_norm": 350777.84375, + "learning_rate": 9.914676745747772e-06, + "loss": 143024.9, + "step": 76500 + }, + { + "epoch": 0.1545550406638736, + "grad_norm": 38667.8046875, + "learning_rate": 9.914612522636423e-06, + "loss": 72055.425, + "step": 76510 + }, + { + "epoch": 0.15457524129655742, + "grad_norm": 299216.375, + "learning_rate": 9.914548275571845e-06, + "loss": 112320.6625, + "step": 76520 + }, + { + "epoch": 0.1545954419292412, + "grad_norm": 4043287.5, + "learning_rate": 9.914484004554356e-06, + "loss": 159847.05, + "step": 76530 + }, + { + "epoch": 0.15461564256192503, + "grad_norm": 525720.1875, + "learning_rate": 9.91441970958427e-06, + "loss": 234460.325, + "step": 76540 + }, + { + "epoch": 0.15463584319460885, + "grad_norm": 1441302.75, + "learning_rate": 9.914355390661897e-06, + "loss": 77776.9625, + "step": 76550 + }, + { + "epoch": 0.15465604382729267, + "grad_norm": 6576.724609375, + "learning_rate": 9.914291047787552e-06, + "loss": 168134.5125, + "step": 76560 + }, + { + "epoch": 0.1546762444599765, + "grad_norm": 96033.7265625, + "learning_rate": 9.914226680961549e-06, + "loss": 119834.5875, + "step": 76570 + }, + { + "epoch": 0.1546964450926603, + "grad_norm": 192724.75, + "learning_rate": 9.9141622901842e-06, + "loss": 62774.6375, + "step": 76580 + }, + { + "epoch": 0.1547166457253441, + "grad_norm": 126961.9453125, + "learning_rate": 9.914097875455821e-06, + "loss": 174903.7375, + "step": 76590 + }, + { + "epoch": 0.15473684635802792, + "grad_norm": 199099.5, + "learning_rate": 9.914033436776724e-06, + "loss": 58483.3125, + "step": 76600 + }, + { + "epoch": 0.15475704699071174, + "grad_norm": 66708.765625, + "learning_rate": 9.913968974147225e-06, + "loss": 93480.2812, + "step": 76610 + }, + { + "epoch": 0.15477724762339556, + "grad_norm": 57661.44140625, + "learning_rate": 9.913904487567636e-06, + "loss": 59690.7438, + "step": 76620 + }, + { + "epoch": 0.15479744825607938, + "grad_norm": 2571081.0, + "learning_rate": 9.913839977038274e-06, + "loss": 143869.325, + "step": 76630 + }, + { + "epoch": 0.1548176488887632, + "grad_norm": 7760.9970703125, + "learning_rate": 9.913775442559451e-06, + "loss": 199400.4875, + "step": 76640 + }, + { + "epoch": 0.15483784952144702, + "grad_norm": 30677.373046875, + "learning_rate": 9.913710884131483e-06, + "loss": 87945.1812, + "step": 76650 + }, + { + "epoch": 0.15485805015413082, + "grad_norm": 1257054.375, + "learning_rate": 9.913646301754685e-06, + "loss": 102756.175, + "step": 76660 + }, + { + "epoch": 0.15487825078681464, + "grad_norm": 120654.75, + "learning_rate": 9.913581695429368e-06, + "loss": 66943.0562, + "step": 76670 + }, + { + "epoch": 0.15489845141949846, + "grad_norm": 1762298.25, + "learning_rate": 9.913517065155852e-06, + "loss": 63745.9625, + "step": 76680 + }, + { + "epoch": 0.15491865205218228, + "grad_norm": 928351.625, + "learning_rate": 9.91345241093445e-06, + "loss": 64913.9437, + "step": 76690 + }, + { + "epoch": 0.1549388526848661, + "grad_norm": 356067.53125, + "learning_rate": 9.913387732765475e-06, + "loss": 75590.8813, + "step": 76700 + }, + { + "epoch": 0.15495905331754992, + "grad_norm": 667563.8125, + "learning_rate": 9.913323030649247e-06, + "loss": 127063.25, + "step": 76710 + }, + { + "epoch": 0.1549792539502337, + "grad_norm": 13751690.0, + "learning_rate": 9.913258304586076e-06, + "loss": 104161.3062, + "step": 76720 + }, + { + "epoch": 0.15499945458291753, + "grad_norm": 425603.34375, + "learning_rate": 9.91319355457628e-06, + "loss": 67132.8938, + "step": 76730 + }, + { + "epoch": 0.15501965521560135, + "grad_norm": 242004.0625, + "learning_rate": 9.913128780620175e-06, + "loss": 65226.1125, + "step": 76740 + }, + { + "epoch": 0.15503985584828517, + "grad_norm": 4073.267822265625, + "learning_rate": 9.913063982718076e-06, + "loss": 101892.2625, + "step": 76750 + }, + { + "epoch": 0.155060056480969, + "grad_norm": 472335.15625, + "learning_rate": 9.9129991608703e-06, + "loss": 117348.1625, + "step": 76760 + }, + { + "epoch": 0.1550802571136528, + "grad_norm": 432541.53125, + "learning_rate": 9.912934315077162e-06, + "loss": 87202.5875, + "step": 76770 + }, + { + "epoch": 0.15510045774633663, + "grad_norm": 262323.6875, + "learning_rate": 9.912869445338978e-06, + "loss": 110842.625, + "step": 76780 + }, + { + "epoch": 0.15512065837902042, + "grad_norm": 268394.28125, + "learning_rate": 9.912804551656064e-06, + "loss": 95647.075, + "step": 76790 + }, + { + "epoch": 0.15514085901170424, + "grad_norm": 72954.9453125, + "learning_rate": 9.912739634028734e-06, + "loss": 199482.9, + "step": 76800 + }, + { + "epoch": 0.15516105964438806, + "grad_norm": 1298921.5, + "learning_rate": 9.91267469245731e-06, + "loss": 202039.55, + "step": 76810 + }, + { + "epoch": 0.15518126027707188, + "grad_norm": 614680.8125, + "learning_rate": 9.912609726942104e-06, + "loss": 100420.6687, + "step": 76820 + }, + { + "epoch": 0.1552014609097557, + "grad_norm": 391468.71875, + "learning_rate": 9.912544737483434e-06, + "loss": 57189.8375, + "step": 76830 + }, + { + "epoch": 0.15522166154243952, + "grad_norm": 122603.5078125, + "learning_rate": 9.912479724081617e-06, + "loss": 165536.6625, + "step": 76840 + }, + { + "epoch": 0.1552418621751233, + "grad_norm": 21064.248046875, + "learning_rate": 9.912414686736971e-06, + "loss": 99315.8562, + "step": 76850 + }, + { + "epoch": 0.15526206280780713, + "grad_norm": 229256.9375, + "learning_rate": 9.912349625449808e-06, + "loss": 72675.1625, + "step": 76860 + }, + { + "epoch": 0.15528226344049095, + "grad_norm": 1785757.25, + "learning_rate": 9.912284540220452e-06, + "loss": 172528.3625, + "step": 76870 + }, + { + "epoch": 0.15530246407317477, + "grad_norm": 15311.0791015625, + "learning_rate": 9.912219431049217e-06, + "loss": 57443.3313, + "step": 76880 + }, + { + "epoch": 0.1553226647058586, + "grad_norm": 56682.22265625, + "learning_rate": 9.912154297936418e-06, + "loss": 55024.1625, + "step": 76890 + }, + { + "epoch": 0.1553428653385424, + "grad_norm": 480001.9375, + "learning_rate": 9.912089140882377e-06, + "loss": 141441.05, + "step": 76900 + }, + { + "epoch": 0.1553630659712262, + "grad_norm": 1321676.0, + "learning_rate": 9.912023959887408e-06, + "loss": 85890.8625, + "step": 76910 + }, + { + "epoch": 0.15538326660391003, + "grad_norm": 537742.125, + "learning_rate": 9.91195875495183e-06, + "loss": 56136.725, + "step": 76920 + }, + { + "epoch": 0.15540346723659385, + "grad_norm": 15211.4560546875, + "learning_rate": 9.911893526075961e-06, + "loss": 154061.7625, + "step": 76930 + }, + { + "epoch": 0.15542366786927767, + "grad_norm": 206781.46875, + "learning_rate": 9.911828273260119e-06, + "loss": 94449.5125, + "step": 76940 + }, + { + "epoch": 0.15544386850196149, + "grad_norm": 568470.4375, + "learning_rate": 9.911762996504621e-06, + "loss": 135318.625, + "step": 76950 + }, + { + "epoch": 0.1554640691346453, + "grad_norm": 551527.3125, + "learning_rate": 9.911697695809787e-06, + "loss": 90894.35, + "step": 76960 + }, + { + "epoch": 0.15548426976732913, + "grad_norm": 784382.5625, + "learning_rate": 9.911632371175934e-06, + "loss": 126901.3125, + "step": 76970 + }, + { + "epoch": 0.15550447040001292, + "grad_norm": 2241920.75, + "learning_rate": 9.911567022603379e-06, + "loss": 62982.9688, + "step": 76980 + }, + { + "epoch": 0.15552467103269674, + "grad_norm": 88014.203125, + "learning_rate": 9.911501650092443e-06, + "loss": 163549.3625, + "step": 76990 + }, + { + "epoch": 0.15554487166538056, + "grad_norm": 479683.6875, + "learning_rate": 9.911436253643445e-06, + "loss": 215596.25, + "step": 77000 + }, + { + "epoch": 0.15556507229806438, + "grad_norm": 322550.09375, + "learning_rate": 9.911370833256701e-06, + "loss": 88892.4563, + "step": 77010 + }, + { + "epoch": 0.1555852729307482, + "grad_norm": 2153642.25, + "learning_rate": 9.91130538893253e-06, + "loss": 149681.3875, + "step": 77020 + }, + { + "epoch": 0.15560547356343202, + "grad_norm": 1474981.75, + "learning_rate": 9.911239920671253e-06, + "loss": 83474.2875, + "step": 77030 + }, + { + "epoch": 0.1556256741961158, + "grad_norm": 113758.0625, + "learning_rate": 9.91117442847319e-06, + "loss": 106468.0375, + "step": 77040 + }, + { + "epoch": 0.15564587482879963, + "grad_norm": 11870.33984375, + "learning_rate": 9.911108912338656e-06, + "loss": 48286.2, + "step": 77050 + }, + { + "epoch": 0.15566607546148345, + "grad_norm": 817626.8125, + "learning_rate": 9.911043372267975e-06, + "loss": 120265.7125, + "step": 77060 + }, + { + "epoch": 0.15568627609416727, + "grad_norm": 1367610.5, + "learning_rate": 9.910977808261463e-06, + "loss": 122699.3125, + "step": 77070 + }, + { + "epoch": 0.1557064767268511, + "grad_norm": 51804.8515625, + "learning_rate": 9.910912220319443e-06, + "loss": 137441.2375, + "step": 77080 + }, + { + "epoch": 0.1557266773595349, + "grad_norm": 21683.466796875, + "learning_rate": 9.910846608442229e-06, + "loss": 36967.35, + "step": 77090 + }, + { + "epoch": 0.15574687799221873, + "grad_norm": 23299.5625, + "learning_rate": 9.910780972630146e-06, + "loss": 62398.1188, + "step": 77100 + }, + { + "epoch": 0.15576707862490252, + "grad_norm": 1128331.25, + "learning_rate": 9.910715312883512e-06, + "loss": 89361.85, + "step": 77110 + }, + { + "epoch": 0.15578727925758634, + "grad_norm": 662580.25, + "learning_rate": 9.910649629202648e-06, + "loss": 136698.45, + "step": 77120 + }, + { + "epoch": 0.15580747989027016, + "grad_norm": 107923.8046875, + "learning_rate": 9.910583921587872e-06, + "loss": 92526.6, + "step": 77130 + }, + { + "epoch": 0.15582768052295398, + "grad_norm": 843.6083984375, + "learning_rate": 9.910518190039506e-06, + "loss": 94291.6, + "step": 77140 + }, + { + "epoch": 0.1558478811556378, + "grad_norm": 650877.625, + "learning_rate": 9.91045243455787e-06, + "loss": 113671.4875, + "step": 77150 + }, + { + "epoch": 0.15586808178832162, + "grad_norm": 1887607.5, + "learning_rate": 9.910386655143285e-06, + "loss": 204974.3125, + "step": 77160 + }, + { + "epoch": 0.15588828242100541, + "grad_norm": 8672.4052734375, + "learning_rate": 9.91032085179607e-06, + "loss": 73857.2312, + "step": 77170 + }, + { + "epoch": 0.15590848305368923, + "grad_norm": 685931.5, + "learning_rate": 9.910255024516546e-06, + "loss": 120080.6125, + "step": 77180 + }, + { + "epoch": 0.15592868368637305, + "grad_norm": 2602320.25, + "learning_rate": 9.910189173305035e-06, + "loss": 136804.075, + "step": 77190 + }, + { + "epoch": 0.15594888431905687, + "grad_norm": 1432290.5, + "learning_rate": 9.91012329816186e-06, + "loss": 145444.8625, + "step": 77200 + }, + { + "epoch": 0.1559690849517407, + "grad_norm": 1597860.375, + "learning_rate": 9.910057399087338e-06, + "loss": 193364.025, + "step": 77210 + }, + { + "epoch": 0.15598928558442451, + "grad_norm": 0.0, + "learning_rate": 9.90999147608179e-06, + "loss": 20264.5141, + "step": 77220 + }, + { + "epoch": 0.1560094862171083, + "grad_norm": 1334011.5, + "learning_rate": 9.909925529145541e-06, + "loss": 39637.6719, + "step": 77230 + }, + { + "epoch": 0.15602968684979213, + "grad_norm": 338694.90625, + "learning_rate": 9.90985955827891e-06, + "loss": 88500.1875, + "step": 77240 + }, + { + "epoch": 0.15604988748247595, + "grad_norm": 9461.4423828125, + "learning_rate": 9.90979356348222e-06, + "loss": 205214.725, + "step": 77250 + }, + { + "epoch": 0.15607008811515977, + "grad_norm": 57229.80859375, + "learning_rate": 9.909727544755789e-06, + "loss": 83005.0688, + "step": 77260 + }, + { + "epoch": 0.1560902887478436, + "grad_norm": 641695.375, + "learning_rate": 9.909661502099943e-06, + "loss": 108390.0625, + "step": 77270 + }, + { + "epoch": 0.1561104893805274, + "grad_norm": 84955.8515625, + "learning_rate": 9.909595435515002e-06, + "loss": 82545.6687, + "step": 77280 + }, + { + "epoch": 0.15613069001321123, + "grad_norm": 2983.060546875, + "learning_rate": 9.90952934500129e-06, + "loss": 115460.2625, + "step": 77290 + }, + { + "epoch": 0.15615089064589502, + "grad_norm": 306391.625, + "learning_rate": 9.909463230559127e-06, + "loss": 58342.4688, + "step": 77300 + }, + { + "epoch": 0.15617109127857884, + "grad_norm": 735283.375, + "learning_rate": 9.909397092188834e-06, + "loss": 87805.7625, + "step": 77310 + }, + { + "epoch": 0.15619129191126266, + "grad_norm": 307454.03125, + "learning_rate": 9.909330929890734e-06, + "loss": 50185.4469, + "step": 77320 + }, + { + "epoch": 0.15621149254394648, + "grad_norm": 76590.1328125, + "learning_rate": 9.909264743665153e-06, + "loss": 111400.35, + "step": 77330 + }, + { + "epoch": 0.1562316931766303, + "grad_norm": 0.0, + "learning_rate": 9.90919853351241e-06, + "loss": 162041.775, + "step": 77340 + }, + { + "epoch": 0.15625189380931412, + "grad_norm": 576323.8125, + "learning_rate": 9.90913229943283e-06, + "loss": 225476.4, + "step": 77350 + }, + { + "epoch": 0.1562720944419979, + "grad_norm": 9010.0126953125, + "learning_rate": 9.909066041426733e-06, + "loss": 66380.175, + "step": 77360 + }, + { + "epoch": 0.15629229507468173, + "grad_norm": 158363.734375, + "learning_rate": 9.908999759494444e-06, + "loss": 146221.625, + "step": 77370 + }, + { + "epoch": 0.15631249570736555, + "grad_norm": 12485.7529296875, + "learning_rate": 9.908933453636287e-06, + "loss": 161827.0, + "step": 77380 + }, + { + "epoch": 0.15633269634004937, + "grad_norm": 82070.8515625, + "learning_rate": 9.90886712385258e-06, + "loss": 149866.9, + "step": 77390 + }, + { + "epoch": 0.1563528969727332, + "grad_norm": 177699.09375, + "learning_rate": 9.908800770143654e-06, + "loss": 148910.65, + "step": 77400 + }, + { + "epoch": 0.156373097605417, + "grad_norm": 176303.015625, + "learning_rate": 9.908734392509827e-06, + "loss": 71762.7937, + "step": 77410 + }, + { + "epoch": 0.15639329823810083, + "grad_norm": 135268.484375, + "learning_rate": 9.908667990951424e-06, + "loss": 52082.2375, + "step": 77420 + }, + { + "epoch": 0.15641349887078462, + "grad_norm": 463114.5, + "learning_rate": 9.908601565468768e-06, + "loss": 33047.8875, + "step": 77430 + }, + { + "epoch": 0.15643369950346844, + "grad_norm": 258961.234375, + "learning_rate": 9.908535116062185e-06, + "loss": 93614.1812, + "step": 77440 + }, + { + "epoch": 0.15645390013615226, + "grad_norm": 1211461.375, + "learning_rate": 9.908468642731996e-06, + "loss": 189477.4125, + "step": 77450 + }, + { + "epoch": 0.15647410076883608, + "grad_norm": 724824.0625, + "learning_rate": 9.908402145478526e-06, + "loss": 71912.3375, + "step": 77460 + }, + { + "epoch": 0.1564943014015199, + "grad_norm": 565862.6875, + "learning_rate": 9.908335624302099e-06, + "loss": 103230.925, + "step": 77470 + }, + { + "epoch": 0.15651450203420372, + "grad_norm": 1561787.125, + "learning_rate": 9.908269079203039e-06, + "loss": 131513.975, + "step": 77480 + }, + { + "epoch": 0.15653470266688752, + "grad_norm": 690169.0625, + "learning_rate": 9.908202510181673e-06, + "loss": 97945.3687, + "step": 77490 + }, + { + "epoch": 0.15655490329957134, + "grad_norm": 108257.1953125, + "learning_rate": 9.908135917238321e-06, + "loss": 94715.3938, + "step": 77500 + }, + { + "epoch": 0.15657510393225516, + "grad_norm": 280405.625, + "learning_rate": 9.90806930037331e-06, + "loss": 56851.6687, + "step": 77510 + }, + { + "epoch": 0.15659530456493898, + "grad_norm": 114436.015625, + "learning_rate": 9.908002659586966e-06, + "loss": 70253.7875, + "step": 77520 + }, + { + "epoch": 0.1566155051976228, + "grad_norm": 979752.75, + "learning_rate": 9.907935994879612e-06, + "loss": 126424.275, + "step": 77530 + }, + { + "epoch": 0.15663570583030662, + "grad_norm": 38243.15625, + "learning_rate": 9.907869306251571e-06, + "loss": 137913.95, + "step": 77540 + }, + { + "epoch": 0.1566559064629904, + "grad_norm": 137548.171875, + "learning_rate": 9.907802593703173e-06, + "loss": 62631.0938, + "step": 77550 + }, + { + "epoch": 0.15667610709567423, + "grad_norm": 269477.28125, + "learning_rate": 9.90773585723474e-06, + "loss": 50124.8187, + "step": 77560 + }, + { + "epoch": 0.15669630772835805, + "grad_norm": 578319.3125, + "learning_rate": 9.907669096846596e-06, + "loss": 78519.0, + "step": 77570 + }, + { + "epoch": 0.15671650836104187, + "grad_norm": 583074.375, + "learning_rate": 9.90760231253907e-06, + "loss": 86713.4563, + "step": 77580 + }, + { + "epoch": 0.1567367089937257, + "grad_norm": 55707.52734375, + "learning_rate": 9.907535504312484e-06, + "loss": 63403.0813, + "step": 77590 + }, + { + "epoch": 0.1567569096264095, + "grad_norm": 32349.7578125, + "learning_rate": 9.907468672167165e-06, + "loss": 109539.4375, + "step": 77600 + }, + { + "epoch": 0.15677711025909333, + "grad_norm": 351479.4375, + "learning_rate": 9.90740181610344e-06, + "loss": 89437.0312, + "step": 77610 + }, + { + "epoch": 0.15679731089177712, + "grad_norm": 769632.0625, + "learning_rate": 9.907334936121634e-06, + "loss": 73783.2812, + "step": 77620 + }, + { + "epoch": 0.15681751152446094, + "grad_norm": 0.0, + "learning_rate": 9.907268032222072e-06, + "loss": 97589.6062, + "step": 77630 + }, + { + "epoch": 0.15683771215714476, + "grad_norm": 294626.5625, + "learning_rate": 9.90720110440508e-06, + "loss": 192700.25, + "step": 77640 + }, + { + "epoch": 0.15685791278982858, + "grad_norm": 35087.91796875, + "learning_rate": 9.907134152670987e-06, + "loss": 100635.4, + "step": 77650 + }, + { + "epoch": 0.1568781134225124, + "grad_norm": 940207.0625, + "learning_rate": 9.907067177020115e-06, + "loss": 108614.425, + "step": 77660 + }, + { + "epoch": 0.15689831405519622, + "grad_norm": 507597.5625, + "learning_rate": 9.907000177452794e-06, + "loss": 86738.85, + "step": 77670 + }, + { + "epoch": 0.15691851468788, + "grad_norm": 92541.71875, + "learning_rate": 9.90693315396935e-06, + "loss": 169351.575, + "step": 77680 + }, + { + "epoch": 0.15693871532056383, + "grad_norm": 1632104.5, + "learning_rate": 9.906866106570108e-06, + "loss": 221582.35, + "step": 77690 + }, + { + "epoch": 0.15695891595324765, + "grad_norm": 13811.5947265625, + "learning_rate": 9.906799035255395e-06, + "loss": 90619.5625, + "step": 77700 + }, + { + "epoch": 0.15697911658593147, + "grad_norm": 196196.421875, + "learning_rate": 9.90673194002554e-06, + "loss": 67719.4, + "step": 77710 + }, + { + "epoch": 0.1569993172186153, + "grad_norm": 0.0, + "learning_rate": 9.906664820880869e-06, + "loss": 57276.7375, + "step": 77720 + }, + { + "epoch": 0.1570195178512991, + "grad_norm": 29989.671875, + "learning_rate": 9.906597677821708e-06, + "loss": 123002.325, + "step": 77730 + }, + { + "epoch": 0.15703971848398293, + "grad_norm": 370742.0, + "learning_rate": 9.906530510848384e-06, + "loss": 64610.4563, + "step": 77740 + }, + { + "epoch": 0.15705991911666672, + "grad_norm": 962833.9375, + "learning_rate": 9.906463319961225e-06, + "loss": 79364.4688, + "step": 77750 + }, + { + "epoch": 0.15708011974935054, + "grad_norm": 17886.16015625, + "learning_rate": 9.906396105160561e-06, + "loss": 67906.4, + "step": 77760 + }, + { + "epoch": 0.15710032038203436, + "grad_norm": 864838.5625, + "learning_rate": 9.906328866446717e-06, + "loss": 67430.875, + "step": 77770 + }, + { + "epoch": 0.15712052101471818, + "grad_norm": 116985.0390625, + "learning_rate": 9.906261603820022e-06, + "loss": 41861.0, + "step": 77780 + }, + { + "epoch": 0.157140721647402, + "grad_norm": 44921.28125, + "learning_rate": 9.906194317280802e-06, + "loss": 167995.4375, + "step": 77790 + }, + { + "epoch": 0.15716092228008582, + "grad_norm": 262675.0, + "learning_rate": 9.906127006829385e-06, + "loss": 58392.0062, + "step": 77800 + }, + { + "epoch": 0.15718112291276962, + "grad_norm": 568976.0, + "learning_rate": 9.9060596724661e-06, + "loss": 97154.5562, + "step": 77810 + }, + { + "epoch": 0.15720132354545344, + "grad_norm": 76252.3515625, + "learning_rate": 9.905992314191277e-06, + "loss": 177081.1375, + "step": 77820 + }, + { + "epoch": 0.15722152417813726, + "grad_norm": 131918.03125, + "learning_rate": 9.905924932005241e-06, + "loss": 139190.8, + "step": 77830 + }, + { + "epoch": 0.15724172481082108, + "grad_norm": 268122.03125, + "learning_rate": 9.905857525908322e-06, + "loss": 68810.55, + "step": 77840 + }, + { + "epoch": 0.1572619254435049, + "grad_norm": 114240.078125, + "learning_rate": 9.905790095900849e-06, + "loss": 128524.25, + "step": 77850 + }, + { + "epoch": 0.15728212607618872, + "grad_norm": 1330859.375, + "learning_rate": 9.905722641983151e-06, + "loss": 107846.8625, + "step": 77860 + }, + { + "epoch": 0.1573023267088725, + "grad_norm": 1019136.6875, + "learning_rate": 9.905655164155554e-06, + "loss": 166821.7625, + "step": 77870 + }, + { + "epoch": 0.15732252734155633, + "grad_norm": 894650.25, + "learning_rate": 9.90558766241839e-06, + "loss": 105430.9875, + "step": 77880 + }, + { + "epoch": 0.15734272797424015, + "grad_norm": 33571.140625, + "learning_rate": 9.905520136771985e-06, + "loss": 144708.5, + "step": 77890 + }, + { + "epoch": 0.15736292860692397, + "grad_norm": 741886.75, + "learning_rate": 9.90545258721667e-06, + "loss": 112624.4, + "step": 77900 + }, + { + "epoch": 0.1573831292396078, + "grad_norm": 115468.984375, + "learning_rate": 9.905385013752777e-06, + "loss": 49152.5656, + "step": 77910 + }, + { + "epoch": 0.1574033298722916, + "grad_norm": 581551.875, + "learning_rate": 9.905317416380629e-06, + "loss": 105482.875, + "step": 77920 + }, + { + "epoch": 0.15742353050497543, + "grad_norm": 1875223.25, + "learning_rate": 9.905249795100561e-06, + "loss": 154717.05, + "step": 77930 + }, + { + "epoch": 0.15744373113765922, + "grad_norm": 145024.59375, + "learning_rate": 9.905182149912899e-06, + "loss": 103146.1625, + "step": 77940 + }, + { + "epoch": 0.15746393177034304, + "grad_norm": 703470.3125, + "learning_rate": 9.905114480817976e-06, + "loss": 150665.1875, + "step": 77950 + }, + { + "epoch": 0.15748413240302686, + "grad_norm": 99013.5234375, + "learning_rate": 9.905046787816118e-06, + "loss": 121291.975, + "step": 77960 + }, + { + "epoch": 0.15750433303571068, + "grad_norm": 223069.84375, + "learning_rate": 9.904979070907657e-06, + "loss": 85153.025, + "step": 77970 + }, + { + "epoch": 0.1575245336683945, + "grad_norm": 2141177.0, + "learning_rate": 9.904911330092923e-06, + "loss": 129683.0125, + "step": 77980 + }, + { + "epoch": 0.15754473430107832, + "grad_norm": 19362.40234375, + "learning_rate": 9.904843565372249e-06, + "loss": 105280.1125, + "step": 77990 + }, + { + "epoch": 0.1575649349337621, + "grad_norm": 34863.2578125, + "learning_rate": 9.904775776745959e-06, + "loss": 275417.05, + "step": 78000 + }, + { + "epoch": 0.15758513556644593, + "grad_norm": 1232935.625, + "learning_rate": 9.904707964214386e-06, + "loss": 100813.9125, + "step": 78010 + }, + { + "epoch": 0.15760533619912975, + "grad_norm": 117649.78125, + "learning_rate": 9.904640127777865e-06, + "loss": 153276.45, + "step": 78020 + }, + { + "epoch": 0.15762553683181357, + "grad_norm": 432705.3125, + "learning_rate": 9.904572267436721e-06, + "loss": 33332.0719, + "step": 78030 + }, + { + "epoch": 0.1576457374644974, + "grad_norm": 150498.671875, + "learning_rate": 9.904504383191286e-06, + "loss": 51566.3344, + "step": 78040 + }, + { + "epoch": 0.1576659380971812, + "grad_norm": 1619478.875, + "learning_rate": 9.904436475041892e-06, + "loss": 109063.0, + "step": 78050 + }, + { + "epoch": 0.15768613872986503, + "grad_norm": 873747.6875, + "learning_rate": 9.904368542988869e-06, + "loss": 93196.125, + "step": 78060 + }, + { + "epoch": 0.15770633936254883, + "grad_norm": 38711.28515625, + "learning_rate": 9.90430058703255e-06, + "loss": 144547.9, + "step": 78070 + }, + { + "epoch": 0.15772653999523265, + "grad_norm": 20535.13671875, + "learning_rate": 9.904232607173262e-06, + "loss": 83110.1125, + "step": 78080 + }, + { + "epoch": 0.15774674062791647, + "grad_norm": 4914.59326171875, + "learning_rate": 9.90416460341134e-06, + "loss": 176445.85, + "step": 78090 + }, + { + "epoch": 0.15776694126060029, + "grad_norm": 171895.46875, + "learning_rate": 9.904096575747117e-06, + "loss": 125751.6875, + "step": 78100 + }, + { + "epoch": 0.1577871418932841, + "grad_norm": 1291308.25, + "learning_rate": 9.90402852418092e-06, + "loss": 120977.875, + "step": 78110 + }, + { + "epoch": 0.15780734252596793, + "grad_norm": 115406.09375, + "learning_rate": 9.903960448713084e-06, + "loss": 53103.7063, + "step": 78120 + }, + { + "epoch": 0.15782754315865172, + "grad_norm": 78147.7578125, + "learning_rate": 9.903892349343938e-06, + "loss": 138679.2375, + "step": 78130 + }, + { + "epoch": 0.15784774379133554, + "grad_norm": 151808.015625, + "learning_rate": 9.903824226073816e-06, + "loss": 100205.1187, + "step": 78140 + }, + { + "epoch": 0.15786794442401936, + "grad_norm": 160154.46875, + "learning_rate": 9.90375607890305e-06, + "loss": 63800.1312, + "step": 78150 + }, + { + "epoch": 0.15788814505670318, + "grad_norm": 132369.90625, + "learning_rate": 9.903687907831972e-06, + "loss": 48894.7344, + "step": 78160 + }, + { + "epoch": 0.157908345689387, + "grad_norm": 758706.4375, + "learning_rate": 9.903619712860912e-06, + "loss": 122843.1875, + "step": 78170 + }, + { + "epoch": 0.15792854632207082, + "grad_norm": 3338895.25, + "learning_rate": 9.903551493990205e-06, + "loss": 138873.3, + "step": 78180 + }, + { + "epoch": 0.1579487469547546, + "grad_norm": 559704.6875, + "learning_rate": 9.903483251220183e-06, + "loss": 75703.95, + "step": 78190 + }, + { + "epoch": 0.15796894758743843, + "grad_norm": 528290.0625, + "learning_rate": 9.903414984551178e-06, + "loss": 111059.0, + "step": 78200 + }, + { + "epoch": 0.15798914822012225, + "grad_norm": 353133.3125, + "learning_rate": 9.903346693983524e-06, + "loss": 52341.3531, + "step": 78210 + }, + { + "epoch": 0.15800934885280607, + "grad_norm": 168161.65625, + "learning_rate": 9.903278379517554e-06, + "loss": 56434.4812, + "step": 78220 + }, + { + "epoch": 0.1580295494854899, + "grad_norm": 830071.3125, + "learning_rate": 9.903210041153597e-06, + "loss": 226619.95, + "step": 78230 + }, + { + "epoch": 0.1580497501181737, + "grad_norm": 118650.453125, + "learning_rate": 9.90314167889199e-06, + "loss": 62151.275, + "step": 78240 + }, + { + "epoch": 0.15806995075085753, + "grad_norm": 17830.048828125, + "learning_rate": 9.903073292733065e-06, + "loss": 47106.5656, + "step": 78250 + }, + { + "epoch": 0.15809015138354132, + "grad_norm": 2701929.0, + "learning_rate": 9.903004882677157e-06, + "loss": 170998.35, + "step": 78260 + }, + { + "epoch": 0.15811035201622514, + "grad_norm": 1135378.125, + "learning_rate": 9.902936448724596e-06, + "loss": 245101.55, + "step": 78270 + }, + { + "epoch": 0.15813055264890896, + "grad_norm": 70271.6875, + "learning_rate": 9.90286799087572e-06, + "loss": 109900.55, + "step": 78280 + }, + { + "epoch": 0.15815075328159278, + "grad_norm": 229449.1875, + "learning_rate": 9.902799509130857e-06, + "loss": 86093.55, + "step": 78290 + }, + { + "epoch": 0.1581709539142766, + "grad_norm": 379467.09375, + "learning_rate": 9.902731003490344e-06, + "loss": 78078.6687, + "step": 78300 + }, + { + "epoch": 0.15819115454696042, + "grad_norm": 1264376.875, + "learning_rate": 9.902662473954516e-06, + "loss": 89620.7063, + "step": 78310 + }, + { + "epoch": 0.15821135517964421, + "grad_norm": 1680761.625, + "learning_rate": 9.902593920523706e-06, + "loss": 201493.95, + "step": 78320 + }, + { + "epoch": 0.15823155581232803, + "grad_norm": 177090.328125, + "learning_rate": 9.902525343198249e-06, + "loss": 153111.1125, + "step": 78330 + }, + { + "epoch": 0.15825175644501185, + "grad_norm": 3967704.75, + "learning_rate": 9.902456741978475e-06, + "loss": 156096.325, + "step": 78340 + }, + { + "epoch": 0.15827195707769567, + "grad_norm": 0.0, + "learning_rate": 9.902388116864723e-06, + "loss": 171132.8125, + "step": 78350 + }, + { + "epoch": 0.1582921577103795, + "grad_norm": 49557.14453125, + "learning_rate": 9.902319467857326e-06, + "loss": 206870.0, + "step": 78360 + }, + { + "epoch": 0.15831235834306331, + "grad_norm": 1438854.375, + "learning_rate": 9.902250794956618e-06, + "loss": 76761.4, + "step": 78370 + }, + { + "epoch": 0.15833255897574713, + "grad_norm": 281014.625, + "learning_rate": 9.902182098162933e-06, + "loss": 108821.45, + "step": 78380 + }, + { + "epoch": 0.15835275960843093, + "grad_norm": 560693.4375, + "learning_rate": 9.90211337747661e-06, + "loss": 193918.9375, + "step": 78390 + }, + { + "epoch": 0.15837296024111475, + "grad_norm": 146684.3125, + "learning_rate": 9.90204463289798e-06, + "loss": 52343.9875, + "step": 78400 + }, + { + "epoch": 0.15839316087379857, + "grad_norm": 802906.625, + "learning_rate": 9.901975864427378e-06, + "loss": 54397.475, + "step": 78410 + }, + { + "epoch": 0.1584133615064824, + "grad_norm": 116223.265625, + "learning_rate": 9.90190707206514e-06, + "loss": 97032.7312, + "step": 78420 + }, + { + "epoch": 0.1584335621391662, + "grad_norm": 460775.28125, + "learning_rate": 9.901838255811602e-06, + "loss": 108845.4375, + "step": 78430 + }, + { + "epoch": 0.15845376277185003, + "grad_norm": 961994.0, + "learning_rate": 9.9017694156671e-06, + "loss": 80592.3, + "step": 78440 + }, + { + "epoch": 0.15847396340453382, + "grad_norm": 598792.3125, + "learning_rate": 9.901700551631966e-06, + "loss": 138805.525, + "step": 78450 + }, + { + "epoch": 0.15849416403721764, + "grad_norm": 25935.87890625, + "learning_rate": 9.901631663706539e-06, + "loss": 54407.9125, + "step": 78460 + }, + { + "epoch": 0.15851436466990146, + "grad_norm": 0.0, + "learning_rate": 9.901562751891155e-06, + "loss": 106320.2125, + "step": 78470 + }, + { + "epoch": 0.15853456530258528, + "grad_norm": 85746.0546875, + "learning_rate": 9.901493816186148e-06, + "loss": 82996.8687, + "step": 78480 + }, + { + "epoch": 0.1585547659352691, + "grad_norm": 448079.03125, + "learning_rate": 9.901424856591855e-06, + "loss": 57527.475, + "step": 78490 + }, + { + "epoch": 0.15857496656795292, + "grad_norm": 150311.203125, + "learning_rate": 9.901355873108611e-06, + "loss": 55201.1125, + "step": 78500 + }, + { + "epoch": 0.1585951672006367, + "grad_norm": 224670.25, + "learning_rate": 9.901286865736752e-06, + "loss": 87271.2312, + "step": 78510 + }, + { + "epoch": 0.15861536783332053, + "grad_norm": 159393.734375, + "learning_rate": 9.901217834476616e-06, + "loss": 85730.8375, + "step": 78520 + }, + { + "epoch": 0.15863556846600435, + "grad_norm": 261485.546875, + "learning_rate": 9.90114877932854e-06, + "loss": 136493.25, + "step": 78530 + }, + { + "epoch": 0.15865576909868817, + "grad_norm": 238226.328125, + "learning_rate": 9.901079700292858e-06, + "loss": 119348.475, + "step": 78540 + }, + { + "epoch": 0.158675969731372, + "grad_norm": 1597156.625, + "learning_rate": 9.901010597369908e-06, + "loss": 112438.15, + "step": 78550 + }, + { + "epoch": 0.1586961703640558, + "grad_norm": 177503.5, + "learning_rate": 9.900941470560025e-06, + "loss": 49261.4281, + "step": 78560 + }, + { + "epoch": 0.15871637099673963, + "grad_norm": 23415.939453125, + "learning_rate": 9.900872319863551e-06, + "loss": 30937.0031, + "step": 78570 + }, + { + "epoch": 0.15873657162942342, + "grad_norm": 253768.671875, + "learning_rate": 9.90080314528082e-06, + "loss": 68435.2125, + "step": 78580 + }, + { + "epoch": 0.15875677226210724, + "grad_norm": 255983.875, + "learning_rate": 9.900733946812167e-06, + "loss": 107126.6875, + "step": 78590 + }, + { + "epoch": 0.15877697289479106, + "grad_norm": 19934.837890625, + "learning_rate": 9.900664724457932e-06, + "loss": 30076.9313, + "step": 78600 + }, + { + "epoch": 0.15879717352747488, + "grad_norm": 44970.02734375, + "learning_rate": 9.900595478218449e-06, + "loss": 68150.4062, + "step": 78610 + }, + { + "epoch": 0.1588173741601587, + "grad_norm": 197417.125, + "learning_rate": 9.900526208094061e-06, + "loss": 103012.4438, + "step": 78620 + }, + { + "epoch": 0.15883757479284252, + "grad_norm": 1185002.25, + "learning_rate": 9.900456914085101e-06, + "loss": 129238.475, + "step": 78630 + }, + { + "epoch": 0.15885777542552632, + "grad_norm": 822395.3125, + "learning_rate": 9.90038759619191e-06, + "loss": 55517.55, + "step": 78640 + }, + { + "epoch": 0.15887797605821014, + "grad_norm": 267546.40625, + "learning_rate": 9.900318254414823e-06, + "loss": 48536.325, + "step": 78650 + }, + { + "epoch": 0.15889817669089396, + "grad_norm": 60618.01953125, + "learning_rate": 9.900248888754179e-06, + "loss": 226869.175, + "step": 78660 + }, + { + "epoch": 0.15891837732357778, + "grad_norm": 271747.5625, + "learning_rate": 9.900179499210316e-06, + "loss": 154429.1875, + "step": 78670 + }, + { + "epoch": 0.1589385779562616, + "grad_norm": 2193204.25, + "learning_rate": 9.900110085783573e-06, + "loss": 49146.0, + "step": 78680 + }, + { + "epoch": 0.15895877858894542, + "grad_norm": 200287.328125, + "learning_rate": 9.900040648474287e-06, + "loss": 179065.075, + "step": 78690 + }, + { + "epoch": 0.15897897922162924, + "grad_norm": 6181344.0, + "learning_rate": 9.899971187282799e-06, + "loss": 251699.35, + "step": 78700 + }, + { + "epoch": 0.15899917985431303, + "grad_norm": 162077.40625, + "learning_rate": 9.899901702209445e-06, + "loss": 71576.925, + "step": 78710 + }, + { + "epoch": 0.15901938048699685, + "grad_norm": 1446377.0, + "learning_rate": 9.899832193254564e-06, + "loss": 136752.175, + "step": 78720 + }, + { + "epoch": 0.15903958111968067, + "grad_norm": 1210015.0, + "learning_rate": 9.899762660418495e-06, + "loss": 93885.7625, + "step": 78730 + }, + { + "epoch": 0.1590597817523645, + "grad_norm": 358656.6875, + "learning_rate": 9.899693103701577e-06, + "loss": 117800.3375, + "step": 78740 + }, + { + "epoch": 0.1590799823850483, + "grad_norm": 573873.1875, + "learning_rate": 9.899623523104149e-06, + "loss": 80035.6187, + "step": 78750 + }, + { + "epoch": 0.15910018301773213, + "grad_norm": 53630.234375, + "learning_rate": 9.89955391862655e-06, + "loss": 109293.0125, + "step": 78760 + }, + { + "epoch": 0.15912038365041592, + "grad_norm": 369292.03125, + "learning_rate": 9.89948429026912e-06, + "loss": 95030.7563, + "step": 78770 + }, + { + "epoch": 0.15914058428309974, + "grad_norm": 737118.25, + "learning_rate": 9.8994146380322e-06, + "loss": 85010.7063, + "step": 78780 + }, + { + "epoch": 0.15916078491578356, + "grad_norm": 318441.90625, + "learning_rate": 9.899344961916123e-06, + "loss": 156320.2625, + "step": 78790 + }, + { + "epoch": 0.15918098554846738, + "grad_norm": 275150.09375, + "learning_rate": 9.899275261921236e-06, + "loss": 40344.2281, + "step": 78800 + }, + { + "epoch": 0.1592011861811512, + "grad_norm": 36022.625, + "learning_rate": 9.899205538047873e-06, + "loss": 65808.375, + "step": 78810 + }, + { + "epoch": 0.15922138681383502, + "grad_norm": 18012.708984375, + "learning_rate": 9.899135790296379e-06, + "loss": 79750.9125, + "step": 78820 + }, + { + "epoch": 0.1592415874465188, + "grad_norm": 34040.42578125, + "learning_rate": 9.89906601866709e-06, + "loss": 41711.0219, + "step": 78830 + }, + { + "epoch": 0.15926178807920263, + "grad_norm": 701889.0625, + "learning_rate": 9.898996223160348e-06, + "loss": 90909.275, + "step": 78840 + }, + { + "epoch": 0.15928198871188645, + "grad_norm": 746252.625, + "learning_rate": 9.898926403776492e-06, + "loss": 230890.725, + "step": 78850 + }, + { + "epoch": 0.15930218934457027, + "grad_norm": 1459656.875, + "learning_rate": 9.898856560515864e-06, + "loss": 129497.4, + "step": 78860 + }, + { + "epoch": 0.1593223899772541, + "grad_norm": 278439.5, + "learning_rate": 9.898786693378801e-06, + "loss": 49190.625, + "step": 78870 + }, + { + "epoch": 0.1593425906099379, + "grad_norm": 968128.1875, + "learning_rate": 9.898716802365648e-06, + "loss": 126903.125, + "step": 78880 + }, + { + "epoch": 0.15936279124262173, + "grad_norm": 800317.25, + "learning_rate": 9.898646887476742e-06, + "loss": 161684.95, + "step": 78890 + }, + { + "epoch": 0.15938299187530552, + "grad_norm": 1821520.25, + "learning_rate": 9.898576948712427e-06, + "loss": 67584.6125, + "step": 78900 + }, + { + "epoch": 0.15940319250798934, + "grad_norm": 562190.0625, + "learning_rate": 9.89850698607304e-06, + "loss": 298644.8, + "step": 78910 + }, + { + "epoch": 0.15942339314067316, + "grad_norm": 704976.6875, + "learning_rate": 9.898436999558924e-06, + "loss": 126553.5875, + "step": 78920 + }, + { + "epoch": 0.15944359377335698, + "grad_norm": 422536.59375, + "learning_rate": 9.898366989170423e-06, + "loss": 111452.95, + "step": 78930 + }, + { + "epoch": 0.1594637944060408, + "grad_norm": 1253903.5, + "learning_rate": 9.898296954907874e-06, + "loss": 140254.85, + "step": 78940 + }, + { + "epoch": 0.15948399503872462, + "grad_norm": 295802.1875, + "learning_rate": 9.898226896771619e-06, + "loss": 132222.825, + "step": 78950 + }, + { + "epoch": 0.15950419567140842, + "grad_norm": 3118487.0, + "learning_rate": 9.898156814762e-06, + "loss": 86575.2625, + "step": 78960 + }, + { + "epoch": 0.15952439630409224, + "grad_norm": 324184.375, + "learning_rate": 9.898086708879359e-06, + "loss": 107218.325, + "step": 78970 + }, + { + "epoch": 0.15954459693677606, + "grad_norm": 99813.7890625, + "learning_rate": 9.898016579124039e-06, + "loss": 46621.4844, + "step": 78980 + }, + { + "epoch": 0.15956479756945988, + "grad_norm": 61445.84765625, + "learning_rate": 9.897946425496379e-06, + "loss": 99341.4937, + "step": 78990 + }, + { + "epoch": 0.1595849982021437, + "grad_norm": 73642.7421875, + "learning_rate": 9.89787624799672e-06, + "loss": 90058.6375, + "step": 79000 + }, + { + "epoch": 0.15960519883482752, + "grad_norm": 6206.48779296875, + "learning_rate": 9.897806046625408e-06, + "loss": 69125.8, + "step": 79010 + }, + { + "epoch": 0.1596253994675113, + "grad_norm": 637418.125, + "learning_rate": 9.897735821382786e-06, + "loss": 156423.3, + "step": 79020 + }, + { + "epoch": 0.15964560010019513, + "grad_norm": 49064.4453125, + "learning_rate": 9.89766557226919e-06, + "loss": 52316.45, + "step": 79030 + }, + { + "epoch": 0.15966580073287895, + "grad_norm": 61658.29296875, + "learning_rate": 9.897595299284968e-06, + "loss": 85909.6875, + "step": 79040 + }, + { + "epoch": 0.15968600136556277, + "grad_norm": 582662.9375, + "learning_rate": 9.897525002430459e-06, + "loss": 221393.55, + "step": 79050 + }, + { + "epoch": 0.1597062019982466, + "grad_norm": 228696.25, + "learning_rate": 9.89745468170601e-06, + "loss": 80532.2812, + "step": 79060 + }, + { + "epoch": 0.1597264026309304, + "grad_norm": 43459.79296875, + "learning_rate": 9.897384337111956e-06, + "loss": 114951.2625, + "step": 79070 + }, + { + "epoch": 0.15974660326361423, + "grad_norm": 529533.625, + "learning_rate": 9.89731396864865e-06, + "loss": 85789.6875, + "step": 79080 + }, + { + "epoch": 0.15976680389629802, + "grad_norm": 416032.4375, + "learning_rate": 9.897243576316426e-06, + "loss": 82967.9312, + "step": 79090 + }, + { + "epoch": 0.15978700452898184, + "grad_norm": 15628.0927734375, + "learning_rate": 9.897173160115633e-06, + "loss": 117010.55, + "step": 79100 + }, + { + "epoch": 0.15980720516166566, + "grad_norm": 111395.2890625, + "learning_rate": 9.89710272004661e-06, + "loss": 109372.9, + "step": 79110 + }, + { + "epoch": 0.15982740579434948, + "grad_norm": 69714.359375, + "learning_rate": 9.897032256109705e-06, + "loss": 90917.4688, + "step": 79120 + }, + { + "epoch": 0.1598476064270333, + "grad_norm": 971941.0625, + "learning_rate": 9.896961768305255e-06, + "loss": 148297.4875, + "step": 79130 + }, + { + "epoch": 0.15986780705971712, + "grad_norm": 155449.5625, + "learning_rate": 9.89689125663361e-06, + "loss": 39572.5406, + "step": 79140 + }, + { + "epoch": 0.1598880076924009, + "grad_norm": 269227.59375, + "learning_rate": 9.89682072109511e-06, + "loss": 95257.025, + "step": 79150 + }, + { + "epoch": 0.15990820832508473, + "grad_norm": 3444103.75, + "learning_rate": 9.8967501616901e-06, + "loss": 215750.75, + "step": 79160 + }, + { + "epoch": 0.15992840895776855, + "grad_norm": 106206.0546875, + "learning_rate": 9.896679578418924e-06, + "loss": 202804.025, + "step": 79170 + }, + { + "epoch": 0.15994860959045237, + "grad_norm": 171312.359375, + "learning_rate": 9.896608971281926e-06, + "loss": 80364.2312, + "step": 79180 + }, + { + "epoch": 0.1599688102231362, + "grad_norm": 126323.1640625, + "learning_rate": 9.896538340279449e-06, + "loss": 56734.075, + "step": 79190 + }, + { + "epoch": 0.15998901085582, + "grad_norm": 264853.8125, + "learning_rate": 9.896467685411838e-06, + "loss": 76816.7875, + "step": 79200 + }, + { + "epoch": 0.16000921148850383, + "grad_norm": 973489.6875, + "learning_rate": 9.896397006679437e-06, + "loss": 87732.8125, + "step": 79210 + }, + { + "epoch": 0.16002941212118763, + "grad_norm": 1016497.5625, + "learning_rate": 9.89632630408259e-06, + "loss": 130919.625, + "step": 79220 + }, + { + "epoch": 0.16004961275387145, + "grad_norm": 357971.34375, + "learning_rate": 9.896255577621646e-06, + "loss": 114606.25, + "step": 79230 + }, + { + "epoch": 0.16006981338655527, + "grad_norm": 24342.966796875, + "learning_rate": 9.896184827296942e-06, + "loss": 41545.7656, + "step": 79240 + }, + { + "epoch": 0.16009001401923909, + "grad_norm": 657766.375, + "learning_rate": 9.89611405310883e-06, + "loss": 173221.8, + "step": 79250 + }, + { + "epoch": 0.1601102146519229, + "grad_norm": 570797.25, + "learning_rate": 9.89604325505765e-06, + "loss": 128350.3875, + "step": 79260 + }, + { + "epoch": 0.16013041528460673, + "grad_norm": 332709.5, + "learning_rate": 9.89597243314375e-06, + "loss": 88059.5688, + "step": 79270 + }, + { + "epoch": 0.16015061591729052, + "grad_norm": 2902673.25, + "learning_rate": 9.895901587367473e-06, + "loss": 92676.15, + "step": 79280 + }, + { + "epoch": 0.16017081654997434, + "grad_norm": 203652.0, + "learning_rate": 9.895830717729166e-06, + "loss": 151888.9375, + "step": 79290 + }, + { + "epoch": 0.16019101718265816, + "grad_norm": 208304.484375, + "learning_rate": 9.895759824229176e-06, + "loss": 81054.0312, + "step": 79300 + }, + { + "epoch": 0.16021121781534198, + "grad_norm": 468024.28125, + "learning_rate": 9.895688906867844e-06, + "loss": 106869.925, + "step": 79310 + }, + { + "epoch": 0.1602314184480258, + "grad_norm": 2056650.875, + "learning_rate": 9.89561796564552e-06, + "loss": 199137.3375, + "step": 79320 + }, + { + "epoch": 0.16025161908070962, + "grad_norm": 167167.46875, + "learning_rate": 9.895547000562546e-06, + "loss": 113536.8, + "step": 79330 + }, + { + "epoch": 0.1602718197133934, + "grad_norm": 45472.01953125, + "learning_rate": 9.895476011619269e-06, + "loss": 142270.0625, + "step": 79340 + }, + { + "epoch": 0.16029202034607723, + "grad_norm": 113326.125, + "learning_rate": 9.895404998816038e-06, + "loss": 194321.5, + "step": 79350 + }, + { + "epoch": 0.16031222097876105, + "grad_norm": 139980.03125, + "learning_rate": 9.895333962153195e-06, + "loss": 70580.1687, + "step": 79360 + }, + { + "epoch": 0.16033242161144487, + "grad_norm": 107360.28125, + "learning_rate": 9.895262901631088e-06, + "loss": 98611.5, + "step": 79370 + }, + { + "epoch": 0.1603526222441287, + "grad_norm": 466469.5, + "learning_rate": 9.895191817250064e-06, + "loss": 109790.725, + "step": 79380 + }, + { + "epoch": 0.1603728228768125, + "grad_norm": 891021.875, + "learning_rate": 9.89512070901047e-06, + "loss": 65576.7125, + "step": 79390 + }, + { + "epoch": 0.16039302350949633, + "grad_norm": 26025.181640625, + "learning_rate": 9.89504957691265e-06, + "loss": 70285.2063, + "step": 79400 + }, + { + "epoch": 0.16041322414218012, + "grad_norm": 117356.21875, + "learning_rate": 9.894978420956953e-06, + "loss": 52965.95, + "step": 79410 + }, + { + "epoch": 0.16043342477486394, + "grad_norm": 1354795.0, + "learning_rate": 9.894907241143722e-06, + "loss": 72548.1938, + "step": 79420 + }, + { + "epoch": 0.16045362540754776, + "grad_norm": 19997.392578125, + "learning_rate": 9.89483603747331e-06, + "loss": 114626.5875, + "step": 79430 + }, + { + "epoch": 0.16047382604023158, + "grad_norm": 21188.34765625, + "learning_rate": 9.89476480994606e-06, + "loss": 69228.1625, + "step": 79440 + }, + { + "epoch": 0.1604940266729154, + "grad_norm": 720766.4375, + "learning_rate": 9.894693558562319e-06, + "loss": 59866.0125, + "step": 79450 + }, + { + "epoch": 0.16051422730559922, + "grad_norm": 1767422.5, + "learning_rate": 9.894622283322436e-06, + "loss": 183540.1125, + "step": 79460 + }, + { + "epoch": 0.16053442793828301, + "grad_norm": 42276.87109375, + "learning_rate": 9.894550984226759e-06, + "loss": 150062.6625, + "step": 79470 + }, + { + "epoch": 0.16055462857096683, + "grad_norm": 282964.34375, + "learning_rate": 9.894479661275631e-06, + "loss": 128830.05, + "step": 79480 + }, + { + "epoch": 0.16057482920365065, + "grad_norm": 210943.625, + "learning_rate": 9.894408314469404e-06, + "loss": 82560.4438, + "step": 79490 + }, + { + "epoch": 0.16059502983633447, + "grad_norm": 726122.875, + "learning_rate": 9.894336943808426e-06, + "loss": 83747.3938, + "step": 79500 + }, + { + "epoch": 0.1606152304690183, + "grad_norm": 2970392.0, + "learning_rate": 9.894265549293043e-06, + "loss": 107214.175, + "step": 79510 + }, + { + "epoch": 0.16063543110170211, + "grad_norm": 484609.0, + "learning_rate": 9.894194130923602e-06, + "loss": 104282.2812, + "step": 79520 + }, + { + "epoch": 0.16065563173438593, + "grad_norm": 1668485.75, + "learning_rate": 9.894122688700452e-06, + "loss": 244901.85, + "step": 79530 + }, + { + "epoch": 0.16067583236706973, + "grad_norm": 201983.3125, + "learning_rate": 9.894051222623943e-06, + "loss": 30257.425, + "step": 79540 + }, + { + "epoch": 0.16069603299975355, + "grad_norm": 789343.625, + "learning_rate": 9.893979732694422e-06, + "loss": 87499.2125, + "step": 79550 + }, + { + "epoch": 0.16071623363243737, + "grad_norm": 145542.59375, + "learning_rate": 9.893908218912237e-06, + "loss": 192650.7625, + "step": 79560 + }, + { + "epoch": 0.1607364342651212, + "grad_norm": 577156.5625, + "learning_rate": 9.893836681277736e-06, + "loss": 50485.275, + "step": 79570 + }, + { + "epoch": 0.160756634897805, + "grad_norm": 694936.6875, + "learning_rate": 9.89376511979127e-06, + "loss": 38597.0938, + "step": 79580 + }, + { + "epoch": 0.16077683553048883, + "grad_norm": 56383.515625, + "learning_rate": 9.893693534453186e-06, + "loss": 120268.0125, + "step": 79590 + }, + { + "epoch": 0.16079703616317262, + "grad_norm": 420536.21875, + "learning_rate": 9.893621925263832e-06, + "loss": 45458.3375, + "step": 79600 + }, + { + "epoch": 0.16081723679585644, + "grad_norm": 24330.990234375, + "learning_rate": 9.89355029222356e-06, + "loss": 107595.825, + "step": 79610 + }, + { + "epoch": 0.16083743742854026, + "grad_norm": 220990.6875, + "learning_rate": 9.893478635332716e-06, + "loss": 119532.45, + "step": 79620 + }, + { + "epoch": 0.16085763806122408, + "grad_norm": 879985.4375, + "learning_rate": 9.893406954591651e-06, + "loss": 53345.5375, + "step": 79630 + }, + { + "epoch": 0.1608778386939079, + "grad_norm": 314541.78125, + "learning_rate": 9.893335250000715e-06, + "loss": 54257.8187, + "step": 79640 + }, + { + "epoch": 0.16089803932659172, + "grad_norm": 36568.36328125, + "learning_rate": 9.893263521560255e-06, + "loss": 120333.5875, + "step": 79650 + }, + { + "epoch": 0.1609182399592755, + "grad_norm": 482843.75, + "learning_rate": 9.893191769270624e-06, + "loss": 66573.6375, + "step": 79660 + }, + { + "epoch": 0.16093844059195933, + "grad_norm": 7145.76708984375, + "learning_rate": 9.893119993132167e-06, + "loss": 207021.5, + "step": 79670 + }, + { + "epoch": 0.16095864122464315, + "grad_norm": 428707.65625, + "learning_rate": 9.89304819314524e-06, + "loss": 174491.5, + "step": 79680 + }, + { + "epoch": 0.16097884185732697, + "grad_norm": 81171.6875, + "learning_rate": 9.892976369310188e-06, + "loss": 81379.725, + "step": 79690 + }, + { + "epoch": 0.1609990424900108, + "grad_norm": 430677.4375, + "learning_rate": 9.89290452162736e-06, + "loss": 108497.4125, + "step": 79700 + }, + { + "epoch": 0.1610192431226946, + "grad_norm": 801732.0, + "learning_rate": 9.892832650097113e-06, + "loss": 73389.8875, + "step": 79710 + }, + { + "epoch": 0.16103944375537843, + "grad_norm": 115182.09375, + "learning_rate": 9.89276075471979e-06, + "loss": 74002.4062, + "step": 79720 + }, + { + "epoch": 0.16105964438806222, + "grad_norm": 819217.0625, + "learning_rate": 9.892688835495747e-06, + "loss": 101803.6375, + "step": 79730 + }, + { + "epoch": 0.16107984502074604, + "grad_norm": 656293.5, + "learning_rate": 9.89261689242533e-06, + "loss": 101458.65, + "step": 79740 + }, + { + "epoch": 0.16110004565342986, + "grad_norm": 331421.15625, + "learning_rate": 9.892544925508894e-06, + "loss": 26254.5187, + "step": 79750 + }, + { + "epoch": 0.16112024628611368, + "grad_norm": 962033.4375, + "learning_rate": 9.892472934746784e-06, + "loss": 206135.0, + "step": 79760 + }, + { + "epoch": 0.1611404469187975, + "grad_norm": 122398.1640625, + "learning_rate": 9.892400920139357e-06, + "loss": 205358.3125, + "step": 79770 + }, + { + "epoch": 0.16116064755148132, + "grad_norm": 5446.3369140625, + "learning_rate": 9.892328881686961e-06, + "loss": 135519.6875, + "step": 79780 + }, + { + "epoch": 0.16118084818416512, + "grad_norm": 763403.5, + "learning_rate": 9.892256819389947e-06, + "loss": 109198.2, + "step": 79790 + }, + { + "epoch": 0.16120104881684894, + "grad_norm": 24556.94140625, + "learning_rate": 9.892184733248666e-06, + "loss": 153393.0, + "step": 79800 + }, + { + "epoch": 0.16122124944953276, + "grad_norm": 734436.6875, + "learning_rate": 9.89211262326347e-06, + "loss": 174219.325, + "step": 79810 + }, + { + "epoch": 0.16124145008221658, + "grad_norm": 126171.59375, + "learning_rate": 9.892040489434711e-06, + "loss": 162071.625, + "step": 79820 + }, + { + "epoch": 0.1612616507149004, + "grad_norm": 27864.185546875, + "learning_rate": 9.89196833176274e-06, + "loss": 145922.0375, + "step": 79830 + }, + { + "epoch": 0.16128185134758422, + "grad_norm": 465704.40625, + "learning_rate": 9.891896150247909e-06, + "loss": 41887.2937, + "step": 79840 + }, + { + "epoch": 0.16130205198026804, + "grad_norm": 2284400.75, + "learning_rate": 9.891823944890569e-06, + "loss": 66987.6, + "step": 79850 + }, + { + "epoch": 0.16132225261295183, + "grad_norm": 0.0, + "learning_rate": 9.891751715691071e-06, + "loss": 105082.425, + "step": 79860 + }, + { + "epoch": 0.16134245324563565, + "grad_norm": 145225.453125, + "learning_rate": 9.89167946264977e-06, + "loss": 38941.5062, + "step": 79870 + }, + { + "epoch": 0.16136265387831947, + "grad_norm": 85083.453125, + "learning_rate": 9.891607185767018e-06, + "loss": 136891.1875, + "step": 79880 + }, + { + "epoch": 0.1613828545110033, + "grad_norm": 3597054.5, + "learning_rate": 9.891534885043164e-06, + "loss": 238170.45, + "step": 79890 + }, + { + "epoch": 0.1614030551436871, + "grad_norm": 90329.1328125, + "learning_rate": 9.891462560478562e-06, + "loss": 64153.8938, + "step": 79900 + }, + { + "epoch": 0.16142325577637093, + "grad_norm": 160998.46875, + "learning_rate": 9.891390212073566e-06, + "loss": 76437.15, + "step": 79910 + }, + { + "epoch": 0.16144345640905472, + "grad_norm": 1091635.75, + "learning_rate": 9.891317839828527e-06, + "loss": 210451.725, + "step": 79920 + }, + { + "epoch": 0.16146365704173854, + "grad_norm": 63242.96484375, + "learning_rate": 9.891245443743797e-06, + "loss": 213127.925, + "step": 79930 + }, + { + "epoch": 0.16148385767442236, + "grad_norm": 435747.53125, + "learning_rate": 9.891173023819731e-06, + "loss": 61990.275, + "step": 79940 + }, + { + "epoch": 0.16150405830710618, + "grad_norm": 4585276.5, + "learning_rate": 9.891100580056681e-06, + "loss": 153689.65, + "step": 79950 + }, + { + "epoch": 0.16152425893979, + "grad_norm": 13365.2373046875, + "learning_rate": 9.891028112454998e-06, + "loss": 132346.875, + "step": 79960 + }, + { + "epoch": 0.16154445957247382, + "grad_norm": 32115.3125, + "learning_rate": 9.890955621015039e-06, + "loss": 198968.825, + "step": 79970 + }, + { + "epoch": 0.1615646602051576, + "grad_norm": 255417.9375, + "learning_rate": 9.890883105737156e-06, + "loss": 103621.7625, + "step": 79980 + }, + { + "epoch": 0.16158486083784143, + "grad_norm": 2122547.5, + "learning_rate": 9.890810566621702e-06, + "loss": 225275.15, + "step": 79990 + }, + { + "epoch": 0.16160506147052525, + "grad_norm": 147001.34375, + "learning_rate": 9.890738003669029e-06, + "loss": 78925.325, + "step": 80000 + }, + { + "epoch": 0.16162526210320907, + "grad_norm": 1355494.375, + "learning_rate": 9.890665416879492e-06, + "loss": 124409.0, + "step": 80010 + }, + { + "epoch": 0.1616454627358929, + "grad_norm": 32659.767578125, + "learning_rate": 9.890592806253447e-06, + "loss": 40068.5969, + "step": 80020 + }, + { + "epoch": 0.1616656633685767, + "grad_norm": 393934.40625, + "learning_rate": 9.890520171791244e-06, + "loss": 97367.35, + "step": 80030 + }, + { + "epoch": 0.16168586400126053, + "grad_norm": 181144.71875, + "learning_rate": 9.89044751349324e-06, + "loss": 76918.1812, + "step": 80040 + }, + { + "epoch": 0.16170606463394432, + "grad_norm": 592541.875, + "learning_rate": 9.890374831359787e-06, + "loss": 75549.5125, + "step": 80050 + }, + { + "epoch": 0.16172626526662814, + "grad_norm": 38036.4609375, + "learning_rate": 9.89030212539124e-06, + "loss": 125583.1, + "step": 80060 + }, + { + "epoch": 0.16174646589931196, + "grad_norm": 47496.29296875, + "learning_rate": 9.890229395587954e-06, + "loss": 88543.05, + "step": 80070 + }, + { + "epoch": 0.16176666653199578, + "grad_norm": 417425.75, + "learning_rate": 9.890156641950284e-06, + "loss": 66424.0875, + "step": 80080 + }, + { + "epoch": 0.1617868671646796, + "grad_norm": 16885.255859375, + "learning_rate": 9.890083864478584e-06, + "loss": 210944.75, + "step": 80090 + }, + { + "epoch": 0.16180706779736342, + "grad_norm": 1529672.125, + "learning_rate": 9.890011063173207e-06, + "loss": 199148.4625, + "step": 80100 + }, + { + "epoch": 0.16182726843004722, + "grad_norm": 5820.14794921875, + "learning_rate": 9.889938238034509e-06, + "loss": 99342.1875, + "step": 80110 + }, + { + "epoch": 0.16184746906273104, + "grad_norm": 426787.09375, + "learning_rate": 9.889865389062845e-06, + "loss": 98164.7563, + "step": 80120 + }, + { + "epoch": 0.16186766969541486, + "grad_norm": 192530.734375, + "learning_rate": 9.889792516258571e-06, + "loss": 110277.1625, + "step": 80130 + }, + { + "epoch": 0.16188787032809868, + "grad_norm": 567571.9375, + "learning_rate": 9.88971961962204e-06, + "loss": 174486.4, + "step": 80140 + }, + { + "epoch": 0.1619080709607825, + "grad_norm": 555032.75, + "learning_rate": 9.88964669915361e-06, + "loss": 44838.6312, + "step": 80150 + }, + { + "epoch": 0.16192827159346632, + "grad_norm": 3243527.0, + "learning_rate": 9.889573754853633e-06, + "loss": 129749.4375, + "step": 80160 + }, + { + "epoch": 0.16194847222615014, + "grad_norm": 122214.3203125, + "learning_rate": 9.889500786722471e-06, + "loss": 69140.9812, + "step": 80170 + }, + { + "epoch": 0.16196867285883393, + "grad_norm": 126039.5859375, + "learning_rate": 9.889427794760472e-06, + "loss": 45261.1844, + "step": 80180 + }, + { + "epoch": 0.16198887349151775, + "grad_norm": 180940.546875, + "learning_rate": 9.889354778967995e-06, + "loss": 52885.2312, + "step": 80190 + }, + { + "epoch": 0.16200907412420157, + "grad_norm": 219755.03125, + "learning_rate": 9.889281739345395e-06, + "loss": 54575.9875, + "step": 80200 + }, + { + "epoch": 0.1620292747568854, + "grad_norm": 165480.328125, + "learning_rate": 9.88920867589303e-06, + "loss": 17271.3187, + "step": 80210 + }, + { + "epoch": 0.1620494753895692, + "grad_norm": 212396.328125, + "learning_rate": 9.889135588611254e-06, + "loss": 55867.575, + "step": 80220 + }, + { + "epoch": 0.16206967602225303, + "grad_norm": 455186.1875, + "learning_rate": 9.889062477500425e-06, + "loss": 220191.325, + "step": 80230 + }, + { + "epoch": 0.16208987665493682, + "grad_norm": 28386.013671875, + "learning_rate": 9.8889893425609e-06, + "loss": 72153.475, + "step": 80240 + }, + { + "epoch": 0.16211007728762064, + "grad_norm": 1625925.0, + "learning_rate": 9.88891618379303e-06, + "loss": 104672.225, + "step": 80250 + }, + { + "epoch": 0.16213027792030446, + "grad_norm": 1125189.375, + "learning_rate": 9.88884300119718e-06, + "loss": 126570.9375, + "step": 80260 + }, + { + "epoch": 0.16215047855298828, + "grad_norm": 48517.484375, + "learning_rate": 9.888769794773699e-06, + "loss": 72518.5938, + "step": 80270 + }, + { + "epoch": 0.1621706791856721, + "grad_norm": 25718.94921875, + "learning_rate": 9.888696564522948e-06, + "loss": 156956.85, + "step": 80280 + }, + { + "epoch": 0.16219087981835592, + "grad_norm": 2659077.25, + "learning_rate": 9.888623310445282e-06, + "loss": 295526.425, + "step": 80290 + }, + { + "epoch": 0.1622110804510397, + "grad_norm": 8500.453125, + "learning_rate": 9.88855003254106e-06, + "loss": 85302.05, + "step": 80300 + }, + { + "epoch": 0.16223128108372353, + "grad_norm": 3225900.0, + "learning_rate": 9.88847673081064e-06, + "loss": 214956.65, + "step": 80310 + }, + { + "epoch": 0.16225148171640735, + "grad_norm": 112897.1796875, + "learning_rate": 9.888403405254374e-06, + "loss": 119296.0125, + "step": 80320 + }, + { + "epoch": 0.16227168234909117, + "grad_norm": 251840.8125, + "learning_rate": 9.888330055872623e-06, + "loss": 148006.9, + "step": 80330 + }, + { + "epoch": 0.162291882981775, + "grad_norm": 195925.015625, + "learning_rate": 9.888256682665744e-06, + "loss": 117945.325, + "step": 80340 + }, + { + "epoch": 0.1623120836144588, + "grad_norm": 922120.125, + "learning_rate": 9.888183285634097e-06, + "loss": 140508.075, + "step": 80350 + }, + { + "epoch": 0.16233228424714263, + "grad_norm": 212970.390625, + "learning_rate": 9.888109864778036e-06, + "loss": 68823.0938, + "step": 80360 + }, + { + "epoch": 0.16235248487982643, + "grad_norm": 276958.875, + "learning_rate": 9.88803642009792e-06, + "loss": 93389.4438, + "step": 80370 + }, + { + "epoch": 0.16237268551251025, + "grad_norm": 319222.5, + "learning_rate": 9.887962951594108e-06, + "loss": 76510.9688, + "step": 80380 + }, + { + "epoch": 0.16239288614519407, + "grad_norm": 118885.2109375, + "learning_rate": 9.887889459266957e-06, + "loss": 111366.25, + "step": 80390 + }, + { + "epoch": 0.16241308677787789, + "grad_norm": 257036.296875, + "learning_rate": 9.887815943116827e-06, + "loss": 116349.75, + "step": 80400 + }, + { + "epoch": 0.1624332874105617, + "grad_norm": 819165.5625, + "learning_rate": 9.887742403144074e-06, + "loss": 54709.5437, + "step": 80410 + }, + { + "epoch": 0.16245348804324553, + "grad_norm": 2165985.5, + "learning_rate": 9.887668839349057e-06, + "loss": 143016.0, + "step": 80420 + }, + { + "epoch": 0.16247368867592932, + "grad_norm": 982664.4375, + "learning_rate": 9.887595251732135e-06, + "loss": 112546.9, + "step": 80430 + }, + { + "epoch": 0.16249388930861314, + "grad_norm": 26665.103515625, + "learning_rate": 9.887521640293668e-06, + "loss": 57382.4125, + "step": 80440 + }, + { + "epoch": 0.16251408994129696, + "grad_norm": 1031567.0, + "learning_rate": 9.887448005034011e-06, + "loss": 87096.5063, + "step": 80450 + }, + { + "epoch": 0.16253429057398078, + "grad_norm": 2490.58447265625, + "learning_rate": 9.887374345953526e-06, + "loss": 66983.275, + "step": 80460 + }, + { + "epoch": 0.1625544912066646, + "grad_norm": 61329.6328125, + "learning_rate": 9.88730066305257e-06, + "loss": 56357.875, + "step": 80470 + }, + { + "epoch": 0.16257469183934842, + "grad_norm": 54244.15234375, + "learning_rate": 9.887226956331506e-06, + "loss": 143175.275, + "step": 80480 + }, + { + "epoch": 0.16259489247203224, + "grad_norm": 454284.375, + "learning_rate": 9.887153225790688e-06, + "loss": 146974.975, + "step": 80490 + }, + { + "epoch": 0.16261509310471603, + "grad_norm": 83178.796875, + "learning_rate": 9.887079471430481e-06, + "loss": 32932.3438, + "step": 80500 + }, + { + "epoch": 0.16263529373739985, + "grad_norm": 73216.109375, + "learning_rate": 9.88700569325124e-06, + "loss": 138165.9875, + "step": 80510 + }, + { + "epoch": 0.16265549437008367, + "grad_norm": 46176.31640625, + "learning_rate": 9.886931891253324e-06, + "loss": 73864.525, + "step": 80520 + }, + { + "epoch": 0.1626756950027675, + "grad_norm": 281950.09375, + "learning_rate": 9.886858065437097e-06, + "loss": 288319.95, + "step": 80530 + }, + { + "epoch": 0.1626958956354513, + "grad_norm": 947946.75, + "learning_rate": 9.886784215802915e-06, + "loss": 102128.425, + "step": 80540 + }, + { + "epoch": 0.16271609626813513, + "grad_norm": 10534.931640625, + "learning_rate": 9.88671034235114e-06, + "loss": 164169.4, + "step": 80550 + }, + { + "epoch": 0.16273629690081892, + "grad_norm": 153964.78125, + "learning_rate": 9.886636445082132e-06, + "loss": 115074.95, + "step": 80560 + }, + { + "epoch": 0.16275649753350274, + "grad_norm": 12217.193359375, + "learning_rate": 9.88656252399625e-06, + "loss": 83696.25, + "step": 80570 + }, + { + "epoch": 0.16277669816618656, + "grad_norm": 13204.4150390625, + "learning_rate": 9.886488579093856e-06, + "loss": 84224.6938, + "step": 80580 + }, + { + "epoch": 0.16279689879887038, + "grad_norm": 513638.53125, + "learning_rate": 9.886414610375309e-06, + "loss": 65945.3313, + "step": 80590 + }, + { + "epoch": 0.1628170994315542, + "grad_norm": 551074.0625, + "learning_rate": 9.886340617840968e-06, + "loss": 57183.425, + "step": 80600 + }, + { + "epoch": 0.16283730006423802, + "grad_norm": 7402.5576171875, + "learning_rate": 9.886266601491197e-06, + "loss": 177989.475, + "step": 80610 + }, + { + "epoch": 0.16285750069692181, + "grad_norm": 0.0, + "learning_rate": 9.886192561326356e-06, + "loss": 99249.6625, + "step": 80620 + }, + { + "epoch": 0.16287770132960563, + "grad_norm": 2377878.0, + "learning_rate": 9.886118497346804e-06, + "loss": 152076.3375, + "step": 80630 + }, + { + "epoch": 0.16289790196228945, + "grad_norm": 321205.90625, + "learning_rate": 9.886044409552902e-06, + "loss": 42391.0469, + "step": 80640 + }, + { + "epoch": 0.16291810259497327, + "grad_norm": 0.0, + "learning_rate": 9.885970297945013e-06, + "loss": 214189.525, + "step": 80650 + }, + { + "epoch": 0.1629383032276571, + "grad_norm": 823743.0, + "learning_rate": 9.885896162523498e-06, + "loss": 59543.325, + "step": 80660 + }, + { + "epoch": 0.16295850386034091, + "grad_norm": 343372.0, + "learning_rate": 9.885822003288717e-06, + "loss": 200304.25, + "step": 80670 + }, + { + "epoch": 0.16297870449302473, + "grad_norm": 1785840.625, + "learning_rate": 9.885747820241032e-06, + "loss": 105593.9125, + "step": 80680 + }, + { + "epoch": 0.16299890512570853, + "grad_norm": 536593.375, + "learning_rate": 9.885673613380806e-06, + "loss": 86191.1, + "step": 80690 + }, + { + "epoch": 0.16301910575839235, + "grad_norm": 58872.30859375, + "learning_rate": 9.8855993827084e-06, + "loss": 144646.85, + "step": 80700 + }, + { + "epoch": 0.16303930639107617, + "grad_norm": 7197.34912109375, + "learning_rate": 9.885525128224173e-06, + "loss": 63196.5563, + "step": 80710 + }, + { + "epoch": 0.16305950702376, + "grad_norm": 35142.49609375, + "learning_rate": 9.885450849928489e-06, + "loss": 128577.475, + "step": 80720 + }, + { + "epoch": 0.1630797076564438, + "grad_norm": 404434.5625, + "learning_rate": 9.885376547821711e-06, + "loss": 104742.6313, + "step": 80730 + }, + { + "epoch": 0.16309990828912763, + "grad_norm": 586669.3125, + "learning_rate": 9.885302221904201e-06, + "loss": 90594.825, + "step": 80740 + }, + { + "epoch": 0.16312010892181142, + "grad_norm": 164409.3125, + "learning_rate": 9.88522787217632e-06, + "loss": 48748.8844, + "step": 80750 + }, + { + "epoch": 0.16314030955449524, + "grad_norm": 30822.740234375, + "learning_rate": 9.88515349863843e-06, + "loss": 206897.1375, + "step": 80760 + }, + { + "epoch": 0.16316051018717906, + "grad_norm": 386714.03125, + "learning_rate": 9.885079101290894e-06, + "loss": 126506.35, + "step": 80770 + }, + { + "epoch": 0.16318071081986288, + "grad_norm": 138874.515625, + "learning_rate": 9.885004680134075e-06, + "loss": 42730.5312, + "step": 80780 + }, + { + "epoch": 0.1632009114525467, + "grad_norm": 783212.25, + "learning_rate": 9.884930235168338e-06, + "loss": 141291.1125, + "step": 80790 + }, + { + "epoch": 0.16322111208523052, + "grad_norm": 605659.0625, + "learning_rate": 9.884855766394041e-06, + "loss": 155059.475, + "step": 80800 + }, + { + "epoch": 0.16324131271791434, + "grad_norm": 154921.640625, + "learning_rate": 9.88478127381155e-06, + "loss": 42696.4031, + "step": 80810 + }, + { + "epoch": 0.16326151335059813, + "grad_norm": 1317456.125, + "learning_rate": 9.884706757421229e-06, + "loss": 73300.9375, + "step": 80820 + }, + { + "epoch": 0.16328171398328195, + "grad_norm": 223978.96875, + "learning_rate": 9.884632217223438e-06, + "loss": 123376.4, + "step": 80830 + }, + { + "epoch": 0.16330191461596577, + "grad_norm": 560414.3125, + "learning_rate": 9.884557653218544e-06, + "loss": 106723.175, + "step": 80840 + }, + { + "epoch": 0.1633221152486496, + "grad_norm": 4061317.25, + "learning_rate": 9.884483065406905e-06, + "loss": 164316.7125, + "step": 80850 + }, + { + "epoch": 0.1633423158813334, + "grad_norm": 560835.25, + "learning_rate": 9.88440845378889e-06, + "loss": 168245.8875, + "step": 80860 + }, + { + "epoch": 0.16336251651401723, + "grad_norm": 857177.125, + "learning_rate": 9.884333818364861e-06, + "loss": 99207.05, + "step": 80870 + }, + { + "epoch": 0.16338271714670102, + "grad_norm": 491290.28125, + "learning_rate": 9.88425915913518e-06, + "loss": 113250.3875, + "step": 80880 + }, + { + "epoch": 0.16340291777938484, + "grad_norm": 770312.75, + "learning_rate": 9.884184476100215e-06, + "loss": 118613.475, + "step": 80890 + }, + { + "epoch": 0.16342311841206866, + "grad_norm": 39931.28515625, + "learning_rate": 9.884109769260326e-06, + "loss": 76261.8687, + "step": 80900 + }, + { + "epoch": 0.16344331904475248, + "grad_norm": 266148.5, + "learning_rate": 9.884035038615876e-06, + "loss": 88585.1625, + "step": 80910 + }, + { + "epoch": 0.1634635196774363, + "grad_norm": 48273.4140625, + "learning_rate": 9.883960284167234e-06, + "loss": 121367.575, + "step": 80920 + }, + { + "epoch": 0.16348372031012012, + "grad_norm": 888306.25, + "learning_rate": 9.88388550591476e-06, + "loss": 62900.4812, + "step": 80930 + }, + { + "epoch": 0.16350392094280392, + "grad_norm": 1979183.375, + "learning_rate": 9.883810703858823e-06, + "loss": 126462.6875, + "step": 80940 + }, + { + "epoch": 0.16352412157548774, + "grad_norm": 0.0, + "learning_rate": 9.883735877999785e-06, + "loss": 90333.2375, + "step": 80950 + }, + { + "epoch": 0.16354432220817156, + "grad_norm": 158511.0625, + "learning_rate": 9.883661028338009e-06, + "loss": 153250.25, + "step": 80960 + }, + { + "epoch": 0.16356452284085538, + "grad_norm": 131463.140625, + "learning_rate": 9.88358615487386e-06, + "loss": 97524.2812, + "step": 80970 + }, + { + "epoch": 0.1635847234735392, + "grad_norm": 4571.771484375, + "learning_rate": 9.883511257607708e-06, + "loss": 84904.725, + "step": 80980 + }, + { + "epoch": 0.16360492410622302, + "grad_norm": 0.0, + "learning_rate": 9.883436336539913e-06, + "loss": 151016.275, + "step": 80990 + }, + { + "epoch": 0.16362512473890684, + "grad_norm": 3694132.0, + "learning_rate": 9.883361391670841e-06, + "loss": 131702.525, + "step": 81000 + }, + { + "epoch": 0.16364532537159063, + "grad_norm": 384535.625, + "learning_rate": 9.883286423000857e-06, + "loss": 59964.6813, + "step": 81010 + }, + { + "epoch": 0.16366552600427445, + "grad_norm": 514360.03125, + "learning_rate": 9.883211430530329e-06, + "loss": 55179.6875, + "step": 81020 + }, + { + "epoch": 0.16368572663695827, + "grad_norm": 82557.3984375, + "learning_rate": 9.88313641425962e-06, + "loss": 190650.6375, + "step": 81030 + }, + { + "epoch": 0.1637059272696421, + "grad_norm": 117838.6953125, + "learning_rate": 9.883061374189095e-06, + "loss": 105964.1, + "step": 81040 + }, + { + "epoch": 0.1637261279023259, + "grad_norm": 0.0, + "learning_rate": 9.882986310319124e-06, + "loss": 137458.35, + "step": 81050 + }, + { + "epoch": 0.16374632853500973, + "grad_norm": 91982.65625, + "learning_rate": 9.882911222650069e-06, + "loss": 37065.6312, + "step": 81060 + }, + { + "epoch": 0.16376652916769352, + "grad_norm": 493723.1875, + "learning_rate": 9.882836111182295e-06, + "loss": 95567.3313, + "step": 81070 + }, + { + "epoch": 0.16378672980037734, + "grad_norm": 56616.0390625, + "learning_rate": 9.882760975916173e-06, + "loss": 66999.2375, + "step": 81080 + }, + { + "epoch": 0.16380693043306116, + "grad_norm": 929104.25, + "learning_rate": 9.882685816852064e-06, + "loss": 103968.4625, + "step": 81090 + }, + { + "epoch": 0.16382713106574498, + "grad_norm": 112197.7578125, + "learning_rate": 9.882610633990337e-06, + "loss": 93284.775, + "step": 81100 + }, + { + "epoch": 0.1638473316984288, + "grad_norm": 450213.65625, + "learning_rate": 9.882535427331357e-06, + "loss": 67389.9875, + "step": 81110 + }, + { + "epoch": 0.16386753233111262, + "grad_norm": 98025.1796875, + "learning_rate": 9.882460196875495e-06, + "loss": 137559.75, + "step": 81120 + }, + { + "epoch": 0.16388773296379644, + "grad_norm": 225062.546875, + "learning_rate": 9.88238494262311e-06, + "loss": 166662.275, + "step": 81130 + }, + { + "epoch": 0.16390793359648023, + "grad_norm": 2183028.25, + "learning_rate": 9.882309664574576e-06, + "loss": 116416.675, + "step": 81140 + }, + { + "epoch": 0.16392813422916405, + "grad_norm": 15168.845703125, + "learning_rate": 9.882234362730255e-06, + "loss": 29400.5563, + "step": 81150 + }, + { + "epoch": 0.16394833486184787, + "grad_norm": 9632850.0, + "learning_rate": 9.882159037090517e-06, + "loss": 83228.2688, + "step": 81160 + }, + { + "epoch": 0.1639685354945317, + "grad_norm": 627513.3125, + "learning_rate": 9.882083687655728e-06, + "loss": 144525.175, + "step": 81170 + }, + { + "epoch": 0.1639887361272155, + "grad_norm": 499824.78125, + "learning_rate": 9.882008314426253e-06, + "loss": 97020.8, + "step": 81180 + }, + { + "epoch": 0.16400893675989933, + "grad_norm": 481439.03125, + "learning_rate": 9.881932917402464e-06, + "loss": 122602.9625, + "step": 81190 + }, + { + "epoch": 0.16402913739258312, + "grad_norm": 726015.0, + "learning_rate": 9.881857496584726e-06, + "loss": 134065.125, + "step": 81200 + }, + { + "epoch": 0.16404933802526694, + "grad_norm": 292768.375, + "learning_rate": 9.881782051973405e-06, + "loss": 98878.5063, + "step": 81210 + }, + { + "epoch": 0.16406953865795076, + "grad_norm": 64098.1953125, + "learning_rate": 9.88170658356887e-06, + "loss": 126486.1, + "step": 81220 + }, + { + "epoch": 0.16408973929063458, + "grad_norm": 106820.34375, + "learning_rate": 9.881631091371492e-06, + "loss": 176933.45, + "step": 81230 + }, + { + "epoch": 0.1641099399233184, + "grad_norm": 2211505.0, + "learning_rate": 9.881555575381635e-06, + "loss": 70493.775, + "step": 81240 + }, + { + "epoch": 0.16413014055600222, + "grad_norm": 1343391.75, + "learning_rate": 9.881480035599667e-06, + "loss": 132359.7125, + "step": 81250 + }, + { + "epoch": 0.16415034118868602, + "grad_norm": 1354980.75, + "learning_rate": 9.88140447202596e-06, + "loss": 119103.1125, + "step": 81260 + }, + { + "epoch": 0.16417054182136984, + "grad_norm": 1892822.625, + "learning_rate": 9.881328884660876e-06, + "loss": 72768.4375, + "step": 81270 + }, + { + "epoch": 0.16419074245405366, + "grad_norm": 139695.53125, + "learning_rate": 9.88125327350479e-06, + "loss": 97185.4625, + "step": 81280 + }, + { + "epoch": 0.16421094308673748, + "grad_norm": 745937.3125, + "learning_rate": 9.881177638558066e-06, + "loss": 123714.35, + "step": 81290 + }, + { + "epoch": 0.1642311437194213, + "grad_norm": 138674.171875, + "learning_rate": 9.881101979821075e-06, + "loss": 131333.725, + "step": 81300 + }, + { + "epoch": 0.16425134435210512, + "grad_norm": 202718.84375, + "learning_rate": 9.881026297294185e-06, + "loss": 103984.5437, + "step": 81310 + }, + { + "epoch": 0.16427154498478894, + "grad_norm": 167895.421875, + "learning_rate": 9.880950590977764e-06, + "loss": 123195.825, + "step": 81320 + }, + { + "epoch": 0.16429174561747273, + "grad_norm": 251470.578125, + "learning_rate": 9.880874860872183e-06, + "loss": 51752.7688, + "step": 81330 + }, + { + "epoch": 0.16431194625015655, + "grad_norm": 1705160.5, + "learning_rate": 9.88079910697781e-06, + "loss": 93168.4625, + "step": 81340 + }, + { + "epoch": 0.16433214688284037, + "grad_norm": 79462.3046875, + "learning_rate": 9.880723329295012e-06, + "loss": 98288.0688, + "step": 81350 + }, + { + "epoch": 0.1643523475155242, + "grad_norm": 253572.328125, + "learning_rate": 9.880647527824161e-06, + "loss": 70976.6125, + "step": 81360 + }, + { + "epoch": 0.164372548148208, + "grad_norm": 2180874.0, + "learning_rate": 9.880571702565627e-06, + "loss": 126989.425, + "step": 81370 + }, + { + "epoch": 0.16439274878089183, + "grad_norm": 391007.28125, + "learning_rate": 9.880495853519777e-06, + "loss": 88020.725, + "step": 81380 + }, + { + "epoch": 0.16441294941357562, + "grad_norm": 851702.8125, + "learning_rate": 9.880419980686986e-06, + "loss": 66484.1625, + "step": 81390 + }, + { + "epoch": 0.16443315004625944, + "grad_norm": 147226.90625, + "learning_rate": 9.880344084067616e-06, + "loss": 187473.0, + "step": 81400 + }, + { + "epoch": 0.16445335067894326, + "grad_norm": 268036.84375, + "learning_rate": 9.880268163662043e-06, + "loss": 78045.6, + "step": 81410 + }, + { + "epoch": 0.16447355131162708, + "grad_norm": 338536.40625, + "learning_rate": 9.880192219470633e-06, + "loss": 253586.325, + "step": 81420 + }, + { + "epoch": 0.1644937519443109, + "grad_norm": 163263.484375, + "learning_rate": 9.88011625149376e-06, + "loss": 36559.9344, + "step": 81430 + }, + { + "epoch": 0.16451395257699472, + "grad_norm": 1226643.875, + "learning_rate": 9.88004025973179e-06, + "loss": 95127.1313, + "step": 81440 + }, + { + "epoch": 0.16453415320967854, + "grad_norm": 163310.1875, + "learning_rate": 9.879964244185098e-06, + "loss": 72066.7937, + "step": 81450 + }, + { + "epoch": 0.16455435384236233, + "grad_norm": 262678.09375, + "learning_rate": 9.87988820485405e-06, + "loss": 151335.6375, + "step": 81460 + }, + { + "epoch": 0.16457455447504615, + "grad_norm": 787785.0625, + "learning_rate": 9.87981214173902e-06, + "loss": 77401.7375, + "step": 81470 + }, + { + "epoch": 0.16459475510772997, + "grad_norm": 7135990.5, + "learning_rate": 9.879736054840377e-06, + "loss": 107245.8375, + "step": 81480 + }, + { + "epoch": 0.1646149557404138, + "grad_norm": 1042081.9375, + "learning_rate": 9.879659944158493e-06, + "loss": 185728.2625, + "step": 81490 + }, + { + "epoch": 0.1646351563730976, + "grad_norm": 1987953.75, + "learning_rate": 9.879583809693737e-06, + "loss": 157876.15, + "step": 81500 + }, + { + "epoch": 0.16465535700578143, + "grad_norm": 85118.3359375, + "learning_rate": 9.879507651446482e-06, + "loss": 99568.375, + "step": 81510 + }, + { + "epoch": 0.16467555763846523, + "grad_norm": 2080111.0, + "learning_rate": 9.8794314694171e-06, + "loss": 71432.675, + "step": 81520 + }, + { + "epoch": 0.16469575827114905, + "grad_norm": 67868.5078125, + "learning_rate": 9.879355263605958e-06, + "loss": 64818.0437, + "step": 81530 + }, + { + "epoch": 0.16471595890383287, + "grad_norm": 613090.0, + "learning_rate": 9.879279034013434e-06, + "loss": 52642.7875, + "step": 81540 + }, + { + "epoch": 0.16473615953651669, + "grad_norm": 0.0, + "learning_rate": 9.879202780639893e-06, + "loss": 63706.9812, + "step": 81550 + }, + { + "epoch": 0.1647563601692005, + "grad_norm": 62476.10546875, + "learning_rate": 9.879126503485709e-06, + "loss": 101466.5625, + "step": 81560 + }, + { + "epoch": 0.16477656080188433, + "grad_norm": 566541.6875, + "learning_rate": 9.879050202551256e-06, + "loss": 186916.65, + "step": 81570 + }, + { + "epoch": 0.16479676143456812, + "grad_norm": 44061.25, + "learning_rate": 9.878973877836902e-06, + "loss": 181139.65, + "step": 81580 + }, + { + "epoch": 0.16481696206725194, + "grad_norm": 13279.3359375, + "learning_rate": 9.878897529343023e-06, + "loss": 49310.7906, + "step": 81590 + }, + { + "epoch": 0.16483716269993576, + "grad_norm": 341725.25, + "learning_rate": 9.878821157069988e-06, + "loss": 63443.1, + "step": 81600 + }, + { + "epoch": 0.16485736333261958, + "grad_norm": 1973839.75, + "learning_rate": 9.87874476101817e-06, + "loss": 128264.375, + "step": 81610 + }, + { + "epoch": 0.1648775639653034, + "grad_norm": 514348.15625, + "learning_rate": 9.878668341187944e-06, + "loss": 107146.1, + "step": 81620 + }, + { + "epoch": 0.16489776459798722, + "grad_norm": 1853423.0, + "learning_rate": 9.878591897579678e-06, + "loss": 174315.0875, + "step": 81630 + }, + { + "epoch": 0.16491796523067104, + "grad_norm": 449294.6875, + "learning_rate": 9.87851543019375e-06, + "loss": 62731.075, + "step": 81640 + }, + { + "epoch": 0.16493816586335483, + "grad_norm": 1225516.875, + "learning_rate": 9.878438939030526e-06, + "loss": 68516.7875, + "step": 81650 + }, + { + "epoch": 0.16495836649603865, + "grad_norm": 543721.0625, + "learning_rate": 9.878362424090384e-06, + "loss": 48348.7625, + "step": 81660 + }, + { + "epoch": 0.16497856712872247, + "grad_norm": 1931952.75, + "learning_rate": 9.878285885373693e-06, + "loss": 69796.8813, + "step": 81670 + }, + { + "epoch": 0.1649987677614063, + "grad_norm": 1035881.0625, + "learning_rate": 9.87820932288083e-06, + "loss": 114166.575, + "step": 81680 + }, + { + "epoch": 0.1650189683940901, + "grad_norm": 8205.8916015625, + "learning_rate": 9.878132736612167e-06, + "loss": 47733.2469, + "step": 81690 + }, + { + "epoch": 0.16503916902677393, + "grad_norm": 397616.0625, + "learning_rate": 9.878056126568077e-06, + "loss": 111960.45, + "step": 81700 + }, + { + "epoch": 0.16505936965945772, + "grad_norm": 1411958.375, + "learning_rate": 9.87797949274893e-06, + "loss": 141934.05, + "step": 81710 + }, + { + "epoch": 0.16507957029214154, + "grad_norm": 645360.9375, + "learning_rate": 9.877902835155105e-06, + "loss": 146904.35, + "step": 81720 + }, + { + "epoch": 0.16509977092482536, + "grad_norm": 85463.875, + "learning_rate": 9.877826153786973e-06, + "loss": 94991.25, + "step": 81730 + }, + { + "epoch": 0.16511997155750918, + "grad_norm": 701673.8125, + "learning_rate": 9.877749448644908e-06, + "loss": 66251.7812, + "step": 81740 + }, + { + "epoch": 0.165140172190193, + "grad_norm": 129086.7578125, + "learning_rate": 9.877672719729283e-06, + "loss": 117834.5, + "step": 81750 + }, + { + "epoch": 0.16516037282287682, + "grad_norm": 224809.296875, + "learning_rate": 9.877595967040475e-06, + "loss": 121203.0625, + "step": 81760 + }, + { + "epoch": 0.16518057345556064, + "grad_norm": 283275.96875, + "learning_rate": 9.877519190578852e-06, + "loss": 88717.5375, + "step": 81770 + }, + { + "epoch": 0.16520077408824443, + "grad_norm": 380055.375, + "learning_rate": 9.877442390344796e-06, + "loss": 257508.325, + "step": 81780 + }, + { + "epoch": 0.16522097472092825, + "grad_norm": 1678865.75, + "learning_rate": 9.877365566338675e-06, + "loss": 127536.5, + "step": 81790 + }, + { + "epoch": 0.16524117535361207, + "grad_norm": 2393004.75, + "learning_rate": 9.877288718560866e-06, + "loss": 125397.4625, + "step": 81800 + }, + { + "epoch": 0.1652613759862959, + "grad_norm": 135011.828125, + "learning_rate": 9.877211847011744e-06, + "loss": 82433.325, + "step": 81810 + }, + { + "epoch": 0.16528157661897971, + "grad_norm": 88502.59375, + "learning_rate": 9.877134951691683e-06, + "loss": 162694.5375, + "step": 81820 + }, + { + "epoch": 0.16530177725166353, + "grad_norm": 884184.5, + "learning_rate": 9.877058032601057e-06, + "loss": 240451.525, + "step": 81830 + }, + { + "epoch": 0.16532197788434733, + "grad_norm": 2599197.25, + "learning_rate": 9.876981089740242e-06, + "loss": 123191.2625, + "step": 81840 + }, + { + "epoch": 0.16534217851703115, + "grad_norm": 68046.6171875, + "learning_rate": 9.876904123109613e-06, + "loss": 166136.025, + "step": 81850 + }, + { + "epoch": 0.16536237914971497, + "grad_norm": 0.0, + "learning_rate": 9.876827132709545e-06, + "loss": 85380.275, + "step": 81860 + }, + { + "epoch": 0.1653825797823988, + "grad_norm": 1686308.625, + "learning_rate": 9.876750118540413e-06, + "loss": 138459.6875, + "step": 81870 + }, + { + "epoch": 0.1654027804150826, + "grad_norm": 137493.765625, + "learning_rate": 9.87667308060259e-06, + "loss": 158505.4125, + "step": 81880 + }, + { + "epoch": 0.16542298104776643, + "grad_norm": 58217.6875, + "learning_rate": 9.876596018896457e-06, + "loss": 46178.3844, + "step": 81890 + }, + { + "epoch": 0.16544318168045022, + "grad_norm": 23632.884765625, + "learning_rate": 9.876518933422385e-06, + "loss": 120425.1875, + "step": 81900 + }, + { + "epoch": 0.16546338231313404, + "grad_norm": 631889.6875, + "learning_rate": 9.876441824180752e-06, + "loss": 98189.9125, + "step": 81910 + }, + { + "epoch": 0.16548358294581786, + "grad_norm": 2539645.5, + "learning_rate": 9.876364691171933e-06, + "loss": 120616.9875, + "step": 81920 + }, + { + "epoch": 0.16550378357850168, + "grad_norm": 1019994.3125, + "learning_rate": 9.876287534396304e-06, + "loss": 108561.4875, + "step": 81930 + }, + { + "epoch": 0.1655239842111855, + "grad_norm": 3666514.25, + "learning_rate": 9.876210353854239e-06, + "loss": 130504.025, + "step": 81940 + }, + { + "epoch": 0.16554418484386932, + "grad_norm": 73530.9765625, + "learning_rate": 9.876133149546117e-06, + "loss": 24591.2469, + "step": 81950 + }, + { + "epoch": 0.16556438547655314, + "grad_norm": 3089628.5, + "learning_rate": 9.876055921472316e-06, + "loss": 79504.3125, + "step": 81960 + }, + { + "epoch": 0.16558458610923693, + "grad_norm": 31996.54296875, + "learning_rate": 9.875978669633206e-06, + "loss": 28857.8688, + "step": 81970 + }, + { + "epoch": 0.16560478674192075, + "grad_norm": 3283183.75, + "learning_rate": 9.87590139402917e-06, + "loss": 65535.075, + "step": 81980 + }, + { + "epoch": 0.16562498737460457, + "grad_norm": 803706.3125, + "learning_rate": 9.87582409466058e-06, + "loss": 77768.425, + "step": 81990 + }, + { + "epoch": 0.1656451880072884, + "grad_norm": 59134.84765625, + "learning_rate": 9.875746771527817e-06, + "loss": 119242.9375, + "step": 82000 + }, + { + "epoch": 0.1656653886399722, + "grad_norm": 59106.81640625, + "learning_rate": 9.875669424631255e-06, + "loss": 71319.6875, + "step": 82010 + }, + { + "epoch": 0.16568558927265603, + "grad_norm": 485883.15625, + "learning_rate": 9.87559205397127e-06, + "loss": 176392.3625, + "step": 82020 + }, + { + "epoch": 0.16570578990533982, + "grad_norm": 46823.9296875, + "learning_rate": 9.875514659548243e-06, + "loss": 171753.3375, + "step": 82030 + }, + { + "epoch": 0.16572599053802364, + "grad_norm": 14809.720703125, + "learning_rate": 9.875437241362546e-06, + "loss": 122316.75, + "step": 82040 + }, + { + "epoch": 0.16574619117070746, + "grad_norm": 74035.5078125, + "learning_rate": 9.87535979941456e-06, + "loss": 16410.4531, + "step": 82050 + }, + { + "epoch": 0.16576639180339128, + "grad_norm": 65277.93359375, + "learning_rate": 9.875282333704665e-06, + "loss": 248303.675, + "step": 82060 + }, + { + "epoch": 0.1657865924360751, + "grad_norm": 990728.0625, + "learning_rate": 9.875204844233231e-06, + "loss": 114637.05, + "step": 82070 + }, + { + "epoch": 0.16580679306875892, + "grad_norm": 122086.1328125, + "learning_rate": 9.875127331000642e-06, + "loss": 58057.8562, + "step": 82080 + }, + { + "epoch": 0.16582699370144272, + "grad_norm": 2113599.25, + "learning_rate": 9.875049794007274e-06, + "loss": 241684.75, + "step": 82090 + }, + { + "epoch": 0.16584719433412654, + "grad_norm": 677608.8125, + "learning_rate": 9.874972233253503e-06, + "loss": 91239.4, + "step": 82100 + }, + { + "epoch": 0.16586739496681036, + "grad_norm": 183202.40625, + "learning_rate": 9.87489464873971e-06, + "loss": 32380.2625, + "step": 82110 + }, + { + "epoch": 0.16588759559949418, + "grad_norm": 13609.1865234375, + "learning_rate": 9.874817040466271e-06, + "loss": 177845.875, + "step": 82120 + }, + { + "epoch": 0.165907796232178, + "grad_norm": 2677286.25, + "learning_rate": 9.874739408433565e-06, + "loss": 77077.175, + "step": 82130 + }, + { + "epoch": 0.16592799686486182, + "grad_norm": 559301.4375, + "learning_rate": 9.87466175264197e-06, + "loss": 146538.7875, + "step": 82140 + }, + { + "epoch": 0.16594819749754564, + "grad_norm": 997289.5, + "learning_rate": 9.874584073091867e-06, + "loss": 115622.8, + "step": 82150 + }, + { + "epoch": 0.16596839813022943, + "grad_norm": 717100.6875, + "learning_rate": 9.874506369783629e-06, + "loss": 105131.325, + "step": 82160 + }, + { + "epoch": 0.16598859876291325, + "grad_norm": 1137753.0, + "learning_rate": 9.874428642717641e-06, + "loss": 249186.35, + "step": 82170 + }, + { + "epoch": 0.16600879939559707, + "grad_norm": 450130.65625, + "learning_rate": 9.874350891894278e-06, + "loss": 48580.3187, + "step": 82180 + }, + { + "epoch": 0.1660290000282809, + "grad_norm": 179828.109375, + "learning_rate": 9.87427311731392e-06, + "loss": 106377.25, + "step": 82190 + }, + { + "epoch": 0.1660492006609647, + "grad_norm": 550867.4375, + "learning_rate": 9.874195318976945e-06, + "loss": 38688.925, + "step": 82200 + }, + { + "epoch": 0.16606940129364853, + "grad_norm": 2154397.5, + "learning_rate": 9.874117496883734e-06, + "loss": 73308.2312, + "step": 82210 + }, + { + "epoch": 0.16608960192633232, + "grad_norm": 1009651.8125, + "learning_rate": 9.874039651034665e-06, + "loss": 36815.3313, + "step": 82220 + }, + { + "epoch": 0.16610980255901614, + "grad_norm": 3037607.5, + "learning_rate": 9.873961781430119e-06, + "loss": 236483.15, + "step": 82230 + }, + { + "epoch": 0.16613000319169996, + "grad_norm": 121914.0, + "learning_rate": 9.873883888070474e-06, + "loss": 135257.5875, + "step": 82240 + }, + { + "epoch": 0.16615020382438378, + "grad_norm": 34809.69921875, + "learning_rate": 9.87380597095611e-06, + "loss": 25475.8469, + "step": 82250 + }, + { + "epoch": 0.1661704044570676, + "grad_norm": 1209585.125, + "learning_rate": 9.873728030087406e-06, + "loss": 84102.2063, + "step": 82260 + }, + { + "epoch": 0.16619060508975142, + "grad_norm": 2552711.75, + "learning_rate": 9.873650065464744e-06, + "loss": 70102.65, + "step": 82270 + }, + { + "epoch": 0.16621080572243524, + "grad_norm": 312153.28125, + "learning_rate": 9.873572077088502e-06, + "loss": 55017.5625, + "step": 82280 + }, + { + "epoch": 0.16623100635511903, + "grad_norm": 17461.22265625, + "learning_rate": 9.87349406495906e-06, + "loss": 141004.525, + "step": 82290 + }, + { + "epoch": 0.16625120698780285, + "grad_norm": 35259.33984375, + "learning_rate": 9.873416029076801e-06, + "loss": 148392.025, + "step": 82300 + }, + { + "epoch": 0.16627140762048667, + "grad_norm": 55050.37109375, + "learning_rate": 9.873337969442102e-06, + "loss": 134163.5125, + "step": 82310 + }, + { + "epoch": 0.1662916082531705, + "grad_norm": 650343.6875, + "learning_rate": 9.873259886055344e-06, + "loss": 138277.425, + "step": 82320 + }, + { + "epoch": 0.1663118088858543, + "grad_norm": 91123.25, + "learning_rate": 9.873181778916911e-06, + "loss": 81204.025, + "step": 82330 + }, + { + "epoch": 0.16633200951853813, + "grad_norm": 2879795.0, + "learning_rate": 9.873103648027178e-06, + "loss": 96774.9312, + "step": 82340 + }, + { + "epoch": 0.16635221015122192, + "grad_norm": 1571516.75, + "learning_rate": 9.873025493386531e-06, + "loss": 70803.7437, + "step": 82350 + }, + { + "epoch": 0.16637241078390574, + "grad_norm": 394022.40625, + "learning_rate": 9.872947314995348e-06, + "loss": 226690.1, + "step": 82360 + }, + { + "epoch": 0.16639261141658956, + "grad_norm": 524096.5, + "learning_rate": 9.872869112854011e-06, + "loss": 60008.5312, + "step": 82370 + }, + { + "epoch": 0.16641281204927338, + "grad_norm": 227741.203125, + "learning_rate": 9.872790886962901e-06, + "loss": 199162.5625, + "step": 82380 + }, + { + "epoch": 0.1664330126819572, + "grad_norm": 569204.125, + "learning_rate": 9.8727126373224e-06, + "loss": 103173.125, + "step": 82390 + }, + { + "epoch": 0.16645321331464102, + "grad_norm": 7973001.5, + "learning_rate": 9.872634363932887e-06, + "loss": 212651.75, + "step": 82400 + }, + { + "epoch": 0.16647341394732482, + "grad_norm": 1526105.875, + "learning_rate": 9.872556066794745e-06, + "loss": 207492.925, + "step": 82410 + }, + { + "epoch": 0.16649361458000864, + "grad_norm": 214803.296875, + "learning_rate": 9.872477745908356e-06, + "loss": 153947.2375, + "step": 82420 + }, + { + "epoch": 0.16651381521269246, + "grad_norm": 2935245.5, + "learning_rate": 9.872399401274103e-06, + "loss": 159420.5375, + "step": 82430 + }, + { + "epoch": 0.16653401584537628, + "grad_norm": 15262.5146484375, + "learning_rate": 9.872321032892364e-06, + "loss": 122513.125, + "step": 82440 + }, + { + "epoch": 0.1665542164780601, + "grad_norm": 145403.0625, + "learning_rate": 9.872242640763525e-06, + "loss": 87428.1062, + "step": 82450 + }, + { + "epoch": 0.16657441711074392, + "grad_norm": 136356.234375, + "learning_rate": 9.872164224887966e-06, + "loss": 112479.875, + "step": 82460 + }, + { + "epoch": 0.16659461774342774, + "grad_norm": 1064944.0, + "learning_rate": 9.872085785266069e-06, + "loss": 114474.3, + "step": 82470 + }, + { + "epoch": 0.16661481837611153, + "grad_norm": 34793.515625, + "learning_rate": 9.872007321898218e-06, + "loss": 78845.4625, + "step": 82480 + }, + { + "epoch": 0.16663501900879535, + "grad_norm": 104293.5859375, + "learning_rate": 9.871928834784793e-06, + "loss": 106186.0375, + "step": 82490 + }, + { + "epoch": 0.16665521964147917, + "grad_norm": 262409.875, + "learning_rate": 9.871850323926178e-06, + "loss": 42409.7844, + "step": 82500 + }, + { + "epoch": 0.166675420274163, + "grad_norm": 86698.671875, + "learning_rate": 9.871771789322754e-06, + "loss": 63941.65, + "step": 82510 + }, + { + "epoch": 0.1666956209068468, + "grad_norm": 41534.6640625, + "learning_rate": 9.871693230974907e-06, + "loss": 116499.7875, + "step": 82520 + }, + { + "epoch": 0.16671582153953063, + "grad_norm": 0.0, + "learning_rate": 9.871614648883017e-06, + "loss": 59009.1375, + "step": 82530 + }, + { + "epoch": 0.16673602217221442, + "grad_norm": 163772.109375, + "learning_rate": 9.87153604304747e-06, + "loss": 49924.8969, + "step": 82540 + }, + { + "epoch": 0.16675622280489824, + "grad_norm": 1001450.1875, + "learning_rate": 9.871457413468645e-06, + "loss": 94684.1625, + "step": 82550 + }, + { + "epoch": 0.16677642343758206, + "grad_norm": 23012.53515625, + "learning_rate": 9.871378760146928e-06, + "loss": 80492.6313, + "step": 82560 + }, + { + "epoch": 0.16679662407026588, + "grad_norm": 4923556.0, + "learning_rate": 9.871300083082702e-06, + "loss": 135288.9, + "step": 82570 + }, + { + "epoch": 0.1668168247029497, + "grad_norm": 908830.9375, + "learning_rate": 9.87122138227635e-06, + "loss": 102563.575, + "step": 82580 + }, + { + "epoch": 0.16683702533563352, + "grad_norm": 44601.09765625, + "learning_rate": 9.871142657728257e-06, + "loss": 221884.525, + "step": 82590 + }, + { + "epoch": 0.16685722596831734, + "grad_norm": 181678.5, + "learning_rate": 9.871063909438803e-06, + "loss": 145932.7125, + "step": 82600 + }, + { + "epoch": 0.16687742660100113, + "grad_norm": 7108.47119140625, + "learning_rate": 9.870985137408375e-06, + "loss": 127678.9375, + "step": 82610 + }, + { + "epoch": 0.16689762723368495, + "grad_norm": 1191036.25, + "learning_rate": 9.870906341637358e-06, + "loss": 128914.35, + "step": 82620 + }, + { + "epoch": 0.16691782786636877, + "grad_norm": 44495.2734375, + "learning_rate": 9.870827522126134e-06, + "loss": 124870.4, + "step": 82630 + }, + { + "epoch": 0.1669380284990526, + "grad_norm": 288733.875, + "learning_rate": 9.870748678875086e-06, + "loss": 66195.425, + "step": 82640 + }, + { + "epoch": 0.1669582291317364, + "grad_norm": 5804426.5, + "learning_rate": 9.8706698118846e-06, + "loss": 192554.1, + "step": 82650 + }, + { + "epoch": 0.16697842976442023, + "grad_norm": 471143.1875, + "learning_rate": 9.870590921155062e-06, + "loss": 135301.0625, + "step": 82660 + }, + { + "epoch": 0.16699863039710403, + "grad_norm": 1249305.0, + "learning_rate": 9.870512006686852e-06, + "loss": 92560.0312, + "step": 82670 + }, + { + "epoch": 0.16701883102978785, + "grad_norm": 625529.3125, + "learning_rate": 9.870433068480359e-06, + "loss": 107171.5625, + "step": 82680 + }, + { + "epoch": 0.16703903166247167, + "grad_norm": 38044.3984375, + "learning_rate": 9.870354106535964e-06, + "loss": 125915.8625, + "step": 82690 + }, + { + "epoch": 0.16705923229515549, + "grad_norm": 3915169.75, + "learning_rate": 9.870275120854055e-06, + "loss": 184022.6125, + "step": 82700 + }, + { + "epoch": 0.1670794329278393, + "grad_norm": 344934.5625, + "learning_rate": 9.870196111435016e-06, + "loss": 87166.1875, + "step": 82710 + }, + { + "epoch": 0.16709963356052313, + "grad_norm": 119305.3984375, + "learning_rate": 9.870117078279231e-06, + "loss": 214088.875, + "step": 82720 + }, + { + "epoch": 0.16711983419320692, + "grad_norm": 291021.0625, + "learning_rate": 9.870038021387087e-06, + "loss": 96210.1062, + "step": 82730 + }, + { + "epoch": 0.16714003482589074, + "grad_norm": 63164.41015625, + "learning_rate": 9.869958940758968e-06, + "loss": 65970.3125, + "step": 82740 + }, + { + "epoch": 0.16716023545857456, + "grad_norm": 289403.84375, + "learning_rate": 9.86987983639526e-06, + "loss": 78773.775, + "step": 82750 + }, + { + "epoch": 0.16718043609125838, + "grad_norm": 813905.0625, + "learning_rate": 9.869800708296347e-06, + "loss": 167932.9375, + "step": 82760 + }, + { + "epoch": 0.1672006367239422, + "grad_norm": 169241.4375, + "learning_rate": 9.869721556462617e-06, + "loss": 110521.5375, + "step": 82770 + }, + { + "epoch": 0.16722083735662602, + "grad_norm": 93532.7421875, + "learning_rate": 9.869642380894454e-06, + "loss": 187332.1375, + "step": 82780 + }, + { + "epoch": 0.16724103798930984, + "grad_norm": 1410895.375, + "learning_rate": 9.869563181592246e-06, + "loss": 87922.525, + "step": 82790 + }, + { + "epoch": 0.16726123862199363, + "grad_norm": 282875.71875, + "learning_rate": 9.869483958556376e-06, + "loss": 131821.025, + "step": 82800 + }, + { + "epoch": 0.16728143925467745, + "grad_norm": 417856.0625, + "learning_rate": 9.869404711787234e-06, + "loss": 91325.375, + "step": 82810 + }, + { + "epoch": 0.16730163988736127, + "grad_norm": 291480.59375, + "learning_rate": 9.869325441285203e-06, + "loss": 135393.5125, + "step": 82820 + }, + { + "epoch": 0.1673218405200451, + "grad_norm": 439417.3125, + "learning_rate": 9.869246147050669e-06, + "loss": 102248.925, + "step": 82830 + }, + { + "epoch": 0.1673420411527289, + "grad_norm": 458216.0625, + "learning_rate": 9.869166829084023e-06, + "loss": 91340.9625, + "step": 82840 + }, + { + "epoch": 0.16736224178541273, + "grad_norm": 655241.5625, + "learning_rate": 9.869087487385644e-06, + "loss": 184133.4625, + "step": 82850 + }, + { + "epoch": 0.16738244241809652, + "grad_norm": 430614.625, + "learning_rate": 9.869008121955928e-06, + "loss": 124794.175, + "step": 82860 + }, + { + "epoch": 0.16740264305078034, + "grad_norm": 4295527.5, + "learning_rate": 9.868928732795253e-06, + "loss": 121550.375, + "step": 82870 + }, + { + "epoch": 0.16742284368346416, + "grad_norm": 2380153.75, + "learning_rate": 9.868849319904012e-06, + "loss": 121725.2875, + "step": 82880 + }, + { + "epoch": 0.16744304431614798, + "grad_norm": 88742.5, + "learning_rate": 9.86876988328259e-06, + "loss": 95743.1687, + "step": 82890 + }, + { + "epoch": 0.1674632449488318, + "grad_norm": 278785.09375, + "learning_rate": 9.868690422931372e-06, + "loss": 160891.9, + "step": 82900 + }, + { + "epoch": 0.16748344558151562, + "grad_norm": 361366.34375, + "learning_rate": 9.86861093885075e-06, + "loss": 45161.4781, + "step": 82910 + }, + { + "epoch": 0.16750364621419944, + "grad_norm": 466514.46875, + "learning_rate": 9.868531431041108e-06, + "loss": 45639.8812, + "step": 82920 + }, + { + "epoch": 0.16752384684688323, + "grad_norm": 4848734.5, + "learning_rate": 9.868451899502833e-06, + "loss": 80150.525, + "step": 82930 + }, + { + "epoch": 0.16754404747956705, + "grad_norm": 1481402.0, + "learning_rate": 9.868372344236314e-06, + "loss": 60635.4437, + "step": 82940 + }, + { + "epoch": 0.16756424811225087, + "grad_norm": 361177.375, + "learning_rate": 9.86829276524194e-06, + "loss": 89329.4438, + "step": 82950 + }, + { + "epoch": 0.1675844487449347, + "grad_norm": 0.0, + "learning_rate": 9.868213162520097e-06, + "loss": 52419.4625, + "step": 82960 + }, + { + "epoch": 0.16760464937761851, + "grad_norm": 162297.21875, + "learning_rate": 9.868133536071174e-06, + "loss": 55786.475, + "step": 82970 + }, + { + "epoch": 0.16762485001030233, + "grad_norm": 450048.625, + "learning_rate": 9.868053885895559e-06, + "loss": 42079.3406, + "step": 82980 + }, + { + "epoch": 0.16764505064298613, + "grad_norm": 71081.3984375, + "learning_rate": 9.867974211993639e-06, + "loss": 105246.4875, + "step": 82990 + }, + { + "epoch": 0.16766525127566995, + "grad_norm": 853536.875, + "learning_rate": 9.867894514365802e-06, + "loss": 125078.1375, + "step": 83000 + }, + { + "epoch": 0.16768545190835377, + "grad_norm": 166014.484375, + "learning_rate": 9.867814793012437e-06, + "loss": 58335.5312, + "step": 83010 + }, + { + "epoch": 0.1677056525410376, + "grad_norm": 7268017.5, + "learning_rate": 9.867735047933936e-06, + "loss": 119581.925, + "step": 83020 + }, + { + "epoch": 0.1677258531737214, + "grad_norm": 99418.6015625, + "learning_rate": 9.867655279130684e-06, + "loss": 142705.35, + "step": 83030 + }, + { + "epoch": 0.16774605380640523, + "grad_norm": 1152464.125, + "learning_rate": 9.86757548660307e-06, + "loss": 68863.3125, + "step": 83040 + }, + { + "epoch": 0.16776625443908902, + "grad_norm": 447562.3125, + "learning_rate": 9.867495670351483e-06, + "loss": 120215.0, + "step": 83050 + }, + { + "epoch": 0.16778645507177284, + "grad_norm": 1902092.0, + "learning_rate": 9.867415830376313e-06, + "loss": 92384.5625, + "step": 83060 + }, + { + "epoch": 0.16780665570445666, + "grad_norm": 43342.390625, + "learning_rate": 9.867335966677949e-06, + "loss": 99820.075, + "step": 83070 + }, + { + "epoch": 0.16782685633714048, + "grad_norm": 5175549.0, + "learning_rate": 9.867256079256779e-06, + "loss": 88086.4375, + "step": 83080 + }, + { + "epoch": 0.1678470569698243, + "grad_norm": 547841.25, + "learning_rate": 9.867176168113193e-06, + "loss": 145768.625, + "step": 83090 + }, + { + "epoch": 0.16786725760250812, + "grad_norm": 9459.6904296875, + "learning_rate": 9.867096233247581e-06, + "loss": 64411.425, + "step": 83100 + }, + { + "epoch": 0.16788745823519194, + "grad_norm": 28495.181640625, + "learning_rate": 9.867016274660333e-06, + "loss": 74881.9812, + "step": 83110 + }, + { + "epoch": 0.16790765886787573, + "grad_norm": 939022.9375, + "learning_rate": 9.866936292351837e-06, + "loss": 107564.175, + "step": 83120 + }, + { + "epoch": 0.16792785950055955, + "grad_norm": 987935.6875, + "learning_rate": 9.866856286322484e-06, + "loss": 42204.3656, + "step": 83130 + }, + { + "epoch": 0.16794806013324337, + "grad_norm": 1631564.25, + "learning_rate": 9.866776256572662e-06, + "loss": 244655.5, + "step": 83140 + }, + { + "epoch": 0.1679682607659272, + "grad_norm": 396054.4375, + "learning_rate": 9.866696203102765e-06, + "loss": 113127.6, + "step": 83150 + }, + { + "epoch": 0.167988461398611, + "grad_norm": 555707.9375, + "learning_rate": 9.866616125913182e-06, + "loss": 71540.875, + "step": 83160 + }, + { + "epoch": 0.16800866203129483, + "grad_norm": 1675425.0, + "learning_rate": 9.8665360250043e-06, + "loss": 90603.575, + "step": 83170 + }, + { + "epoch": 0.16802886266397862, + "grad_norm": 1254231.0, + "learning_rate": 9.866455900376514e-06, + "loss": 110749.0375, + "step": 83180 + }, + { + "epoch": 0.16804906329666244, + "grad_norm": 2272694.25, + "learning_rate": 9.86637575203021e-06, + "loss": 79059.975, + "step": 83190 + }, + { + "epoch": 0.16806926392934626, + "grad_norm": 1418710.125, + "learning_rate": 9.866295579965782e-06, + "loss": 162469.275, + "step": 83200 + }, + { + "epoch": 0.16808946456203008, + "grad_norm": 6375885.5, + "learning_rate": 9.86621538418362e-06, + "loss": 146549.775, + "step": 83210 + }, + { + "epoch": 0.1681096651947139, + "grad_norm": 1444814.75, + "learning_rate": 9.866135164684112e-06, + "loss": 147394.225, + "step": 83220 + }, + { + "epoch": 0.16812986582739772, + "grad_norm": 38424.1171875, + "learning_rate": 9.866054921467654e-06, + "loss": 157328.0, + "step": 83230 + }, + { + "epoch": 0.16815006646008154, + "grad_norm": 3901098.5, + "learning_rate": 9.865974654534634e-06, + "loss": 98969.8875, + "step": 83240 + }, + { + "epoch": 0.16817026709276534, + "grad_norm": 366303.0, + "learning_rate": 9.865894363885442e-06, + "loss": 65326.0813, + "step": 83250 + }, + { + "epoch": 0.16819046772544916, + "grad_norm": 173534.4375, + "learning_rate": 9.865814049520473e-06, + "loss": 62248.825, + "step": 83260 + }, + { + "epoch": 0.16821066835813298, + "grad_norm": 85237.0859375, + "learning_rate": 9.865733711440116e-06, + "loss": 95586.7375, + "step": 83270 + }, + { + "epoch": 0.1682308689908168, + "grad_norm": 946932.4375, + "learning_rate": 9.865653349644761e-06, + "loss": 80900.525, + "step": 83280 + }, + { + "epoch": 0.16825106962350062, + "grad_norm": 2437577.0, + "learning_rate": 9.865572964134804e-06, + "loss": 144455.9125, + "step": 83290 + }, + { + "epoch": 0.16827127025618444, + "grad_norm": 28866.326171875, + "learning_rate": 9.865492554910634e-06, + "loss": 72686.475, + "step": 83300 + }, + { + "epoch": 0.16829147088886823, + "grad_norm": 499666.90625, + "learning_rate": 9.865412121972643e-06, + "loss": 92586.6938, + "step": 83310 + }, + { + "epoch": 0.16831167152155205, + "grad_norm": 27860.041015625, + "learning_rate": 9.865331665321222e-06, + "loss": 182273.7, + "step": 83320 + }, + { + "epoch": 0.16833187215423587, + "grad_norm": 4476829.5, + "learning_rate": 9.865251184956767e-06, + "loss": 187884.3625, + "step": 83330 + }, + { + "epoch": 0.1683520727869197, + "grad_norm": 292540.3125, + "learning_rate": 9.865170680879667e-06, + "loss": 73242.5688, + "step": 83340 + }, + { + "epoch": 0.1683722734196035, + "grad_norm": 549779.25, + "learning_rate": 9.865090153090315e-06, + "loss": 133751.125, + "step": 83350 + }, + { + "epoch": 0.16839247405228733, + "grad_norm": 55845.72265625, + "learning_rate": 9.865009601589105e-06, + "loss": 94469.2125, + "step": 83360 + }, + { + "epoch": 0.16841267468497112, + "grad_norm": 960383.1875, + "learning_rate": 9.864929026376427e-06, + "loss": 120745.925, + "step": 83370 + }, + { + "epoch": 0.16843287531765494, + "grad_norm": 859529.8125, + "learning_rate": 9.864848427452675e-06, + "loss": 116670.675, + "step": 83380 + }, + { + "epoch": 0.16845307595033876, + "grad_norm": 2911474.75, + "learning_rate": 9.864767804818242e-06, + "loss": 104920.55, + "step": 83390 + }, + { + "epoch": 0.16847327658302258, + "grad_norm": 120435.4375, + "learning_rate": 9.86468715847352e-06, + "loss": 52877.7188, + "step": 83400 + }, + { + "epoch": 0.1684934772157064, + "grad_norm": 1818579.125, + "learning_rate": 9.864606488418905e-06, + "loss": 168302.925, + "step": 83410 + }, + { + "epoch": 0.16851367784839022, + "grad_norm": 34655.9375, + "learning_rate": 9.864525794654786e-06, + "loss": 48137.3812, + "step": 83420 + }, + { + "epoch": 0.16853387848107404, + "grad_norm": 637568.625, + "learning_rate": 9.864445077181559e-06, + "loss": 125205.8875, + "step": 83430 + }, + { + "epoch": 0.16855407911375783, + "grad_norm": 6636.90869140625, + "learning_rate": 9.864364335999615e-06, + "loss": 118781.825, + "step": 83440 + }, + { + "epoch": 0.16857427974644165, + "grad_norm": 1919756.5, + "learning_rate": 9.864283571109352e-06, + "loss": 102075.2875, + "step": 83450 + }, + { + "epoch": 0.16859448037912547, + "grad_norm": 43123.65625, + "learning_rate": 9.864202782511158e-06, + "loss": 121728.0375, + "step": 83460 + }, + { + "epoch": 0.1686146810118093, + "grad_norm": 225780.4375, + "learning_rate": 9.864121970205431e-06, + "loss": 83602.8625, + "step": 83470 + }, + { + "epoch": 0.1686348816444931, + "grad_norm": 724931.875, + "learning_rate": 9.864041134192563e-06, + "loss": 43021.9, + "step": 83480 + }, + { + "epoch": 0.16865508227717693, + "grad_norm": 490165.78125, + "learning_rate": 9.86396027447295e-06, + "loss": 98260.925, + "step": 83490 + }, + { + "epoch": 0.16867528290986072, + "grad_norm": 3232144.75, + "learning_rate": 9.863879391046985e-06, + "loss": 69233.0375, + "step": 83500 + }, + { + "epoch": 0.16869548354254454, + "grad_norm": 102680.5078125, + "learning_rate": 9.863798483915059e-06, + "loss": 61723.1, + "step": 83510 + }, + { + "epoch": 0.16871568417522836, + "grad_norm": 471924.71875, + "learning_rate": 9.86371755307757e-06, + "loss": 90565.4563, + "step": 83520 + }, + { + "epoch": 0.16873588480791218, + "grad_norm": 1760580.0, + "learning_rate": 9.863636598534912e-06, + "loss": 139432.775, + "step": 83530 + }, + { + "epoch": 0.168756085440596, + "grad_norm": 105564.921875, + "learning_rate": 9.863555620287479e-06, + "loss": 68492.1562, + "step": 83540 + }, + { + "epoch": 0.16877628607327982, + "grad_norm": 92811.6875, + "learning_rate": 9.863474618335666e-06, + "loss": 88858.1062, + "step": 83550 + }, + { + "epoch": 0.16879648670596364, + "grad_norm": 125408.2421875, + "learning_rate": 9.863393592679867e-06, + "loss": 62404.8562, + "step": 83560 + }, + { + "epoch": 0.16881668733864744, + "grad_norm": 2309191.5, + "learning_rate": 9.863312543320479e-06, + "loss": 90759.6187, + "step": 83570 + }, + { + "epoch": 0.16883688797133126, + "grad_norm": 1745795.5, + "learning_rate": 9.863231470257893e-06, + "loss": 77945.6313, + "step": 83580 + }, + { + "epoch": 0.16885708860401508, + "grad_norm": 1638406.75, + "learning_rate": 9.863150373492509e-06, + "loss": 83223.2063, + "step": 83590 + }, + { + "epoch": 0.1688772892366989, + "grad_norm": 52640.58203125, + "learning_rate": 9.863069253024719e-06, + "loss": 119772.3125, + "step": 83600 + }, + { + "epoch": 0.16889748986938272, + "grad_norm": 1068412.625, + "learning_rate": 9.862988108854919e-06, + "loss": 48183.5375, + "step": 83610 + }, + { + "epoch": 0.16891769050206654, + "grad_norm": 1745868.5, + "learning_rate": 9.862906940983505e-06, + "loss": 97310.5437, + "step": 83620 + }, + { + "epoch": 0.16893789113475033, + "grad_norm": 801480.0625, + "learning_rate": 9.862825749410872e-06, + "loss": 78471.5063, + "step": 83630 + }, + { + "epoch": 0.16895809176743415, + "grad_norm": 519553.84375, + "learning_rate": 9.862744534137416e-06, + "loss": 30595.4719, + "step": 83640 + }, + { + "epoch": 0.16897829240011797, + "grad_norm": 431911.0625, + "learning_rate": 9.862663295163533e-06, + "loss": 178076.5875, + "step": 83650 + }, + { + "epoch": 0.1689984930328018, + "grad_norm": 91787.546875, + "learning_rate": 9.862582032489621e-06, + "loss": 40032.3406, + "step": 83660 + }, + { + "epoch": 0.1690186936654856, + "grad_norm": 483074.84375, + "learning_rate": 9.86250074611607e-06, + "loss": 91159.5813, + "step": 83670 + }, + { + "epoch": 0.16903889429816943, + "grad_norm": 32867.2578125, + "learning_rate": 9.862419436043284e-06, + "loss": 175489.15, + "step": 83680 + }, + { + "epoch": 0.16905909493085322, + "grad_norm": 625618.3125, + "learning_rate": 9.862338102271654e-06, + "loss": 65558.6125, + "step": 83690 + }, + { + "epoch": 0.16907929556353704, + "grad_norm": 333182.3125, + "learning_rate": 9.862256744801576e-06, + "loss": 27640.9219, + "step": 83700 + }, + { + "epoch": 0.16909949619622086, + "grad_norm": 4397496.0, + "learning_rate": 9.86217536363345e-06, + "loss": 126311.4875, + "step": 83710 + }, + { + "epoch": 0.16911969682890468, + "grad_norm": 65765.15625, + "learning_rate": 9.862093958767671e-06, + "loss": 26382.0281, + "step": 83720 + }, + { + "epoch": 0.1691398974615885, + "grad_norm": 888267.1875, + "learning_rate": 9.862012530204636e-06, + "loss": 133856.4, + "step": 83730 + }, + { + "epoch": 0.16916009809427232, + "grad_norm": 1129595.375, + "learning_rate": 9.86193107794474e-06, + "loss": 99374.75, + "step": 83740 + }, + { + "epoch": 0.16918029872695614, + "grad_norm": 58900.55078125, + "learning_rate": 9.861849601988384e-06, + "loss": 66697.3562, + "step": 83750 + }, + { + "epoch": 0.16920049935963993, + "grad_norm": 522933.0625, + "learning_rate": 9.861768102335961e-06, + "loss": 150171.325, + "step": 83760 + }, + { + "epoch": 0.16922069999232375, + "grad_norm": 26614.982421875, + "learning_rate": 9.861686578987871e-06, + "loss": 165935.775, + "step": 83770 + }, + { + "epoch": 0.16924090062500757, + "grad_norm": 20050.623046875, + "learning_rate": 9.86160503194451e-06, + "loss": 46295.0844, + "step": 83780 + }, + { + "epoch": 0.1692611012576914, + "grad_norm": 848056.1875, + "learning_rate": 9.861523461206275e-06, + "loss": 175643.5, + "step": 83790 + }, + { + "epoch": 0.1692813018903752, + "grad_norm": 383296.4375, + "learning_rate": 9.861441866773564e-06, + "loss": 64989.2625, + "step": 83800 + }, + { + "epoch": 0.16930150252305903, + "grad_norm": 26184.853515625, + "learning_rate": 9.861360248646777e-06, + "loss": 73425.675, + "step": 83810 + }, + { + "epoch": 0.16932170315574283, + "grad_norm": 587583.0, + "learning_rate": 9.861278606826307e-06, + "loss": 188360.125, + "step": 83820 + }, + { + "epoch": 0.16934190378842665, + "grad_norm": 334100.96875, + "learning_rate": 9.861196941312556e-06, + "loss": 58336.9, + "step": 83830 + }, + { + "epoch": 0.16936210442111047, + "grad_norm": 16570.2890625, + "learning_rate": 9.861115252105922e-06, + "loss": 93611.3687, + "step": 83840 + }, + { + "epoch": 0.16938230505379429, + "grad_norm": 1017471.875, + "learning_rate": 9.8610335392068e-06, + "loss": 87399.325, + "step": 83850 + }, + { + "epoch": 0.1694025056864781, + "grad_norm": 499400.46875, + "learning_rate": 9.86095180261559e-06, + "loss": 81505.0562, + "step": 83860 + }, + { + "epoch": 0.16942270631916193, + "grad_norm": 91602.390625, + "learning_rate": 9.860870042332693e-06, + "loss": 51534.6687, + "step": 83870 + }, + { + "epoch": 0.16944290695184575, + "grad_norm": 34108.3203125, + "learning_rate": 9.860788258358503e-06, + "loss": 56554.0938, + "step": 83880 + }, + { + "epoch": 0.16946310758452954, + "grad_norm": 45031.7734375, + "learning_rate": 9.86070645069342e-06, + "loss": 52404.3594, + "step": 83890 + }, + { + "epoch": 0.16948330821721336, + "grad_norm": 279474.65625, + "learning_rate": 9.860624619337844e-06, + "loss": 135909.8125, + "step": 83900 + }, + { + "epoch": 0.16950350884989718, + "grad_norm": 48753.62109375, + "learning_rate": 9.860542764292173e-06, + "loss": 78801.925, + "step": 83910 + }, + { + "epoch": 0.169523709482581, + "grad_norm": 316534.125, + "learning_rate": 9.860460885556806e-06, + "loss": 67890.7125, + "step": 83920 + }, + { + "epoch": 0.16954391011526482, + "grad_norm": 326741.0625, + "learning_rate": 9.860378983132144e-06, + "loss": 85157.5125, + "step": 83930 + }, + { + "epoch": 0.16956411074794864, + "grad_norm": 0.0, + "learning_rate": 9.860297057018581e-06, + "loss": 86139.825, + "step": 83940 + }, + { + "epoch": 0.16958431138063243, + "grad_norm": 208670.453125, + "learning_rate": 9.860215107216523e-06, + "loss": 116000.3625, + "step": 83950 + }, + { + "epoch": 0.16960451201331625, + "grad_norm": 8221832.0, + "learning_rate": 9.860133133726364e-06, + "loss": 102301.5375, + "step": 83960 + }, + { + "epoch": 0.16962471264600007, + "grad_norm": 3562947.25, + "learning_rate": 9.860051136548506e-06, + "loss": 121673.475, + "step": 83970 + }, + { + "epoch": 0.1696449132786839, + "grad_norm": 342470.25, + "learning_rate": 9.859969115683348e-06, + "loss": 175105.375, + "step": 83980 + }, + { + "epoch": 0.1696651139113677, + "grad_norm": 1157447.375, + "learning_rate": 9.85988707113129e-06, + "loss": 97376.8375, + "step": 83990 + }, + { + "epoch": 0.16968531454405153, + "grad_norm": 188022.34375, + "learning_rate": 9.859805002892733e-06, + "loss": 45533.1813, + "step": 84000 + }, + { + "epoch": 0.16970551517673532, + "grad_norm": 3439770.75, + "learning_rate": 9.859722910968073e-06, + "loss": 111690.9625, + "step": 84010 + }, + { + "epoch": 0.16972571580941914, + "grad_norm": 557055.375, + "learning_rate": 9.859640795357716e-06, + "loss": 46027.1906, + "step": 84020 + }, + { + "epoch": 0.16974591644210296, + "grad_norm": 2361064.25, + "learning_rate": 9.859558656062057e-06, + "loss": 58837.9688, + "step": 84030 + }, + { + "epoch": 0.16976611707478678, + "grad_norm": 17330.50390625, + "learning_rate": 9.8594764930815e-06, + "loss": 30096.4156, + "step": 84040 + }, + { + "epoch": 0.1697863177074706, + "grad_norm": 939664.5625, + "learning_rate": 9.859394306416443e-06, + "loss": 71680.6375, + "step": 84050 + }, + { + "epoch": 0.16980651834015442, + "grad_norm": 779754.5, + "learning_rate": 9.859312096067289e-06, + "loss": 75373.0875, + "step": 84060 + }, + { + "epoch": 0.16982671897283824, + "grad_norm": 2978953.5, + "learning_rate": 9.859229862034436e-06, + "loss": 108909.7625, + "step": 84070 + }, + { + "epoch": 0.16984691960552203, + "grad_norm": 10051308.0, + "learning_rate": 9.859147604318286e-06, + "loss": 147104.5, + "step": 84080 + }, + { + "epoch": 0.16986712023820585, + "grad_norm": 2637596.5, + "learning_rate": 9.859065322919239e-06, + "loss": 52179.9094, + "step": 84090 + }, + { + "epoch": 0.16988732087088967, + "grad_norm": 35130.83203125, + "learning_rate": 9.8589830178377e-06, + "loss": 229019.85, + "step": 84100 + }, + { + "epoch": 0.1699075215035735, + "grad_norm": 14701.9326171875, + "learning_rate": 9.858900689074065e-06, + "loss": 50161.1344, + "step": 84110 + }, + { + "epoch": 0.16992772213625731, + "grad_norm": 322314.15625, + "learning_rate": 9.858818336628737e-06, + "loss": 158364.625, + "step": 84120 + }, + { + "epoch": 0.16994792276894113, + "grad_norm": 2372326.5, + "learning_rate": 9.858735960502118e-06, + "loss": 181191.2375, + "step": 84130 + }, + { + "epoch": 0.16996812340162493, + "grad_norm": 92201.671875, + "learning_rate": 9.858653560694609e-06, + "loss": 63669.9, + "step": 84140 + }, + { + "epoch": 0.16998832403430875, + "grad_norm": 187578.6875, + "learning_rate": 9.858571137206611e-06, + "loss": 41019.0844, + "step": 84150 + }, + { + "epoch": 0.17000852466699257, + "grad_norm": 7246691.5, + "learning_rate": 9.858488690038529e-06, + "loss": 126237.3625, + "step": 84160 + }, + { + "epoch": 0.1700287252996764, + "grad_norm": 22435.263671875, + "learning_rate": 9.858406219190761e-06, + "loss": 122565.7625, + "step": 84170 + }, + { + "epoch": 0.1700489259323602, + "grad_norm": 1697321.75, + "learning_rate": 9.858323724663712e-06, + "loss": 122111.675, + "step": 84180 + }, + { + "epoch": 0.17006912656504403, + "grad_norm": 115578.6875, + "learning_rate": 9.85824120645778e-06, + "loss": 51351.6937, + "step": 84190 + }, + { + "epoch": 0.17008932719772785, + "grad_norm": 96842.6796875, + "learning_rate": 9.85815866457337e-06, + "loss": 113260.45, + "step": 84200 + }, + { + "epoch": 0.17010952783041164, + "grad_norm": 50179.9921875, + "learning_rate": 9.858076099010885e-06, + "loss": 70752.825, + "step": 84210 + }, + { + "epoch": 0.17012972846309546, + "grad_norm": 1766334.875, + "learning_rate": 9.857993509770725e-06, + "loss": 25045.4859, + "step": 84220 + }, + { + "epoch": 0.17014992909577928, + "grad_norm": 118022.2734375, + "learning_rate": 9.857910896853296e-06, + "loss": 43569.65, + "step": 84230 + }, + { + "epoch": 0.1701701297284631, + "grad_norm": 4935398.0, + "learning_rate": 9.857828260258997e-06, + "loss": 92138.475, + "step": 84240 + }, + { + "epoch": 0.17019033036114692, + "grad_norm": 968136.0, + "learning_rate": 9.857745599988231e-06, + "loss": 80024.5125, + "step": 84250 + }, + { + "epoch": 0.17021053099383074, + "grad_norm": 2009933.875, + "learning_rate": 9.857662916041404e-06, + "loss": 91997.05, + "step": 84260 + }, + { + "epoch": 0.17023073162651453, + "grad_norm": 2187787.25, + "learning_rate": 9.857580208418917e-06, + "loss": 71534.25, + "step": 84270 + }, + { + "epoch": 0.17025093225919835, + "grad_norm": 221175.5625, + "learning_rate": 9.857497477121172e-06, + "loss": 123165.225, + "step": 84280 + }, + { + "epoch": 0.17027113289188217, + "grad_norm": 99829.78125, + "learning_rate": 9.857414722148574e-06, + "loss": 101726.8687, + "step": 84290 + }, + { + "epoch": 0.170291333524566, + "grad_norm": 2483548.5, + "learning_rate": 9.857331943501527e-06, + "loss": 112511.9, + "step": 84300 + }, + { + "epoch": 0.1703115341572498, + "grad_norm": 794453.125, + "learning_rate": 9.857249141180431e-06, + "loss": 66541.6375, + "step": 84310 + }, + { + "epoch": 0.17033173478993363, + "grad_norm": 1289359.875, + "learning_rate": 9.857166315185693e-06, + "loss": 53662.025, + "step": 84320 + }, + { + "epoch": 0.17035193542261742, + "grad_norm": 2600407.25, + "learning_rate": 9.857083465517716e-06, + "loss": 109995.575, + "step": 84330 + }, + { + "epoch": 0.17037213605530124, + "grad_norm": 2137476.75, + "learning_rate": 9.857000592176902e-06, + "loss": 74023.1625, + "step": 84340 + }, + { + "epoch": 0.17039233668798506, + "grad_norm": 83517.5859375, + "learning_rate": 9.856917695163659e-06, + "loss": 110858.375, + "step": 84350 + }, + { + "epoch": 0.17041253732066888, + "grad_norm": 510062.6875, + "learning_rate": 9.856834774478385e-06, + "loss": 143997.5, + "step": 84360 + }, + { + "epoch": 0.1704327379533527, + "grad_norm": 29111320.0, + "learning_rate": 9.85675183012149e-06, + "loss": 208659.95, + "step": 84370 + }, + { + "epoch": 0.17045293858603652, + "grad_norm": 147309.90625, + "learning_rate": 9.856668862093372e-06, + "loss": 70070.8687, + "step": 84380 + }, + { + "epoch": 0.17047313921872034, + "grad_norm": 0.0, + "learning_rate": 9.856585870394442e-06, + "loss": 183415.5125, + "step": 84390 + }, + { + "epoch": 0.17049333985140414, + "grad_norm": 445365.9375, + "learning_rate": 9.856502855025101e-06, + "loss": 120635.2875, + "step": 84400 + }, + { + "epoch": 0.17051354048408796, + "grad_norm": 1464321.125, + "learning_rate": 9.856419815985754e-06, + "loss": 75231.95, + "step": 84410 + }, + { + "epoch": 0.17053374111677178, + "grad_norm": 1596258.0, + "learning_rate": 9.856336753276804e-06, + "loss": 75830.5375, + "step": 84420 + }, + { + "epoch": 0.1705539417494556, + "grad_norm": 4617557.5, + "learning_rate": 9.85625366689866e-06, + "loss": 120093.075, + "step": 84430 + }, + { + "epoch": 0.17057414238213942, + "grad_norm": 44742456.0, + "learning_rate": 9.856170556851725e-06, + "loss": 90521.9812, + "step": 84440 + }, + { + "epoch": 0.17059434301482324, + "grad_norm": 39104.91796875, + "learning_rate": 9.856087423136403e-06, + "loss": 67295.775, + "step": 84450 + }, + { + "epoch": 0.17061454364750703, + "grad_norm": 6920226.0, + "learning_rate": 9.856004265753099e-06, + "loss": 65411.1937, + "step": 84460 + }, + { + "epoch": 0.17063474428019085, + "grad_norm": 835597.6875, + "learning_rate": 9.85592108470222e-06, + "loss": 48385.725, + "step": 84470 + }, + { + "epoch": 0.17065494491287467, + "grad_norm": 11886.7890625, + "learning_rate": 9.85583787998417e-06, + "loss": 159009.15, + "step": 84480 + }, + { + "epoch": 0.1706751455455585, + "grad_norm": 875399.25, + "learning_rate": 9.855754651599355e-06, + "loss": 67464.5, + "step": 84490 + }, + { + "epoch": 0.1706953461782423, + "grad_norm": 5647970.5, + "learning_rate": 9.85567139954818e-06, + "loss": 97258.4062, + "step": 84500 + }, + { + "epoch": 0.17071554681092613, + "grad_norm": 366014.71875, + "learning_rate": 9.855588123831053e-06, + "loss": 70582.625, + "step": 84510 + }, + { + "epoch": 0.17073574744360995, + "grad_norm": 19085.451171875, + "learning_rate": 9.855504824448379e-06, + "loss": 109395.8625, + "step": 84520 + }, + { + "epoch": 0.17075594807629374, + "grad_norm": 4234970.5, + "learning_rate": 9.855421501400562e-06, + "loss": 166055.7875, + "step": 84530 + }, + { + "epoch": 0.17077614870897756, + "grad_norm": 262751.96875, + "learning_rate": 9.85533815468801e-06, + "loss": 154113.5875, + "step": 84540 + }, + { + "epoch": 0.17079634934166138, + "grad_norm": 198071.25, + "learning_rate": 9.85525478431113e-06, + "loss": 150415.575, + "step": 84550 + }, + { + "epoch": 0.1708165499743452, + "grad_norm": 134853.125, + "learning_rate": 9.855171390270325e-06, + "loss": 59713.5625, + "step": 84560 + }, + { + "epoch": 0.17083675060702902, + "grad_norm": 2856370.75, + "learning_rate": 9.855087972566004e-06, + "loss": 131097.7375, + "step": 84570 + }, + { + "epoch": 0.17085695123971284, + "grad_norm": 178566.546875, + "learning_rate": 9.855004531198573e-06, + "loss": 126264.9875, + "step": 84580 + }, + { + "epoch": 0.17087715187239663, + "grad_norm": 100678.0859375, + "learning_rate": 9.854921066168439e-06, + "loss": 69493.9563, + "step": 84590 + }, + { + "epoch": 0.17089735250508045, + "grad_norm": 476674.03125, + "learning_rate": 9.854837577476008e-06, + "loss": 115441.2875, + "step": 84600 + }, + { + "epoch": 0.17091755313776427, + "grad_norm": 145367.234375, + "learning_rate": 9.854754065121689e-06, + "loss": 54584.8812, + "step": 84610 + }, + { + "epoch": 0.1709377537704481, + "grad_norm": 486515.78125, + "learning_rate": 9.854670529105887e-06, + "loss": 51870.5, + "step": 84620 + }, + { + "epoch": 0.1709579544031319, + "grad_norm": 820538.125, + "learning_rate": 9.854586969429009e-06, + "loss": 72587.6062, + "step": 84630 + }, + { + "epoch": 0.17097815503581573, + "grad_norm": 16521.333984375, + "learning_rate": 9.854503386091463e-06, + "loss": 103002.9563, + "step": 84640 + }, + { + "epoch": 0.17099835566849952, + "grad_norm": 17575.087890625, + "learning_rate": 9.854419779093656e-06, + "loss": 109701.7375, + "step": 84650 + }, + { + "epoch": 0.17101855630118334, + "grad_norm": 852768.3125, + "learning_rate": 9.854336148435997e-06, + "loss": 63486.4062, + "step": 84660 + }, + { + "epoch": 0.17103875693386716, + "grad_norm": 56029.56640625, + "learning_rate": 9.85425249411889e-06, + "loss": 59760.9625, + "step": 84670 + }, + { + "epoch": 0.17105895756655098, + "grad_norm": 125707.328125, + "learning_rate": 9.854168816142747e-06, + "loss": 85299.15, + "step": 84680 + }, + { + "epoch": 0.1710791581992348, + "grad_norm": 3248313.25, + "learning_rate": 9.854085114507974e-06, + "loss": 68453.025, + "step": 84690 + }, + { + "epoch": 0.17109935883191862, + "grad_norm": 1466322.875, + "learning_rate": 9.854001389214979e-06, + "loss": 89037.5188, + "step": 84700 + }, + { + "epoch": 0.17111955946460244, + "grad_norm": 2464463.0, + "learning_rate": 9.853917640264169e-06, + "loss": 100505.1687, + "step": 84710 + }, + { + "epoch": 0.17113976009728624, + "grad_norm": 177597.65625, + "learning_rate": 9.853833867655954e-06, + "loss": 73353.0, + "step": 84720 + }, + { + "epoch": 0.17115996072997006, + "grad_norm": 144750.265625, + "learning_rate": 9.853750071390739e-06, + "loss": 121049.0125, + "step": 84730 + }, + { + "epoch": 0.17118016136265388, + "grad_norm": 2751823.25, + "learning_rate": 9.853666251468938e-06, + "loss": 93993.2375, + "step": 84740 + }, + { + "epoch": 0.1712003619953377, + "grad_norm": 24790046.0, + "learning_rate": 9.853582407890954e-06, + "loss": 193577.1125, + "step": 84750 + }, + { + "epoch": 0.17122056262802152, + "grad_norm": 34723.57421875, + "learning_rate": 9.853498540657201e-06, + "loss": 101007.0625, + "step": 84760 + }, + { + "epoch": 0.17124076326070534, + "grad_norm": 2328934.75, + "learning_rate": 9.853414649768082e-06, + "loss": 70734.7375, + "step": 84770 + }, + { + "epoch": 0.17126096389338913, + "grad_norm": 628602.75, + "learning_rate": 9.85333073522401e-06, + "loss": 114200.6625, + "step": 84780 + }, + { + "epoch": 0.17128116452607295, + "grad_norm": 4158398.25, + "learning_rate": 9.853246797025391e-06, + "loss": 144494.3, + "step": 84790 + }, + { + "epoch": 0.17130136515875677, + "grad_norm": 60020.38671875, + "learning_rate": 9.853162835172638e-06, + "loss": 109114.3, + "step": 84800 + }, + { + "epoch": 0.1713215657914406, + "grad_norm": 5833.9423828125, + "learning_rate": 9.853078849666156e-06, + "loss": 89660.2625, + "step": 84810 + }, + { + "epoch": 0.1713417664241244, + "grad_norm": 286674.78125, + "learning_rate": 9.852994840506357e-06, + "loss": 53563.2625, + "step": 84820 + }, + { + "epoch": 0.17136196705680823, + "grad_norm": 649941.75, + "learning_rate": 9.85291080769365e-06, + "loss": 104399.1938, + "step": 84830 + }, + { + "epoch": 0.17138216768949205, + "grad_norm": 83106.59375, + "learning_rate": 9.852826751228445e-06, + "loss": 39842.65, + "step": 84840 + }, + { + "epoch": 0.17140236832217584, + "grad_norm": 2690189.75, + "learning_rate": 9.852742671111151e-06, + "loss": 144653.725, + "step": 84850 + }, + { + "epoch": 0.17142256895485966, + "grad_norm": 1581758.875, + "learning_rate": 9.852658567342177e-06, + "loss": 51022.2, + "step": 84860 + }, + { + "epoch": 0.17144276958754348, + "grad_norm": 3350173.5, + "learning_rate": 9.852574439921933e-06, + "loss": 195175.075, + "step": 84870 + }, + { + "epoch": 0.1714629702202273, + "grad_norm": 447886.53125, + "learning_rate": 9.85249028885083e-06, + "loss": 70406.4062, + "step": 84880 + }, + { + "epoch": 0.17148317085291112, + "grad_norm": 1946.927001953125, + "learning_rate": 9.852406114129277e-06, + "loss": 51857.3063, + "step": 84890 + }, + { + "epoch": 0.17150337148559494, + "grad_norm": 1842751.875, + "learning_rate": 9.852321915757688e-06, + "loss": 210761.575, + "step": 84900 + }, + { + "epoch": 0.17152357211827873, + "grad_norm": 1191426.75, + "learning_rate": 9.852237693736469e-06, + "loss": 132318.825, + "step": 84910 + }, + { + "epoch": 0.17154377275096255, + "grad_norm": 861368.625, + "learning_rate": 9.852153448066031e-06, + "loss": 86951.3, + "step": 84920 + }, + { + "epoch": 0.17156397338364637, + "grad_norm": 1706922.25, + "learning_rate": 9.852069178746786e-06, + "loss": 155760.775, + "step": 84930 + }, + { + "epoch": 0.1715841740163302, + "grad_norm": 18503838.0, + "learning_rate": 9.851984885779147e-06, + "loss": 124654.6, + "step": 84940 + }, + { + "epoch": 0.171604374649014, + "grad_norm": 3477432.0, + "learning_rate": 9.85190056916352e-06, + "loss": 129700.85, + "step": 84950 + }, + { + "epoch": 0.17162457528169783, + "grad_norm": 614868.1875, + "learning_rate": 9.851816228900317e-06, + "loss": 94076.1812, + "step": 84960 + }, + { + "epoch": 0.17164477591438163, + "grad_norm": 6310544.0, + "learning_rate": 9.85173186498995e-06, + "loss": 29510.3187, + "step": 84970 + }, + { + "epoch": 0.17166497654706545, + "grad_norm": 7500.48974609375, + "learning_rate": 9.851647477432834e-06, + "loss": 74728.7875, + "step": 84980 + }, + { + "epoch": 0.17168517717974927, + "grad_norm": 483119.75, + "learning_rate": 9.851563066229373e-06, + "loss": 63881.6813, + "step": 84990 + }, + { + "epoch": 0.17170537781243309, + "grad_norm": 1111393.625, + "learning_rate": 9.851478631379982e-06, + "loss": 63195.925, + "step": 85000 + }, + { + "epoch": 0.1717255784451169, + "grad_norm": 1194037.375, + "learning_rate": 9.851394172885075e-06, + "loss": 84346.8562, + "step": 85010 + }, + { + "epoch": 0.17174577907780073, + "grad_norm": 1172453.25, + "learning_rate": 9.85130969074506e-06, + "loss": 56916.9375, + "step": 85020 + }, + { + "epoch": 0.17176597971048455, + "grad_norm": 365894.5625, + "learning_rate": 9.851225184960349e-06, + "loss": 183606.8125, + "step": 85030 + }, + { + "epoch": 0.17178618034316834, + "grad_norm": 1408969.375, + "learning_rate": 9.851140655531357e-06, + "loss": 50224.9219, + "step": 85040 + }, + { + "epoch": 0.17180638097585216, + "grad_norm": 692126.0625, + "learning_rate": 9.851056102458492e-06, + "loss": 72058.625, + "step": 85050 + }, + { + "epoch": 0.17182658160853598, + "grad_norm": 742686.6875, + "learning_rate": 9.85097152574217e-06, + "loss": 96524.4875, + "step": 85060 + }, + { + "epoch": 0.1718467822412198, + "grad_norm": 1113386.75, + "learning_rate": 9.8508869253828e-06, + "loss": 126269.9625, + "step": 85070 + }, + { + "epoch": 0.17186698287390362, + "grad_norm": 3148118.0, + "learning_rate": 9.850802301380793e-06, + "loss": 69791.9438, + "step": 85080 + }, + { + "epoch": 0.17188718350658744, + "grad_norm": 21995.1171875, + "learning_rate": 9.850717653736566e-06, + "loss": 116240.525, + "step": 85090 + }, + { + "epoch": 0.17190738413927123, + "grad_norm": 152668.578125, + "learning_rate": 9.85063298245053e-06, + "loss": 132257.2125, + "step": 85100 + }, + { + "epoch": 0.17192758477195505, + "grad_norm": 537737.8125, + "learning_rate": 9.850548287523096e-06, + "loss": 56667.4125, + "step": 85110 + }, + { + "epoch": 0.17194778540463887, + "grad_norm": 310421.8125, + "learning_rate": 9.850463568954679e-06, + "loss": 86326.2812, + "step": 85120 + }, + { + "epoch": 0.1719679860373227, + "grad_norm": 688733.5625, + "learning_rate": 9.85037882674569e-06, + "loss": 83105.8625, + "step": 85130 + }, + { + "epoch": 0.1719881866700065, + "grad_norm": 450114.6875, + "learning_rate": 9.850294060896544e-06, + "loss": 100517.4375, + "step": 85140 + }, + { + "epoch": 0.17200838730269033, + "grad_norm": 968789.875, + "learning_rate": 9.850209271407653e-06, + "loss": 68094.7188, + "step": 85150 + }, + { + "epoch": 0.17202858793537412, + "grad_norm": 0.0, + "learning_rate": 9.850124458279429e-06, + "loss": 76017.6375, + "step": 85160 + }, + { + "epoch": 0.17204878856805794, + "grad_norm": 1335090.875, + "learning_rate": 9.850039621512287e-06, + "loss": 84252.6125, + "step": 85170 + }, + { + "epoch": 0.17206898920074176, + "grad_norm": 1394244.625, + "learning_rate": 9.849954761106642e-06, + "loss": 89932.3875, + "step": 85180 + }, + { + "epoch": 0.17208918983342558, + "grad_norm": 3431741.0, + "learning_rate": 9.849869877062903e-06, + "loss": 65082.8875, + "step": 85190 + }, + { + "epoch": 0.1721093904661094, + "grad_norm": 1427562.875, + "learning_rate": 9.849784969381488e-06, + "loss": 69620.6125, + "step": 85200 + }, + { + "epoch": 0.17212959109879322, + "grad_norm": 807341.25, + "learning_rate": 9.849700038062808e-06, + "loss": 51709.6031, + "step": 85210 + }, + { + "epoch": 0.17214979173147704, + "grad_norm": 29820.162109375, + "learning_rate": 9.849615083107279e-06, + "loss": 66728.5938, + "step": 85220 + }, + { + "epoch": 0.17216999236416083, + "grad_norm": 4381195.5, + "learning_rate": 9.849530104515314e-06, + "loss": 101284.9875, + "step": 85230 + }, + { + "epoch": 0.17219019299684465, + "grad_norm": 50167.27734375, + "learning_rate": 9.849445102287328e-06, + "loss": 39925.2906, + "step": 85240 + }, + { + "epoch": 0.17221039362952847, + "grad_norm": 5579139.5, + "learning_rate": 9.849360076423736e-06, + "loss": 124157.3125, + "step": 85250 + }, + { + "epoch": 0.1722305942622123, + "grad_norm": 7292891.0, + "learning_rate": 9.849275026924949e-06, + "loss": 128749.45, + "step": 85260 + }, + { + "epoch": 0.17225079489489611, + "grad_norm": 14139391.0, + "learning_rate": 9.849189953791385e-06, + "loss": 80630.7625, + "step": 85270 + }, + { + "epoch": 0.17227099552757993, + "grad_norm": 126275.4453125, + "learning_rate": 9.849104857023455e-06, + "loss": 110161.3875, + "step": 85280 + }, + { + "epoch": 0.17229119616026373, + "grad_norm": 4425069.5, + "learning_rate": 9.849019736621578e-06, + "loss": 182887.5125, + "step": 85290 + }, + { + "epoch": 0.17231139679294755, + "grad_norm": 2852624.0, + "learning_rate": 9.848934592586165e-06, + "loss": 118773.475, + "step": 85300 + }, + { + "epoch": 0.17233159742563137, + "grad_norm": 3706841.5, + "learning_rate": 9.848849424917636e-06, + "loss": 111329.7, + "step": 85310 + }, + { + "epoch": 0.1723517980583152, + "grad_norm": 8075.9873046875, + "learning_rate": 9.848764233616401e-06, + "loss": 227949.5, + "step": 85320 + }, + { + "epoch": 0.172371998690999, + "grad_norm": 0.0, + "learning_rate": 9.848679018682879e-06, + "loss": 65189.0188, + "step": 85330 + }, + { + "epoch": 0.17239219932368283, + "grad_norm": 365706.6875, + "learning_rate": 9.848593780117482e-06, + "loss": 144885.2375, + "step": 85340 + }, + { + "epoch": 0.17241239995636665, + "grad_norm": 84373.40625, + "learning_rate": 9.848508517920626e-06, + "loss": 67783.0938, + "step": 85350 + }, + { + "epoch": 0.17243260058905044, + "grad_norm": 940009.6875, + "learning_rate": 9.84842323209273e-06, + "loss": 121909.9875, + "step": 85360 + }, + { + "epoch": 0.17245280122173426, + "grad_norm": 3720907.75, + "learning_rate": 9.848337922634205e-06, + "loss": 93305.3687, + "step": 85370 + }, + { + "epoch": 0.17247300185441808, + "grad_norm": 1807751.375, + "learning_rate": 9.84825258954547e-06, + "loss": 129701.275, + "step": 85380 + }, + { + "epoch": 0.1724932024871019, + "grad_norm": 322752.8125, + "learning_rate": 9.84816723282694e-06, + "loss": 88766.55, + "step": 85390 + }, + { + "epoch": 0.17251340311978572, + "grad_norm": 7632821.5, + "learning_rate": 9.84808185247903e-06, + "loss": 132029.225, + "step": 85400 + }, + { + "epoch": 0.17253360375246954, + "grad_norm": 251622.875, + "learning_rate": 9.847996448502159e-06, + "loss": 120342.6, + "step": 85410 + }, + { + "epoch": 0.17255380438515333, + "grad_norm": 7271594.5, + "learning_rate": 9.84791102089674e-06, + "loss": 156756.025, + "step": 85420 + }, + { + "epoch": 0.17257400501783715, + "grad_norm": 233988.28125, + "learning_rate": 9.84782556966319e-06, + "loss": 130790.525, + "step": 85430 + }, + { + "epoch": 0.17259420565052097, + "grad_norm": 1584128.0, + "learning_rate": 9.847740094801928e-06, + "loss": 103175.925, + "step": 85440 + }, + { + "epoch": 0.1726144062832048, + "grad_norm": 0.0, + "learning_rate": 9.847654596313368e-06, + "loss": 179568.4125, + "step": 85450 + }, + { + "epoch": 0.1726346069158886, + "grad_norm": 3144208.25, + "learning_rate": 9.847569074197927e-06, + "loss": 97317.4438, + "step": 85460 + }, + { + "epoch": 0.17265480754857243, + "grad_norm": 1277935.125, + "learning_rate": 9.847483528456021e-06, + "loss": 106218.3375, + "step": 85470 + }, + { + "epoch": 0.17267500818125622, + "grad_norm": 5128646.0, + "learning_rate": 9.84739795908807e-06, + "loss": 103216.2063, + "step": 85480 + }, + { + "epoch": 0.17269520881394004, + "grad_norm": 519933.0625, + "learning_rate": 9.84731236609449e-06, + "loss": 113175.625, + "step": 85490 + }, + { + "epoch": 0.17271540944662386, + "grad_norm": 4610229.0, + "learning_rate": 9.847226749475696e-06, + "loss": 140267.4125, + "step": 85500 + }, + { + "epoch": 0.17273561007930768, + "grad_norm": 9888.8505859375, + "learning_rate": 9.847141109232105e-06, + "loss": 109112.0375, + "step": 85510 + }, + { + "epoch": 0.1727558107119915, + "grad_norm": 50808.40625, + "learning_rate": 9.84705544536414e-06, + "loss": 66286.8062, + "step": 85520 + }, + { + "epoch": 0.17277601134467532, + "grad_norm": 168924.015625, + "learning_rate": 9.846969757872212e-06, + "loss": 122172.3125, + "step": 85530 + }, + { + "epoch": 0.17279621197735914, + "grad_norm": 40785.48046875, + "learning_rate": 9.846884046756742e-06, + "loss": 164315.9, + "step": 85540 + }, + { + "epoch": 0.17281641261004294, + "grad_norm": 256235.046875, + "learning_rate": 9.846798312018147e-06, + "loss": 66955.95, + "step": 85550 + }, + { + "epoch": 0.17283661324272676, + "grad_norm": 968630.0, + "learning_rate": 9.846712553656845e-06, + "loss": 42554.4125, + "step": 85560 + }, + { + "epoch": 0.17285681387541058, + "grad_norm": 3979826.25, + "learning_rate": 9.846626771673254e-06, + "loss": 118298.0875, + "step": 85570 + }, + { + "epoch": 0.1728770145080944, + "grad_norm": 245272.25, + "learning_rate": 9.846540966067793e-06, + "loss": 73550.0, + "step": 85580 + }, + { + "epoch": 0.17289721514077822, + "grad_norm": 200445.0, + "learning_rate": 9.846455136840876e-06, + "loss": 51470.3594, + "step": 85590 + }, + { + "epoch": 0.17291741577346204, + "grad_norm": 153196.0, + "learning_rate": 9.846369283992927e-06, + "loss": 28700.2562, + "step": 85600 + }, + { + "epoch": 0.17293761640614583, + "grad_norm": 111539.0625, + "learning_rate": 9.846283407524362e-06, + "loss": 75469.0875, + "step": 85610 + }, + { + "epoch": 0.17295781703882965, + "grad_norm": 192198.65625, + "learning_rate": 9.846197507435598e-06, + "loss": 94139.4375, + "step": 85620 + }, + { + "epoch": 0.17297801767151347, + "grad_norm": 97552.1484375, + "learning_rate": 9.846111583727056e-06, + "loss": 54134.2812, + "step": 85630 + }, + { + "epoch": 0.1729982183041973, + "grad_norm": 86617.4375, + "learning_rate": 9.846025636399152e-06, + "loss": 276544.575, + "step": 85640 + }, + { + "epoch": 0.1730184189368811, + "grad_norm": 772961.75, + "learning_rate": 9.845939665452309e-06, + "loss": 81564.1875, + "step": 85650 + }, + { + "epoch": 0.17303861956956493, + "grad_norm": 8130781.5, + "learning_rate": 9.845853670886945e-06, + "loss": 79921.475, + "step": 85660 + }, + { + "epoch": 0.17305882020224875, + "grad_norm": 2017464.0, + "learning_rate": 9.845767652703475e-06, + "loss": 198128.5875, + "step": 85670 + }, + { + "epoch": 0.17307902083493254, + "grad_norm": 165970.4375, + "learning_rate": 9.845681610902323e-06, + "loss": 158418.2, + "step": 85680 + }, + { + "epoch": 0.17309922146761636, + "grad_norm": 495162.25, + "learning_rate": 9.845595545483906e-06, + "loss": 87334.975, + "step": 85690 + }, + { + "epoch": 0.17311942210030018, + "grad_norm": 78695.140625, + "learning_rate": 9.845509456448642e-06, + "loss": 83167.9625, + "step": 85700 + }, + { + "epoch": 0.173139622732984, + "grad_norm": 375500.21875, + "learning_rate": 9.845423343796957e-06, + "loss": 238119.5, + "step": 85710 + }, + { + "epoch": 0.17315982336566782, + "grad_norm": 39618.234375, + "learning_rate": 9.845337207529264e-06, + "loss": 103442.825, + "step": 85720 + }, + { + "epoch": 0.17318002399835164, + "grad_norm": 872171.625, + "learning_rate": 9.845251047645984e-06, + "loss": 72964.4375, + "step": 85730 + }, + { + "epoch": 0.17320022463103543, + "grad_norm": 3311772.5, + "learning_rate": 9.84516486414754e-06, + "loss": 149301.3, + "step": 85740 + }, + { + "epoch": 0.17322042526371925, + "grad_norm": 383501.65625, + "learning_rate": 9.845078657034348e-06, + "loss": 44277.4938, + "step": 85750 + }, + { + "epoch": 0.17324062589640307, + "grad_norm": 2166347.25, + "learning_rate": 9.844992426306832e-06, + "loss": 71839.975, + "step": 85760 + }, + { + "epoch": 0.1732608265290869, + "grad_norm": 2204905.25, + "learning_rate": 9.84490617196541e-06, + "loss": 32986.4594, + "step": 85770 + }, + { + "epoch": 0.1732810271617707, + "grad_norm": 2265100.25, + "learning_rate": 9.844819894010502e-06, + "loss": 56036.0875, + "step": 85780 + }, + { + "epoch": 0.17330122779445453, + "grad_norm": 1972158.25, + "learning_rate": 9.84473359244253e-06, + "loss": 21570.2844, + "step": 85790 + }, + { + "epoch": 0.17332142842713832, + "grad_norm": 1615913.75, + "learning_rate": 9.844647267261915e-06, + "loss": 36768.8656, + "step": 85800 + }, + { + "epoch": 0.17334162905982214, + "grad_norm": 134558.46875, + "learning_rate": 9.844560918469076e-06, + "loss": 152868.0125, + "step": 85810 + }, + { + "epoch": 0.17336182969250596, + "grad_norm": 744732.1875, + "learning_rate": 9.844474546064436e-06, + "loss": 140645.875, + "step": 85820 + }, + { + "epoch": 0.17338203032518978, + "grad_norm": 952572.8125, + "learning_rate": 9.844388150048413e-06, + "loss": 209378.425, + "step": 85830 + }, + { + "epoch": 0.1734022309578736, + "grad_norm": 3718508.0, + "learning_rate": 9.844301730421431e-06, + "loss": 72200.475, + "step": 85840 + }, + { + "epoch": 0.17342243159055742, + "grad_norm": 123384.9765625, + "learning_rate": 9.84421528718391e-06, + "loss": 77344.0813, + "step": 85850 + }, + { + "epoch": 0.17344263222324124, + "grad_norm": 192309.40625, + "learning_rate": 9.844128820336269e-06, + "loss": 99619.1812, + "step": 85860 + }, + { + "epoch": 0.17346283285592504, + "grad_norm": 301132.4375, + "learning_rate": 9.844042329878934e-06, + "loss": 106352.4875, + "step": 85870 + }, + { + "epoch": 0.17348303348860886, + "grad_norm": 1700730.875, + "learning_rate": 9.843955815812322e-06, + "loss": 105558.3625, + "step": 85880 + }, + { + "epoch": 0.17350323412129268, + "grad_norm": 1828972.25, + "learning_rate": 9.843869278136857e-06, + "loss": 100935.6687, + "step": 85890 + }, + { + "epoch": 0.1735234347539765, + "grad_norm": 3997630.0, + "learning_rate": 9.843782716852963e-06, + "loss": 129154.4875, + "step": 85900 + }, + { + "epoch": 0.17354363538666032, + "grad_norm": 5638356.0, + "learning_rate": 9.843696131961058e-06, + "loss": 96059.25, + "step": 85910 + }, + { + "epoch": 0.17356383601934414, + "grad_norm": 671609.375, + "learning_rate": 9.843609523461565e-06, + "loss": 94776.7188, + "step": 85920 + }, + { + "epoch": 0.17358403665202793, + "grad_norm": 10854.08203125, + "learning_rate": 9.843522891354908e-06, + "loss": 61528.6188, + "step": 85930 + }, + { + "epoch": 0.17360423728471175, + "grad_norm": 225718.515625, + "learning_rate": 9.843436235641506e-06, + "loss": 147844.9875, + "step": 85940 + }, + { + "epoch": 0.17362443791739557, + "grad_norm": 18769310.0, + "learning_rate": 9.843349556321787e-06, + "loss": 75158.8687, + "step": 85950 + }, + { + "epoch": 0.1736446385500794, + "grad_norm": 340385.46875, + "learning_rate": 9.843262853396164e-06, + "loss": 85099.9688, + "step": 85960 + }, + { + "epoch": 0.1736648391827632, + "grad_norm": 135110.609375, + "learning_rate": 9.84317612686507e-06, + "loss": 143877.1875, + "step": 85970 + }, + { + "epoch": 0.17368503981544703, + "grad_norm": 7779738.0, + "learning_rate": 9.843089376728922e-06, + "loss": 115764.8875, + "step": 85980 + }, + { + "epoch": 0.17370524044813085, + "grad_norm": 2572429.75, + "learning_rate": 9.843002602988143e-06, + "loss": 50530.9031, + "step": 85990 + }, + { + "epoch": 0.17372544108081464, + "grad_norm": 808074.5625, + "learning_rate": 9.842915805643156e-06, + "loss": 109705.8125, + "step": 86000 + }, + { + "epoch": 0.17374564171349846, + "grad_norm": 458423.3125, + "learning_rate": 9.842828984694385e-06, + "loss": 70361.8, + "step": 86010 + }, + { + "epoch": 0.17376584234618228, + "grad_norm": 463002.125, + "learning_rate": 9.842742140142255e-06, + "loss": 80232.075, + "step": 86020 + }, + { + "epoch": 0.1737860429788661, + "grad_norm": 128225.0859375, + "learning_rate": 9.842655271987185e-06, + "loss": 54690.3313, + "step": 86030 + }, + { + "epoch": 0.17380624361154992, + "grad_norm": 4375779.5, + "learning_rate": 9.8425683802296e-06, + "loss": 215332.5, + "step": 86040 + }, + { + "epoch": 0.17382644424423374, + "grad_norm": 418276.21875, + "learning_rate": 9.842481464869926e-06, + "loss": 179482.8875, + "step": 86050 + }, + { + "epoch": 0.17384664487691753, + "grad_norm": 5283059.0, + "learning_rate": 9.842394525908585e-06, + "loss": 224107.55, + "step": 86060 + }, + { + "epoch": 0.17386684550960135, + "grad_norm": 3819024.5, + "learning_rate": 9.842307563345999e-06, + "loss": 88339.0875, + "step": 86070 + }, + { + "epoch": 0.17388704614228517, + "grad_norm": 1857526.375, + "learning_rate": 9.842220577182592e-06, + "loss": 217493.4, + "step": 86080 + }, + { + "epoch": 0.173907246774969, + "grad_norm": 42686.7578125, + "learning_rate": 9.842133567418793e-06, + "loss": 119138.125, + "step": 86090 + }, + { + "epoch": 0.1739274474076528, + "grad_norm": 8475.736328125, + "learning_rate": 9.84204653405502e-06, + "loss": 168696.875, + "step": 86100 + }, + { + "epoch": 0.17394764804033663, + "grad_norm": 3055774.25, + "learning_rate": 9.841959477091698e-06, + "loss": 35967.0281, + "step": 86110 + }, + { + "epoch": 0.17396784867302043, + "grad_norm": 226582.84375, + "learning_rate": 9.841872396529255e-06, + "loss": 126266.4, + "step": 86120 + }, + { + "epoch": 0.17398804930570425, + "grad_norm": 520492.21875, + "learning_rate": 9.841785292368113e-06, + "loss": 137520.2875, + "step": 86130 + }, + { + "epoch": 0.17400824993838807, + "grad_norm": 0.0, + "learning_rate": 9.841698164608696e-06, + "loss": 36409.6156, + "step": 86140 + }, + { + "epoch": 0.17402845057107189, + "grad_norm": 56357.6484375, + "learning_rate": 9.841611013251428e-06, + "loss": 99575.8062, + "step": 86150 + }, + { + "epoch": 0.1740486512037557, + "grad_norm": 1492906.125, + "learning_rate": 9.841523838296738e-06, + "loss": 107987.6625, + "step": 86160 + }, + { + "epoch": 0.17406885183643953, + "grad_norm": 567534.125, + "learning_rate": 9.841436639745046e-06, + "loss": 164879.4375, + "step": 86170 + }, + { + "epoch": 0.17408905246912335, + "grad_norm": 82981.3203125, + "learning_rate": 9.84134941759678e-06, + "loss": 76626.4937, + "step": 86180 + }, + { + "epoch": 0.17410925310180714, + "grad_norm": 4849842.5, + "learning_rate": 9.841262171852364e-06, + "loss": 124923.675, + "step": 86190 + }, + { + "epoch": 0.17412945373449096, + "grad_norm": 461199.21875, + "learning_rate": 9.841174902512223e-06, + "loss": 42693.8156, + "step": 86200 + }, + { + "epoch": 0.17414965436717478, + "grad_norm": 605059.3125, + "learning_rate": 9.841087609576782e-06, + "loss": 58251.9437, + "step": 86210 + }, + { + "epoch": 0.1741698549998586, + "grad_norm": 909409.0, + "learning_rate": 9.841000293046469e-06, + "loss": 48137.6312, + "step": 86220 + }, + { + "epoch": 0.17419005563254242, + "grad_norm": 4253616.5, + "learning_rate": 9.840912952921707e-06, + "loss": 147105.85, + "step": 86230 + }, + { + "epoch": 0.17421025626522624, + "grad_norm": 2370944.0, + "learning_rate": 9.840825589202922e-06, + "loss": 173242.2, + "step": 86240 + }, + { + "epoch": 0.17423045689791003, + "grad_norm": 2024092.875, + "learning_rate": 9.84073820189054e-06, + "loss": 117015.45, + "step": 86250 + }, + { + "epoch": 0.17425065753059385, + "grad_norm": 428530.3125, + "learning_rate": 9.840650790984988e-06, + "loss": 131423.95, + "step": 86260 + }, + { + "epoch": 0.17427085816327767, + "grad_norm": 4461692.0, + "learning_rate": 9.84056335648669e-06, + "loss": 146419.55, + "step": 86270 + }, + { + "epoch": 0.1742910587959615, + "grad_norm": 32324.92578125, + "learning_rate": 9.840475898396073e-06, + "loss": 257077.975, + "step": 86280 + }, + { + "epoch": 0.1743112594286453, + "grad_norm": 848975.5, + "learning_rate": 9.840388416713564e-06, + "loss": 51899.5844, + "step": 86290 + }, + { + "epoch": 0.17433146006132913, + "grad_norm": 1322904.625, + "learning_rate": 9.84030091143959e-06, + "loss": 87381.525, + "step": 86300 + }, + { + "epoch": 0.17435166069401295, + "grad_norm": 2306742.25, + "learning_rate": 9.840213382574575e-06, + "loss": 113771.8625, + "step": 86310 + }, + { + "epoch": 0.17437186132669674, + "grad_norm": 6117734.0, + "learning_rate": 9.840125830118949e-06, + "loss": 82300.7063, + "step": 86320 + }, + { + "epoch": 0.17439206195938056, + "grad_norm": 17911900.0, + "learning_rate": 9.840038254073136e-06, + "loss": 139886.25, + "step": 86330 + }, + { + "epoch": 0.17441226259206438, + "grad_norm": 53640.91796875, + "learning_rate": 9.839950654437563e-06, + "loss": 59365.525, + "step": 86340 + }, + { + "epoch": 0.1744324632247482, + "grad_norm": 18285528.0, + "learning_rate": 9.839863031212657e-06, + "loss": 126396.6875, + "step": 86350 + }, + { + "epoch": 0.17445266385743202, + "grad_norm": 5651682.0, + "learning_rate": 9.839775384398846e-06, + "loss": 64094.3125, + "step": 86360 + }, + { + "epoch": 0.17447286449011584, + "grad_norm": 75721.6796875, + "learning_rate": 9.839687713996558e-06, + "loss": 35411.325, + "step": 86370 + }, + { + "epoch": 0.17449306512279963, + "grad_norm": 915977.5, + "learning_rate": 9.839600020006217e-06, + "loss": 136615.8125, + "step": 86380 + }, + { + "epoch": 0.17451326575548345, + "grad_norm": 13731347.0, + "learning_rate": 9.839512302428254e-06, + "loss": 120968.6, + "step": 86390 + }, + { + "epoch": 0.17453346638816727, + "grad_norm": 7538212.0, + "learning_rate": 9.839424561263094e-06, + "loss": 158344.425, + "step": 86400 + }, + { + "epoch": 0.1745536670208511, + "grad_norm": 451413.65625, + "learning_rate": 9.839336796511167e-06, + "loss": 108750.9, + "step": 86410 + }, + { + "epoch": 0.17457386765353491, + "grad_norm": 325859.90625, + "learning_rate": 9.839249008172897e-06, + "loss": 45138.3344, + "step": 86420 + }, + { + "epoch": 0.17459406828621873, + "grad_norm": 11582937.0, + "learning_rate": 9.839161196248717e-06, + "loss": 177064.2875, + "step": 86430 + }, + { + "epoch": 0.17461426891890253, + "grad_norm": 511572.34375, + "learning_rate": 9.839073360739052e-06, + "loss": 83594.6062, + "step": 86440 + }, + { + "epoch": 0.17463446955158635, + "grad_norm": 3401265.25, + "learning_rate": 9.838985501644329e-06, + "loss": 72314.2437, + "step": 86450 + }, + { + "epoch": 0.17465467018427017, + "grad_norm": 15031716.0, + "learning_rate": 9.838897618964978e-06, + "loss": 150814.975, + "step": 86460 + }, + { + "epoch": 0.174674870816954, + "grad_norm": 2034426.875, + "learning_rate": 9.838809712701426e-06, + "loss": 162476.5125, + "step": 86470 + }, + { + "epoch": 0.1746950714496378, + "grad_norm": 1440028.875, + "learning_rate": 9.838721782854103e-06, + "loss": 66996.4, + "step": 86480 + }, + { + "epoch": 0.17471527208232163, + "grad_norm": 688292.4375, + "learning_rate": 9.838633829423437e-06, + "loss": 108946.5625, + "step": 86490 + }, + { + "epoch": 0.17473547271500545, + "grad_norm": 131168.625, + "learning_rate": 9.838545852409857e-06, + "loss": 225305.7, + "step": 86500 + }, + { + "epoch": 0.17475567334768924, + "grad_norm": 10912206.0, + "learning_rate": 9.83845785181379e-06, + "loss": 83616.2125, + "step": 86510 + }, + { + "epoch": 0.17477587398037306, + "grad_norm": 0.0, + "learning_rate": 9.838369827635668e-06, + "loss": 48104.7312, + "step": 86520 + }, + { + "epoch": 0.17479607461305688, + "grad_norm": 468932.25, + "learning_rate": 9.838281779875918e-06, + "loss": 55739.85, + "step": 86530 + }, + { + "epoch": 0.1748162752457407, + "grad_norm": 1649284.5, + "learning_rate": 9.838193708534969e-06, + "loss": 212046.85, + "step": 86540 + }, + { + "epoch": 0.17483647587842452, + "grad_norm": 996969.9375, + "learning_rate": 9.83810561361325e-06, + "loss": 173649.5625, + "step": 86550 + }, + { + "epoch": 0.17485667651110834, + "grad_norm": 1467969.125, + "learning_rate": 9.838017495111191e-06, + "loss": 97892.125, + "step": 86560 + }, + { + "epoch": 0.17487687714379213, + "grad_norm": 296274.375, + "learning_rate": 9.837929353029223e-06, + "loss": 35742.1406, + "step": 86570 + }, + { + "epoch": 0.17489707777647595, + "grad_norm": 723392.0, + "learning_rate": 9.837841187367774e-06, + "loss": 40408.4688, + "step": 86580 + }, + { + "epoch": 0.17491727840915977, + "grad_norm": 125030.9296875, + "learning_rate": 9.837752998127272e-06, + "loss": 86308.8875, + "step": 86590 + }, + { + "epoch": 0.1749374790418436, + "grad_norm": 12857.5908203125, + "learning_rate": 9.83766478530815e-06, + "loss": 68905.0938, + "step": 86600 + }, + { + "epoch": 0.1749576796745274, + "grad_norm": 1734312.125, + "learning_rate": 9.837576548910836e-06, + "loss": 153756.1125, + "step": 86610 + }, + { + "epoch": 0.17497788030721123, + "grad_norm": 1468473.625, + "learning_rate": 9.837488288935761e-06, + "loss": 127244.2375, + "step": 86620 + }, + { + "epoch": 0.17499808093989505, + "grad_norm": 187187.59375, + "learning_rate": 9.837400005383355e-06, + "loss": 81272.375, + "step": 86630 + }, + { + "epoch": 0.17501828157257884, + "grad_norm": 117524.921875, + "learning_rate": 9.837311698254048e-06, + "loss": 71235.5813, + "step": 86640 + }, + { + "epoch": 0.17503848220526266, + "grad_norm": 0.0, + "learning_rate": 9.837223367548271e-06, + "loss": 157908.75, + "step": 86650 + }, + { + "epoch": 0.17505868283794648, + "grad_norm": 18317902.0, + "learning_rate": 9.837135013266452e-06, + "loss": 188816.4875, + "step": 86660 + }, + { + "epoch": 0.1750788834706303, + "grad_norm": 235204.921875, + "learning_rate": 9.837046635409026e-06, + "loss": 61511.9375, + "step": 86670 + }, + { + "epoch": 0.17509908410331412, + "grad_norm": 63302.91015625, + "learning_rate": 9.83695823397642e-06, + "loss": 62661.3875, + "step": 86680 + }, + { + "epoch": 0.17511928473599794, + "grad_norm": 66332.1015625, + "learning_rate": 9.836869808969068e-06, + "loss": 116235.85, + "step": 86690 + }, + { + "epoch": 0.17513948536868174, + "grad_norm": 13822.4189453125, + "learning_rate": 9.836781360387396e-06, + "loss": 48369.8625, + "step": 86700 + }, + { + "epoch": 0.17515968600136556, + "grad_norm": 1167251.75, + "learning_rate": 9.83669288823184e-06, + "loss": 113735.2625, + "step": 86710 + }, + { + "epoch": 0.17517988663404938, + "grad_norm": 1695114.875, + "learning_rate": 9.836604392502829e-06, + "loss": 160166.9125, + "step": 86720 + }, + { + "epoch": 0.1752000872667332, + "grad_norm": 464004.78125, + "learning_rate": 9.836515873200796e-06, + "loss": 124492.35, + "step": 86730 + }, + { + "epoch": 0.17522028789941702, + "grad_norm": 0.0, + "learning_rate": 9.83642733032617e-06, + "loss": 114468.975, + "step": 86740 + }, + { + "epoch": 0.17524048853210084, + "grad_norm": 6553453.0, + "learning_rate": 9.836338763879386e-06, + "loss": 124937.1, + "step": 86750 + }, + { + "epoch": 0.17526068916478463, + "grad_norm": 680203.875, + "learning_rate": 9.83625017386087e-06, + "loss": 147060.7625, + "step": 86760 + }, + { + "epoch": 0.17528088979746845, + "grad_norm": 164746.859375, + "learning_rate": 9.836161560271058e-06, + "loss": 78290.5188, + "step": 86770 + }, + { + "epoch": 0.17530109043015227, + "grad_norm": 1861044.0, + "learning_rate": 9.836072923110384e-06, + "loss": 101656.65, + "step": 86780 + }, + { + "epoch": 0.1753212910628361, + "grad_norm": 711220.5, + "learning_rate": 9.835984262379275e-06, + "loss": 145840.2125, + "step": 86790 + }, + { + "epoch": 0.1753414916955199, + "grad_norm": 31348.037109375, + "learning_rate": 9.835895578078165e-06, + "loss": 93141.3813, + "step": 86800 + }, + { + "epoch": 0.17536169232820373, + "grad_norm": 4772386.0, + "learning_rate": 9.835806870207487e-06, + "loss": 47452.1813, + "step": 86810 + }, + { + "epoch": 0.17538189296088755, + "grad_norm": 1031204.75, + "learning_rate": 9.835718138767672e-06, + "loss": 85874.475, + "step": 86820 + }, + { + "epoch": 0.17540209359357134, + "grad_norm": 1240233.125, + "learning_rate": 9.835629383759155e-06, + "loss": 28398.0875, + "step": 86830 + }, + { + "epoch": 0.17542229422625516, + "grad_norm": 35816.7421875, + "learning_rate": 9.835540605182366e-06, + "loss": 116295.85, + "step": 86840 + }, + { + "epoch": 0.17544249485893898, + "grad_norm": 8240268.0, + "learning_rate": 9.835451803037738e-06, + "loss": 274861.25, + "step": 86850 + }, + { + "epoch": 0.1754626954916228, + "grad_norm": 33075.2265625, + "learning_rate": 9.835362977325703e-06, + "loss": 83196.7437, + "step": 86860 + }, + { + "epoch": 0.17548289612430662, + "grad_norm": 2089644.0, + "learning_rate": 9.835274128046698e-06, + "loss": 48376.5031, + "step": 86870 + }, + { + "epoch": 0.17550309675699044, + "grad_norm": 15020932.0, + "learning_rate": 9.835185255201153e-06, + "loss": 94613.0562, + "step": 86880 + }, + { + "epoch": 0.17552329738967423, + "grad_norm": 14477169.0, + "learning_rate": 9.835096358789501e-06, + "loss": 174070.05, + "step": 86890 + }, + { + "epoch": 0.17554349802235805, + "grad_norm": 36845.98828125, + "learning_rate": 9.835007438812177e-06, + "loss": 86594.2937, + "step": 86900 + }, + { + "epoch": 0.17556369865504187, + "grad_norm": 785761.3125, + "learning_rate": 9.834918495269611e-06, + "loss": 56233.5813, + "step": 86910 + }, + { + "epoch": 0.1755838992877257, + "grad_norm": 1088767.0, + "learning_rate": 9.83482952816224e-06, + "loss": 78245.4438, + "step": 86920 + }, + { + "epoch": 0.1756040999204095, + "grad_norm": 141640.15625, + "learning_rate": 9.834740537490495e-06, + "loss": 163226.5, + "step": 86930 + }, + { + "epoch": 0.17562430055309333, + "grad_norm": 126636.6640625, + "learning_rate": 9.834651523254812e-06, + "loss": 147693.9875, + "step": 86940 + }, + { + "epoch": 0.17564450118577715, + "grad_norm": 5141.890625, + "learning_rate": 9.834562485455622e-06, + "loss": 26204.8125, + "step": 86950 + }, + { + "epoch": 0.17566470181846094, + "grad_norm": 2949435.25, + "learning_rate": 9.834473424093364e-06, + "loss": 104989.275, + "step": 86960 + }, + { + "epoch": 0.17568490245114476, + "grad_norm": 705700.0, + "learning_rate": 9.834384339168468e-06, + "loss": 57318.6625, + "step": 86970 + }, + { + "epoch": 0.17570510308382858, + "grad_norm": 2450244.0, + "learning_rate": 9.834295230681368e-06, + "loss": 117234.775, + "step": 86980 + }, + { + "epoch": 0.1757253037165124, + "grad_norm": 483808.375, + "learning_rate": 9.834206098632499e-06, + "loss": 96425.8562, + "step": 86990 + }, + { + "epoch": 0.17574550434919622, + "grad_norm": 1101204.875, + "learning_rate": 9.834116943022299e-06, + "loss": 87051.4438, + "step": 87000 + }, + { + "epoch": 0.17576570498188004, + "grad_norm": 325665.78125, + "learning_rate": 9.834027763851196e-06, + "loss": 118003.1, + "step": 87010 + }, + { + "epoch": 0.17578590561456384, + "grad_norm": 2356508.25, + "learning_rate": 9.833938561119629e-06, + "loss": 227500.7, + "step": 87020 + }, + { + "epoch": 0.17580610624724766, + "grad_norm": 64777.19921875, + "learning_rate": 9.833849334828033e-06, + "loss": 110772.2875, + "step": 87030 + }, + { + "epoch": 0.17582630687993148, + "grad_norm": 553325.5625, + "learning_rate": 9.833760084976838e-06, + "loss": 33744.8438, + "step": 87040 + }, + { + "epoch": 0.1758465075126153, + "grad_norm": 8709613.0, + "learning_rate": 9.833670811566485e-06, + "loss": 180638.7625, + "step": 87050 + }, + { + "epoch": 0.17586670814529912, + "grad_norm": 49198.921875, + "learning_rate": 9.833581514597408e-06, + "loss": 45254.1719, + "step": 87060 + }, + { + "epoch": 0.17588690877798294, + "grad_norm": 521808.75, + "learning_rate": 9.833492194070039e-06, + "loss": 83625.9187, + "step": 87070 + }, + { + "epoch": 0.17590710941066673, + "grad_norm": 3886424.0, + "learning_rate": 9.833402849984815e-06, + "loss": 140323.825, + "step": 87080 + }, + { + "epoch": 0.17592731004335055, + "grad_norm": 970534.6875, + "learning_rate": 9.833313482342173e-06, + "loss": 28691.1031, + "step": 87090 + }, + { + "epoch": 0.17594751067603437, + "grad_norm": 331999.0625, + "learning_rate": 9.833224091142548e-06, + "loss": 140306.6125, + "step": 87100 + }, + { + "epoch": 0.1759677113087182, + "grad_norm": 1626454.5, + "learning_rate": 9.833134676386373e-06, + "loss": 111163.1875, + "step": 87110 + }, + { + "epoch": 0.175987911941402, + "grad_norm": 71527.1640625, + "learning_rate": 9.833045238074085e-06, + "loss": 188155.85, + "step": 87120 + }, + { + "epoch": 0.17600811257408583, + "grad_norm": 112705.125, + "learning_rate": 9.832955776206123e-06, + "loss": 123316.1625, + "step": 87130 + }, + { + "epoch": 0.17602831320676965, + "grad_norm": 1484869.0, + "learning_rate": 9.832866290782922e-06, + "loss": 65465.025, + "step": 87140 + }, + { + "epoch": 0.17604851383945344, + "grad_norm": 2117150.5, + "learning_rate": 9.832776781804913e-06, + "loss": 20958.3156, + "step": 87150 + }, + { + "epoch": 0.17606871447213726, + "grad_norm": 1149343.0, + "learning_rate": 9.83268724927254e-06, + "loss": 85647.7188, + "step": 87160 + }, + { + "epoch": 0.17608891510482108, + "grad_norm": 2274704.5, + "learning_rate": 9.832597693186233e-06, + "loss": 88613.3, + "step": 87170 + }, + { + "epoch": 0.1761091157375049, + "grad_norm": 507913.0, + "learning_rate": 9.83250811354643e-06, + "loss": 78999.1125, + "step": 87180 + }, + { + "epoch": 0.17612931637018872, + "grad_norm": 24099806.0, + "learning_rate": 9.832418510353572e-06, + "loss": 86601.7625, + "step": 87190 + }, + { + "epoch": 0.17614951700287254, + "grad_norm": 641376.9375, + "learning_rate": 9.832328883608088e-06, + "loss": 98460.325, + "step": 87200 + }, + { + "epoch": 0.17616971763555633, + "grad_norm": 3808882.25, + "learning_rate": 9.832239233310421e-06, + "loss": 74898.5125, + "step": 87210 + }, + { + "epoch": 0.17618991826824015, + "grad_norm": 37805.1171875, + "learning_rate": 9.832149559461009e-06, + "loss": 72978.2312, + "step": 87220 + }, + { + "epoch": 0.17621011890092397, + "grad_norm": 473191.09375, + "learning_rate": 9.832059862060282e-06, + "loss": 178351.975, + "step": 87230 + }, + { + "epoch": 0.1762303195336078, + "grad_norm": 3041736.75, + "learning_rate": 9.831970141108684e-06, + "loss": 87074.575, + "step": 87240 + }, + { + "epoch": 0.1762505201662916, + "grad_norm": 6444904.5, + "learning_rate": 9.831880396606649e-06, + "loss": 59247.55, + "step": 87250 + }, + { + "epoch": 0.17627072079897543, + "grad_norm": 6208454.0, + "learning_rate": 9.831790628554613e-06, + "loss": 70910.9875, + "step": 87260 + }, + { + "epoch": 0.17629092143165925, + "grad_norm": 270012.9375, + "learning_rate": 9.831700836953017e-06, + "loss": 74887.7437, + "step": 87270 + }, + { + "epoch": 0.17631112206434305, + "grad_norm": 430161.8125, + "learning_rate": 9.831611021802297e-06, + "loss": 132254.875, + "step": 87280 + }, + { + "epoch": 0.17633132269702687, + "grad_norm": 1440557.125, + "learning_rate": 9.83152118310289e-06, + "loss": 186127.325, + "step": 87290 + }, + { + "epoch": 0.17635152332971069, + "grad_norm": 329283.1875, + "learning_rate": 9.831431320855235e-06, + "loss": 88113.4062, + "step": 87300 + }, + { + "epoch": 0.1763717239623945, + "grad_norm": 1520548.875, + "learning_rate": 9.831341435059772e-06, + "loss": 49077.9125, + "step": 87310 + }, + { + "epoch": 0.17639192459507833, + "grad_norm": 5628238.5, + "learning_rate": 9.831251525716934e-06, + "loss": 132873.0, + "step": 87320 + }, + { + "epoch": 0.17641212522776215, + "grad_norm": 784058.5625, + "learning_rate": 9.831161592827164e-06, + "loss": 171018.5625, + "step": 87330 + }, + { + "epoch": 0.17643232586044594, + "grad_norm": 1352816.25, + "learning_rate": 9.831071636390899e-06, + "loss": 56201.9187, + "step": 87340 + }, + { + "epoch": 0.17645252649312976, + "grad_norm": 193924.546875, + "learning_rate": 9.830981656408575e-06, + "loss": 104387.4375, + "step": 87350 + }, + { + "epoch": 0.17647272712581358, + "grad_norm": 7530664.0, + "learning_rate": 9.830891652880632e-06, + "loss": 77231.6313, + "step": 87360 + }, + { + "epoch": 0.1764929277584974, + "grad_norm": 262596.6875, + "learning_rate": 9.83080162580751e-06, + "loss": 90260.75, + "step": 87370 + }, + { + "epoch": 0.17651312839118122, + "grad_norm": 255340.03125, + "learning_rate": 9.830711575189646e-06, + "loss": 121423.375, + "step": 87380 + }, + { + "epoch": 0.17653332902386504, + "grad_norm": 1058119.875, + "learning_rate": 9.83062150102748e-06, + "loss": 41230.1531, + "step": 87390 + }, + { + "epoch": 0.17655352965654883, + "grad_norm": 0.0, + "learning_rate": 9.830531403321451e-06, + "loss": 47961.4781, + "step": 87400 + }, + { + "epoch": 0.17657373028923265, + "grad_norm": 614748.625, + "learning_rate": 9.830441282071999e-06, + "loss": 47465.3688, + "step": 87410 + }, + { + "epoch": 0.17659393092191647, + "grad_norm": 4105801.75, + "learning_rate": 9.830351137279559e-06, + "loss": 114176.2875, + "step": 87420 + }, + { + "epoch": 0.1766141315546003, + "grad_norm": 361561.84375, + "learning_rate": 9.830260968944577e-06, + "loss": 57792.1062, + "step": 87430 + }, + { + "epoch": 0.1766343321872841, + "grad_norm": 802321.875, + "learning_rate": 9.830170777067486e-06, + "loss": 142588.9375, + "step": 87440 + }, + { + "epoch": 0.17665453281996793, + "grad_norm": 415498.28125, + "learning_rate": 9.83008056164873e-06, + "loss": 70532.6875, + "step": 87450 + }, + { + "epoch": 0.17667473345265175, + "grad_norm": 2232953.75, + "learning_rate": 9.829990322688746e-06, + "loss": 95454.1187, + "step": 87460 + }, + { + "epoch": 0.17669493408533554, + "grad_norm": 11671390.0, + "learning_rate": 9.829900060187976e-06, + "loss": 93501.225, + "step": 87470 + }, + { + "epoch": 0.17671513471801936, + "grad_norm": 61315.34375, + "learning_rate": 9.82980977414686e-06, + "loss": 52905.4375, + "step": 87480 + }, + { + "epoch": 0.17673533535070318, + "grad_norm": 817796.625, + "learning_rate": 9.829719464565834e-06, + "loss": 94264.2312, + "step": 87490 + }, + { + "epoch": 0.176755535983387, + "grad_norm": 38579.3515625, + "learning_rate": 9.829629131445342e-06, + "loss": 58505.825, + "step": 87500 + }, + { + "epoch": 0.17677573661607082, + "grad_norm": 515511.78125, + "learning_rate": 9.829538774785825e-06, + "loss": 145477.9375, + "step": 87510 + }, + { + "epoch": 0.17679593724875464, + "grad_norm": 253993.625, + "learning_rate": 9.82944839458772e-06, + "loss": 52839.6562, + "step": 87520 + }, + { + "epoch": 0.17681613788143843, + "grad_norm": 490402.9375, + "learning_rate": 9.82935799085147e-06, + "loss": 80775.175, + "step": 87530 + }, + { + "epoch": 0.17683633851412225, + "grad_norm": 629926.1875, + "learning_rate": 9.829267563577514e-06, + "loss": 95000.55, + "step": 87540 + }, + { + "epoch": 0.17685653914680607, + "grad_norm": 2473902.0, + "learning_rate": 9.829177112766295e-06, + "loss": 83892.825, + "step": 87550 + }, + { + "epoch": 0.1768767397794899, + "grad_norm": 244855.890625, + "learning_rate": 9.829086638418252e-06, + "loss": 57329.95, + "step": 87560 + }, + { + "epoch": 0.17689694041217371, + "grad_norm": 1047293.9375, + "learning_rate": 9.828996140533826e-06, + "loss": 137510.7625, + "step": 87570 + }, + { + "epoch": 0.17691714104485753, + "grad_norm": 0.0, + "learning_rate": 9.82890561911346e-06, + "loss": 52838.2125, + "step": 87580 + }, + { + "epoch": 0.17693734167754135, + "grad_norm": 37141.24609375, + "learning_rate": 9.828815074157591e-06, + "loss": 61652.025, + "step": 87590 + }, + { + "epoch": 0.17695754231022515, + "grad_norm": 100018.3359375, + "learning_rate": 9.828724505666664e-06, + "loss": 84531.175, + "step": 87600 + }, + { + "epoch": 0.17697774294290897, + "grad_norm": 1304774.875, + "learning_rate": 9.82863391364112e-06, + "loss": 139855.925, + "step": 87610 + }, + { + "epoch": 0.1769979435755928, + "grad_norm": 1533170.0, + "learning_rate": 9.828543298081401e-06, + "loss": 86984.425, + "step": 87620 + }, + { + "epoch": 0.1770181442082766, + "grad_norm": 3295930.25, + "learning_rate": 9.828452658987946e-06, + "loss": 72385.8625, + "step": 87630 + }, + { + "epoch": 0.17703834484096043, + "grad_norm": 1344295.5, + "learning_rate": 9.828361996361199e-06, + "loss": 58126.2125, + "step": 87640 + }, + { + "epoch": 0.17705854547364425, + "grad_norm": 8780655.0, + "learning_rate": 9.828271310201601e-06, + "loss": 141580.475, + "step": 87650 + }, + { + "epoch": 0.17707874610632804, + "grad_norm": 627611.75, + "learning_rate": 9.828180600509595e-06, + "loss": 75011.9187, + "step": 87660 + }, + { + "epoch": 0.17709894673901186, + "grad_norm": 11259.9697265625, + "learning_rate": 9.828089867285622e-06, + "loss": 27812.0594, + "step": 87670 + }, + { + "epoch": 0.17711914737169568, + "grad_norm": 430098.28125, + "learning_rate": 9.827999110530124e-06, + "loss": 83087.5125, + "step": 87680 + }, + { + "epoch": 0.1771393480043795, + "grad_norm": 507727.65625, + "learning_rate": 9.827908330243545e-06, + "loss": 202861.675, + "step": 87690 + }, + { + "epoch": 0.17715954863706332, + "grad_norm": 1774631.125, + "learning_rate": 9.827817526426324e-06, + "loss": 123600.15, + "step": 87700 + }, + { + "epoch": 0.17717974926974714, + "grad_norm": 0.0, + "learning_rate": 9.827726699078907e-06, + "loss": 33065.6344, + "step": 87710 + }, + { + "epoch": 0.17719994990243093, + "grad_norm": 1874734.875, + "learning_rate": 9.827635848201737e-06, + "loss": 68323.9125, + "step": 87720 + }, + { + "epoch": 0.17722015053511475, + "grad_norm": 377234.71875, + "learning_rate": 9.827544973795254e-06, + "loss": 40162.5125, + "step": 87730 + }, + { + "epoch": 0.17724035116779857, + "grad_norm": 27508160.0, + "learning_rate": 9.827454075859904e-06, + "loss": 142135.075, + "step": 87740 + }, + { + "epoch": 0.1772605518004824, + "grad_norm": 3504268.75, + "learning_rate": 9.827363154396126e-06, + "loss": 137829.0625, + "step": 87750 + }, + { + "epoch": 0.1772807524331662, + "grad_norm": 494779.9375, + "learning_rate": 9.827272209404366e-06, + "loss": 123067.1125, + "step": 87760 + }, + { + "epoch": 0.17730095306585003, + "grad_norm": 539578.6875, + "learning_rate": 9.827181240885068e-06, + "loss": 45713.1781, + "step": 87770 + }, + { + "epoch": 0.17732115369853385, + "grad_norm": 1863918.875, + "learning_rate": 9.827090248838673e-06, + "loss": 57211.4, + "step": 87780 + }, + { + "epoch": 0.17734135433121764, + "grad_norm": 385770.75, + "learning_rate": 9.826999233265626e-06, + "loss": 87122.375, + "step": 87790 + }, + { + "epoch": 0.17736155496390146, + "grad_norm": 89345.3984375, + "learning_rate": 9.82690819416637e-06, + "loss": 123409.9, + "step": 87800 + }, + { + "epoch": 0.17738175559658528, + "grad_norm": 4773240.0, + "learning_rate": 9.826817131541349e-06, + "loss": 68819.0688, + "step": 87810 + }, + { + "epoch": 0.1774019562292691, + "grad_norm": 5113494.5, + "learning_rate": 9.826726045391006e-06, + "loss": 120508.3125, + "step": 87820 + }, + { + "epoch": 0.17742215686195292, + "grad_norm": 851541.0625, + "learning_rate": 9.826634935715787e-06, + "loss": 44523.6188, + "step": 87830 + }, + { + "epoch": 0.17744235749463674, + "grad_norm": 6606.6630859375, + "learning_rate": 9.826543802516135e-06, + "loss": 117629.2875, + "step": 87840 + }, + { + "epoch": 0.17746255812732054, + "grad_norm": 399863.75, + "learning_rate": 9.826452645792493e-06, + "loss": 61329.775, + "step": 87850 + }, + { + "epoch": 0.17748275876000436, + "grad_norm": 2488984.0, + "learning_rate": 9.826361465545306e-06, + "loss": 120222.7, + "step": 87860 + }, + { + "epoch": 0.17750295939268818, + "grad_norm": 153708.796875, + "learning_rate": 9.826270261775018e-06, + "loss": 175857.7875, + "step": 87870 + }, + { + "epoch": 0.177523160025372, + "grad_norm": 9680835.0, + "learning_rate": 9.826179034482074e-06, + "loss": 115138.7375, + "step": 87880 + }, + { + "epoch": 0.17754336065805582, + "grad_norm": 3408799.0, + "learning_rate": 9.82608778366692e-06, + "loss": 56353.2125, + "step": 87890 + }, + { + "epoch": 0.17756356129073964, + "grad_norm": 2759787.25, + "learning_rate": 9.825996509330001e-06, + "loss": 113706.9125, + "step": 87900 + }, + { + "epoch": 0.17758376192342346, + "grad_norm": 198035.140625, + "learning_rate": 9.825905211471757e-06, + "loss": 120675.9125, + "step": 87910 + }, + { + "epoch": 0.17760396255610725, + "grad_norm": 76491.09375, + "learning_rate": 9.825813890092639e-06, + "loss": 57800.3125, + "step": 87920 + }, + { + "epoch": 0.17762416318879107, + "grad_norm": 3637263.25, + "learning_rate": 9.825722545193087e-06, + "loss": 68991.8562, + "step": 87930 + }, + { + "epoch": 0.1776443638214749, + "grad_norm": 1125920.5, + "learning_rate": 9.82563117677355e-06, + "loss": 90234.3875, + "step": 87940 + }, + { + "epoch": 0.1776645644541587, + "grad_norm": 389263.5, + "learning_rate": 9.825539784834472e-06, + "loss": 79566.4563, + "step": 87950 + }, + { + "epoch": 0.17768476508684253, + "grad_norm": 631028.25, + "learning_rate": 9.825448369376298e-06, + "loss": 140451.4, + "step": 87960 + }, + { + "epoch": 0.17770496571952635, + "grad_norm": 1620725.0, + "learning_rate": 9.825356930399474e-06, + "loss": 59349.0625, + "step": 87970 + }, + { + "epoch": 0.17772516635221014, + "grad_norm": 7437803.0, + "learning_rate": 9.825265467904446e-06, + "loss": 77149.3625, + "step": 87980 + }, + { + "epoch": 0.17774536698489396, + "grad_norm": 9382148.0, + "learning_rate": 9.825173981891658e-06, + "loss": 192049.3625, + "step": 87990 + }, + { + "epoch": 0.17776556761757778, + "grad_norm": 10877518.0, + "learning_rate": 9.825082472361558e-06, + "loss": 137825.6875, + "step": 88000 + }, + { + "epoch": 0.1777857682502616, + "grad_norm": 3956570.75, + "learning_rate": 9.82499093931459e-06, + "loss": 52483.5062, + "step": 88010 + }, + { + "epoch": 0.17780596888294542, + "grad_norm": 10491219.0, + "learning_rate": 9.824899382751204e-06, + "loss": 72579.625, + "step": 88020 + }, + { + "epoch": 0.17782616951562924, + "grad_norm": 826513.0, + "learning_rate": 9.824807802671843e-06, + "loss": 73281.2937, + "step": 88030 + }, + { + "epoch": 0.17784637014831303, + "grad_norm": 18055698.0, + "learning_rate": 9.824716199076952e-06, + "loss": 116736.95, + "step": 88040 + }, + { + "epoch": 0.17786657078099685, + "grad_norm": 2216371.5, + "learning_rate": 9.824624571966982e-06, + "loss": 72153.425, + "step": 88050 + }, + { + "epoch": 0.17788677141368067, + "grad_norm": 1005439.9375, + "learning_rate": 9.824532921342375e-06, + "loss": 69530.1375, + "step": 88060 + }, + { + "epoch": 0.1779069720463645, + "grad_norm": 7131221.5, + "learning_rate": 9.82444124720358e-06, + "loss": 169313.225, + "step": 88070 + }, + { + "epoch": 0.1779271726790483, + "grad_norm": 655127.5625, + "learning_rate": 9.824349549551045e-06, + "loss": 123188.7, + "step": 88080 + }, + { + "epoch": 0.17794737331173213, + "grad_norm": 435865.21875, + "learning_rate": 9.824257828385213e-06, + "loss": 41345.85, + "step": 88090 + }, + { + "epoch": 0.17796757394441595, + "grad_norm": 744260.4375, + "learning_rate": 9.824166083706534e-06, + "loss": 32401.7281, + "step": 88100 + }, + { + "epoch": 0.17798777457709974, + "grad_norm": 212538.859375, + "learning_rate": 9.824074315515457e-06, + "loss": 69413.9875, + "step": 88110 + }, + { + "epoch": 0.17800797520978356, + "grad_norm": 20282938.0, + "learning_rate": 9.823982523812424e-06, + "loss": 170825.5375, + "step": 88120 + }, + { + "epoch": 0.17802817584246738, + "grad_norm": 1287899.0, + "learning_rate": 9.823890708597887e-06, + "loss": 39551.7781, + "step": 88130 + }, + { + "epoch": 0.1780483764751512, + "grad_norm": 6052351.0, + "learning_rate": 9.823798869872291e-06, + "loss": 53566.9375, + "step": 88140 + }, + { + "epoch": 0.17806857710783502, + "grad_norm": 9672551.0, + "learning_rate": 9.823707007636085e-06, + "loss": 77616.0813, + "step": 88150 + }, + { + "epoch": 0.17808877774051884, + "grad_norm": 45960.98046875, + "learning_rate": 9.823615121889716e-06, + "loss": 99297.4125, + "step": 88160 + }, + { + "epoch": 0.17810897837320264, + "grad_norm": 84570.1796875, + "learning_rate": 9.82352321263363e-06, + "loss": 65546.3687, + "step": 88170 + }, + { + "epoch": 0.17812917900588646, + "grad_norm": 46707.6796875, + "learning_rate": 9.823431279868278e-06, + "loss": 102650.8813, + "step": 88180 + }, + { + "epoch": 0.17814937963857028, + "grad_norm": 7258533.5, + "learning_rate": 9.823339323594107e-06, + "loss": 150509.15, + "step": 88190 + }, + { + "epoch": 0.1781695802712541, + "grad_norm": 11355108.0, + "learning_rate": 9.823247343811567e-06, + "loss": 68148.1687, + "step": 88200 + }, + { + "epoch": 0.17818978090393792, + "grad_norm": 388587.875, + "learning_rate": 9.823155340521104e-06, + "loss": 129362.3625, + "step": 88210 + }, + { + "epoch": 0.17820998153662174, + "grad_norm": 315456.75, + "learning_rate": 9.823063313723165e-06, + "loss": 49707.0344, + "step": 88220 + }, + { + "epoch": 0.17823018216930553, + "grad_norm": 494777.59375, + "learning_rate": 9.822971263418202e-06, + "loss": 63486.675, + "step": 88230 + }, + { + "epoch": 0.17825038280198935, + "grad_norm": 687479.9375, + "learning_rate": 9.82287918960666e-06, + "loss": 54691.075, + "step": 88240 + }, + { + "epoch": 0.17827058343467317, + "grad_norm": 162016.9375, + "learning_rate": 9.822787092288991e-06, + "loss": 123537.3375, + "step": 88250 + }, + { + "epoch": 0.178290784067357, + "grad_norm": 701956.3125, + "learning_rate": 9.822694971465643e-06, + "loss": 130371.1375, + "step": 88260 + }, + { + "epoch": 0.1783109847000408, + "grad_norm": 5135008.0, + "learning_rate": 9.822602827137065e-06, + "loss": 146526.575, + "step": 88270 + }, + { + "epoch": 0.17833118533272463, + "grad_norm": 1532398.25, + "learning_rate": 9.822510659303704e-06, + "loss": 47339.8094, + "step": 88280 + }, + { + "epoch": 0.17835138596540845, + "grad_norm": 3306258.0, + "learning_rate": 9.822418467966013e-06, + "loss": 170306.8375, + "step": 88290 + }, + { + "epoch": 0.17837158659809224, + "grad_norm": 13357180.0, + "learning_rate": 9.822326253124436e-06, + "loss": 121919.025, + "step": 88300 + }, + { + "epoch": 0.17839178723077606, + "grad_norm": 4387.37255859375, + "learning_rate": 9.82223401477943e-06, + "loss": 76549.25, + "step": 88310 + }, + { + "epoch": 0.17841198786345988, + "grad_norm": 547851.625, + "learning_rate": 9.822141752931438e-06, + "loss": 156488.925, + "step": 88320 + }, + { + "epoch": 0.1784321884961437, + "grad_norm": 1939500.75, + "learning_rate": 9.822049467580912e-06, + "loss": 105203.5625, + "step": 88330 + }, + { + "epoch": 0.17845238912882752, + "grad_norm": 10085220.0, + "learning_rate": 9.821957158728302e-06, + "loss": 103386.6375, + "step": 88340 + }, + { + "epoch": 0.17847258976151134, + "grad_norm": 4139.42919921875, + "learning_rate": 9.821864826374057e-06, + "loss": 41057.475, + "step": 88350 + }, + { + "epoch": 0.17849279039419513, + "grad_norm": 1553845.0, + "learning_rate": 9.82177247051863e-06, + "loss": 44315.6656, + "step": 88360 + }, + { + "epoch": 0.17851299102687895, + "grad_norm": 9467470.0, + "learning_rate": 9.821680091162466e-06, + "loss": 194011.5, + "step": 88370 + }, + { + "epoch": 0.17853319165956277, + "grad_norm": 8162918.5, + "learning_rate": 9.821587688306017e-06, + "loss": 50581.5594, + "step": 88380 + }, + { + "epoch": 0.1785533922922466, + "grad_norm": 1394337.5, + "learning_rate": 9.821495261949739e-06, + "loss": 32951.4656, + "step": 88390 + }, + { + "epoch": 0.1785735929249304, + "grad_norm": 5324667.0, + "learning_rate": 9.821402812094074e-06, + "loss": 56750.3, + "step": 88400 + }, + { + "epoch": 0.17859379355761423, + "grad_norm": 4551317.5, + "learning_rate": 9.821310338739478e-06, + "loss": 62632.975, + "step": 88410 + }, + { + "epoch": 0.17861399419029805, + "grad_norm": 497291.4375, + "learning_rate": 9.821217841886399e-06, + "loss": 104448.2063, + "step": 88420 + }, + { + "epoch": 0.17863419482298185, + "grad_norm": 6458404.0, + "learning_rate": 9.82112532153529e-06, + "loss": 269226.25, + "step": 88430 + }, + { + "epoch": 0.17865439545566567, + "grad_norm": 308737.84375, + "learning_rate": 9.821032777686601e-06, + "loss": 148153.95, + "step": 88440 + }, + { + "epoch": 0.17867459608834949, + "grad_norm": 762567.8125, + "learning_rate": 9.820940210340784e-06, + "loss": 160938.8, + "step": 88450 + }, + { + "epoch": 0.1786947967210333, + "grad_norm": 11906.7216796875, + "learning_rate": 9.820847619498288e-06, + "loss": 73396.325, + "step": 88460 + }, + { + "epoch": 0.17871499735371713, + "grad_norm": 735374.125, + "learning_rate": 9.820755005159565e-06, + "loss": 56813.8125, + "step": 88470 + }, + { + "epoch": 0.17873519798640095, + "grad_norm": 1481360.125, + "learning_rate": 9.820662367325067e-06, + "loss": 42252.05, + "step": 88480 + }, + { + "epoch": 0.17875539861908474, + "grad_norm": 128418.296875, + "learning_rate": 9.820569705995244e-06, + "loss": 68915.8313, + "step": 88490 + }, + { + "epoch": 0.17877559925176856, + "grad_norm": 7732297.5, + "learning_rate": 9.82047702117055e-06, + "loss": 78644.2563, + "step": 88500 + }, + { + "epoch": 0.17879579988445238, + "grad_norm": 64164.98828125, + "learning_rate": 9.820384312851437e-06, + "loss": 48830.9625, + "step": 88510 + }, + { + "epoch": 0.1788160005171362, + "grad_norm": 869629.9375, + "learning_rate": 9.820291581038354e-06, + "loss": 156209.3625, + "step": 88520 + }, + { + "epoch": 0.17883620114982002, + "grad_norm": 1359469.25, + "learning_rate": 9.820198825731757e-06, + "loss": 149279.0125, + "step": 88530 + }, + { + "epoch": 0.17885640178250384, + "grad_norm": 1836774.875, + "learning_rate": 9.820106046932092e-06, + "loss": 206072.3625, + "step": 88540 + }, + { + "epoch": 0.17887660241518763, + "grad_norm": 539462.3125, + "learning_rate": 9.820013244639817e-06, + "loss": 76931.95, + "step": 88550 + }, + { + "epoch": 0.17889680304787145, + "grad_norm": 270871.75, + "learning_rate": 9.81992041885538e-06, + "loss": 87912.0625, + "step": 88560 + }, + { + "epoch": 0.17891700368055527, + "grad_norm": 3305519.5, + "learning_rate": 9.819827569579237e-06, + "loss": 163756.2875, + "step": 88570 + }, + { + "epoch": 0.1789372043132391, + "grad_norm": 956274.1875, + "learning_rate": 9.819734696811839e-06, + "loss": 73618.825, + "step": 88580 + }, + { + "epoch": 0.1789574049459229, + "grad_norm": 4124541.0, + "learning_rate": 9.81964180055364e-06, + "loss": 138098.2125, + "step": 88590 + }, + { + "epoch": 0.17897760557860673, + "grad_norm": 1860355.125, + "learning_rate": 9.819548880805087e-06, + "loss": 78098.2563, + "step": 88600 + }, + { + "epoch": 0.17899780621129055, + "grad_norm": 15828338.0, + "learning_rate": 9.819455937566642e-06, + "loss": 96337.875, + "step": 88610 + }, + { + "epoch": 0.17901800684397434, + "grad_norm": 50871.76953125, + "learning_rate": 9.819362970838751e-06, + "loss": 147316.575, + "step": 88620 + }, + { + "epoch": 0.17903820747665816, + "grad_norm": 5999677.5, + "learning_rate": 9.819269980621869e-06, + "loss": 91961.4563, + "step": 88630 + }, + { + "epoch": 0.17905840810934198, + "grad_norm": 1489525.5, + "learning_rate": 9.819176966916451e-06, + "loss": 131118.4875, + "step": 88640 + }, + { + "epoch": 0.1790786087420258, + "grad_norm": 918248.6875, + "learning_rate": 9.819083929722947e-06, + "loss": 218318.7, + "step": 88650 + }, + { + "epoch": 0.17909880937470962, + "grad_norm": 12062.0, + "learning_rate": 9.818990869041816e-06, + "loss": 59212.1375, + "step": 88660 + }, + { + "epoch": 0.17911901000739344, + "grad_norm": 0.0, + "learning_rate": 9.818897784873504e-06, + "loss": 136083.725, + "step": 88670 + }, + { + "epoch": 0.17913921064007723, + "grad_norm": 475748.46875, + "learning_rate": 9.818804677218472e-06, + "loss": 67516.45, + "step": 88680 + }, + { + "epoch": 0.17915941127276105, + "grad_norm": 210885.734375, + "learning_rate": 9.818711546077169e-06, + "loss": 88000.0, + "step": 88690 + }, + { + "epoch": 0.17917961190544487, + "grad_norm": 1384098.125, + "learning_rate": 9.81861839145005e-06, + "loss": 181382.3625, + "step": 88700 + }, + { + "epoch": 0.1791998125381287, + "grad_norm": 21899330.0, + "learning_rate": 9.818525213337568e-06, + "loss": 196689.325, + "step": 88710 + }, + { + "epoch": 0.17922001317081251, + "grad_norm": 673448.625, + "learning_rate": 9.818432011740181e-06, + "loss": 31547.75, + "step": 88720 + }, + { + "epoch": 0.17924021380349633, + "grad_norm": 977884.8125, + "learning_rate": 9.81833878665834e-06, + "loss": 86187.3375, + "step": 88730 + }, + { + "epoch": 0.17926041443618015, + "grad_norm": 1999688.5, + "learning_rate": 9.8182455380925e-06, + "loss": 91120.475, + "step": 88740 + }, + { + "epoch": 0.17928061506886395, + "grad_norm": 2733824.25, + "learning_rate": 9.818152266043115e-06, + "loss": 61257.5875, + "step": 88750 + }, + { + "epoch": 0.17930081570154777, + "grad_norm": 297565.0, + "learning_rate": 9.818058970510642e-06, + "loss": 113727.0, + "step": 88760 + }, + { + "epoch": 0.1793210163342316, + "grad_norm": 1833304.75, + "learning_rate": 9.817965651495533e-06, + "loss": 51678.6219, + "step": 88770 + }, + { + "epoch": 0.1793412169669154, + "grad_norm": 10912434.0, + "learning_rate": 9.817872308998242e-06, + "loss": 67602.0375, + "step": 88780 + }, + { + "epoch": 0.17936141759959923, + "grad_norm": 8589189.0, + "learning_rate": 9.817778943019228e-06, + "loss": 83054.3375, + "step": 88790 + }, + { + "epoch": 0.17938161823228305, + "grad_norm": 5412696.5, + "learning_rate": 9.817685553558945e-06, + "loss": 155025.325, + "step": 88800 + }, + { + "epoch": 0.17940181886496684, + "grad_norm": 29198.642578125, + "learning_rate": 9.817592140617844e-06, + "loss": 163890.35, + "step": 88810 + }, + { + "epoch": 0.17942201949765066, + "grad_norm": 3004811.75, + "learning_rate": 9.817498704196384e-06, + "loss": 83504.7688, + "step": 88820 + }, + { + "epoch": 0.17944222013033448, + "grad_norm": 2888104.5, + "learning_rate": 9.81740524429502e-06, + "loss": 149388.3125, + "step": 88830 + }, + { + "epoch": 0.1794624207630183, + "grad_norm": 1777254.875, + "learning_rate": 9.817311760914206e-06, + "loss": 72477.5, + "step": 88840 + }, + { + "epoch": 0.17948262139570212, + "grad_norm": 106149.0859375, + "learning_rate": 9.8172182540544e-06, + "loss": 163320.575, + "step": 88850 + }, + { + "epoch": 0.17950282202838594, + "grad_norm": 977337.5, + "learning_rate": 9.817124723716057e-06, + "loss": 67937.875, + "step": 88860 + }, + { + "epoch": 0.17952302266106973, + "grad_norm": 3496922.0, + "learning_rate": 9.817031169899631e-06, + "loss": 90521.0188, + "step": 88870 + }, + { + "epoch": 0.17954322329375355, + "grad_norm": 5177572.5, + "learning_rate": 9.81693759260558e-06, + "loss": 113890.2375, + "step": 88880 + }, + { + "epoch": 0.17956342392643737, + "grad_norm": 0.0, + "learning_rate": 9.81684399183436e-06, + "loss": 96833.3875, + "step": 88890 + }, + { + "epoch": 0.1795836245591212, + "grad_norm": 6074084.0, + "learning_rate": 9.816750367586424e-06, + "loss": 195156.55, + "step": 88900 + }, + { + "epoch": 0.179603825191805, + "grad_norm": 3064107.25, + "learning_rate": 9.816656719862234e-06, + "loss": 60297.7688, + "step": 88910 + }, + { + "epoch": 0.17962402582448883, + "grad_norm": 261443.59375, + "learning_rate": 9.816563048662242e-06, + "loss": 25215.5938, + "step": 88920 + }, + { + "epoch": 0.17964422645717265, + "grad_norm": 976161.1875, + "learning_rate": 9.816469353986905e-06, + "loss": 70094.1562, + "step": 88930 + }, + { + "epoch": 0.17966442708985644, + "grad_norm": 647582.625, + "learning_rate": 9.816375635836683e-06, + "loss": 88496.375, + "step": 88940 + }, + { + "epoch": 0.17968462772254026, + "grad_norm": 479616.6875, + "learning_rate": 9.816281894212028e-06, + "loss": 85340.4812, + "step": 88950 + }, + { + "epoch": 0.17970482835522408, + "grad_norm": 352016.1875, + "learning_rate": 9.8161881291134e-06, + "loss": 49062.6906, + "step": 88960 + }, + { + "epoch": 0.1797250289879079, + "grad_norm": 442382.6875, + "learning_rate": 9.816094340541256e-06, + "loss": 131146.85, + "step": 88970 + }, + { + "epoch": 0.17974522962059172, + "grad_norm": 25848.390625, + "learning_rate": 9.81600052849605e-06, + "loss": 128233.9875, + "step": 88980 + }, + { + "epoch": 0.17976543025327554, + "grad_norm": 888895.9375, + "learning_rate": 9.815906692978244e-06, + "loss": 72115.5625, + "step": 88990 + }, + { + "epoch": 0.17978563088595934, + "grad_norm": 584607.6875, + "learning_rate": 9.815812833988292e-06, + "loss": 59609.725, + "step": 89000 + }, + { + "epoch": 0.17980583151864316, + "grad_norm": 168161.8125, + "learning_rate": 9.815718951526651e-06, + "loss": 53100.875, + "step": 89010 + }, + { + "epoch": 0.17982603215132698, + "grad_norm": 3548334.25, + "learning_rate": 9.815625045593783e-06, + "loss": 135556.125, + "step": 89020 + }, + { + "epoch": 0.1798462327840108, + "grad_norm": 245859.765625, + "learning_rate": 9.81553111619014e-06, + "loss": 176504.2375, + "step": 89030 + }, + { + "epoch": 0.17986643341669462, + "grad_norm": 6271373.0, + "learning_rate": 9.815437163316182e-06, + "loss": 103744.3, + "step": 89040 + }, + { + "epoch": 0.17988663404937844, + "grad_norm": 435054.34375, + "learning_rate": 9.815343186972369e-06, + "loss": 71640.9312, + "step": 89050 + }, + { + "epoch": 0.17990683468206226, + "grad_norm": 137674.046875, + "learning_rate": 9.815249187159158e-06, + "loss": 87765.15, + "step": 89060 + }, + { + "epoch": 0.17992703531474605, + "grad_norm": 551518.9375, + "learning_rate": 9.815155163877003e-06, + "loss": 84015.6125, + "step": 89070 + }, + { + "epoch": 0.17994723594742987, + "grad_norm": 1052305.375, + "learning_rate": 9.81506111712637e-06, + "loss": 88208.4812, + "step": 89080 + }, + { + "epoch": 0.1799674365801137, + "grad_norm": 2344731.0, + "learning_rate": 9.81496704690771e-06, + "loss": 57236.075, + "step": 89090 + }, + { + "epoch": 0.1799876372127975, + "grad_norm": 421202.40625, + "learning_rate": 9.814872953221487e-06, + "loss": 48863.7031, + "step": 89100 + }, + { + "epoch": 0.18000783784548133, + "grad_norm": 114788.7578125, + "learning_rate": 9.814778836068154e-06, + "loss": 25007.9781, + "step": 89110 + }, + { + "epoch": 0.18002803847816515, + "grad_norm": 3345118.5, + "learning_rate": 9.814684695448176e-06, + "loss": 87111.2188, + "step": 89120 + }, + { + "epoch": 0.18004823911084894, + "grad_norm": 3370475.0, + "learning_rate": 9.814590531362006e-06, + "loss": 99943.925, + "step": 89130 + }, + { + "epoch": 0.18006843974353276, + "grad_norm": 3482309.25, + "learning_rate": 9.814496343810109e-06, + "loss": 91956.2375, + "step": 89140 + }, + { + "epoch": 0.18008864037621658, + "grad_norm": 4419446.0, + "learning_rate": 9.814402132792939e-06, + "loss": 83941.6938, + "step": 89150 + }, + { + "epoch": 0.1801088410089004, + "grad_norm": 1464741.875, + "learning_rate": 9.814307898310957e-06, + "loss": 207704.45, + "step": 89160 + }, + { + "epoch": 0.18012904164158422, + "grad_norm": 99389.828125, + "learning_rate": 9.814213640364623e-06, + "loss": 76468.35, + "step": 89170 + }, + { + "epoch": 0.18014924227426804, + "grad_norm": 134160.96875, + "learning_rate": 9.814119358954394e-06, + "loss": 35334.3375, + "step": 89180 + }, + { + "epoch": 0.18016944290695183, + "grad_norm": 3670235.25, + "learning_rate": 9.81402505408073e-06, + "loss": 95872.4563, + "step": 89190 + }, + { + "epoch": 0.18018964353963565, + "grad_norm": 229134.1875, + "learning_rate": 9.813930725744095e-06, + "loss": 128708.275, + "step": 89200 + }, + { + "epoch": 0.18020984417231947, + "grad_norm": 1024472.75, + "learning_rate": 9.813836373944945e-06, + "loss": 124981.4875, + "step": 89210 + }, + { + "epoch": 0.1802300448050033, + "grad_norm": 210478.59375, + "learning_rate": 9.813741998683738e-06, + "loss": 94990.725, + "step": 89220 + }, + { + "epoch": 0.1802502454376871, + "grad_norm": 208545.84375, + "learning_rate": 9.813647599960938e-06, + "loss": 74141.0188, + "step": 89230 + }, + { + "epoch": 0.18027044607037093, + "grad_norm": 31747.90234375, + "learning_rate": 9.813553177777005e-06, + "loss": 34696.725, + "step": 89240 + }, + { + "epoch": 0.18029064670305475, + "grad_norm": 2304568.75, + "learning_rate": 9.813458732132395e-06, + "loss": 161553.75, + "step": 89250 + }, + { + "epoch": 0.18031084733573854, + "grad_norm": 278094.84375, + "learning_rate": 9.813364263027572e-06, + "loss": 130670.0125, + "step": 89260 + }, + { + "epoch": 0.18033104796842236, + "grad_norm": 0.0, + "learning_rate": 9.813269770462995e-06, + "loss": 104484.1938, + "step": 89270 + }, + { + "epoch": 0.18035124860110618, + "grad_norm": 3990670.75, + "learning_rate": 9.813175254439125e-06, + "loss": 75766.1625, + "step": 89280 + }, + { + "epoch": 0.18037144923379, + "grad_norm": 372834.8125, + "learning_rate": 9.813080714956422e-06, + "loss": 28396.7, + "step": 89290 + }, + { + "epoch": 0.18039164986647382, + "grad_norm": 5268528.5, + "learning_rate": 9.812986152015349e-06, + "loss": 73495.7312, + "step": 89300 + }, + { + "epoch": 0.18041185049915764, + "grad_norm": 18210.107421875, + "learning_rate": 9.812891565616363e-06, + "loss": 138039.3375, + "step": 89310 + }, + { + "epoch": 0.18043205113184144, + "grad_norm": 2369000.0, + "learning_rate": 9.812796955759929e-06, + "loss": 129787.75, + "step": 89320 + }, + { + "epoch": 0.18045225176452526, + "grad_norm": 3377455.75, + "learning_rate": 9.812702322446506e-06, + "loss": 72827.4125, + "step": 89330 + }, + { + "epoch": 0.18047245239720908, + "grad_norm": 187460.671875, + "learning_rate": 9.812607665676555e-06, + "loss": 37692.7281, + "step": 89340 + }, + { + "epoch": 0.1804926530298929, + "grad_norm": 695349.0, + "learning_rate": 9.812512985450539e-06, + "loss": 124188.4125, + "step": 89350 + }, + { + "epoch": 0.18051285366257672, + "grad_norm": 209895.328125, + "learning_rate": 9.812418281768919e-06, + "loss": 75667.9312, + "step": 89360 + }, + { + "epoch": 0.18053305429526054, + "grad_norm": 1437616.625, + "learning_rate": 9.812323554632153e-06, + "loss": 116136.775, + "step": 89370 + }, + { + "epoch": 0.18055325492794436, + "grad_norm": 3144472.5, + "learning_rate": 9.812228804040708e-06, + "loss": 124927.65, + "step": 89380 + }, + { + "epoch": 0.18057345556062815, + "grad_norm": 1352282.0, + "learning_rate": 9.812134029995043e-06, + "loss": 46911.5125, + "step": 89390 + }, + { + "epoch": 0.18059365619331197, + "grad_norm": 786448.9375, + "learning_rate": 9.81203923249562e-06, + "loss": 81610.2375, + "step": 89400 + }, + { + "epoch": 0.1806138568259958, + "grad_norm": 9366410.0, + "learning_rate": 9.811944411542903e-06, + "loss": 79129.5, + "step": 89410 + }, + { + "epoch": 0.1806340574586796, + "grad_norm": 15245854.0, + "learning_rate": 9.811849567137351e-06, + "loss": 82725.925, + "step": 89420 + }, + { + "epoch": 0.18065425809136343, + "grad_norm": 128510.859375, + "learning_rate": 9.811754699279428e-06, + "loss": 129490.7875, + "step": 89430 + }, + { + "epoch": 0.18067445872404725, + "grad_norm": 20859.44921875, + "learning_rate": 9.811659807969596e-06, + "loss": 106422.3125, + "step": 89440 + }, + { + "epoch": 0.18069465935673104, + "grad_norm": 426114.71875, + "learning_rate": 9.811564893208317e-06, + "loss": 166912.225, + "step": 89450 + }, + { + "epoch": 0.18071485998941486, + "grad_norm": 437735.40625, + "learning_rate": 9.811469954996056e-06, + "loss": 85467.325, + "step": 89460 + }, + { + "epoch": 0.18073506062209868, + "grad_norm": 616757.6875, + "learning_rate": 9.811374993333274e-06, + "loss": 90584.8188, + "step": 89470 + }, + { + "epoch": 0.1807552612547825, + "grad_norm": 748371.3125, + "learning_rate": 9.811280008220432e-06, + "loss": 126958.3875, + "step": 89480 + }, + { + "epoch": 0.18077546188746632, + "grad_norm": 3568918.0, + "learning_rate": 9.811184999657996e-06, + "loss": 49574.1094, + "step": 89490 + }, + { + "epoch": 0.18079566252015014, + "grad_norm": 2500097.5, + "learning_rate": 9.811089967646427e-06, + "loss": 84114.125, + "step": 89500 + }, + { + "epoch": 0.18081586315283393, + "grad_norm": 2277338.0, + "learning_rate": 9.81099491218619e-06, + "loss": 58958.5375, + "step": 89510 + }, + { + "epoch": 0.18083606378551775, + "grad_norm": 270978.53125, + "learning_rate": 9.810899833277747e-06, + "loss": 79720.3375, + "step": 89520 + }, + { + "epoch": 0.18085626441820157, + "grad_norm": 1284333.25, + "learning_rate": 9.810804730921561e-06, + "loss": 74970.1375, + "step": 89530 + }, + { + "epoch": 0.1808764650508854, + "grad_norm": 2533807.75, + "learning_rate": 9.810709605118098e-06, + "loss": 76072.3, + "step": 89540 + }, + { + "epoch": 0.1808966656835692, + "grad_norm": 662452.25, + "learning_rate": 9.810614455867818e-06, + "loss": 44273.825, + "step": 89550 + }, + { + "epoch": 0.18091686631625303, + "grad_norm": 269282.9375, + "learning_rate": 9.810519283171189e-06, + "loss": 43757.1094, + "step": 89560 + }, + { + "epoch": 0.18093706694893685, + "grad_norm": 368944.59375, + "learning_rate": 9.810424087028669e-06, + "loss": 142003.175, + "step": 89570 + }, + { + "epoch": 0.18095726758162065, + "grad_norm": 200387.484375, + "learning_rate": 9.810328867440729e-06, + "loss": 51739.7719, + "step": 89580 + }, + { + "epoch": 0.18097746821430447, + "grad_norm": 99757.3125, + "learning_rate": 9.810233624407827e-06, + "loss": 143512.6375, + "step": 89590 + }, + { + "epoch": 0.18099766884698829, + "grad_norm": 32161.96484375, + "learning_rate": 9.81013835793043e-06, + "loss": 81484.8938, + "step": 89600 + }, + { + "epoch": 0.1810178694796721, + "grad_norm": 41007.203125, + "learning_rate": 9.810043068009002e-06, + "loss": 65825.3875, + "step": 89610 + }, + { + "epoch": 0.18103807011235593, + "grad_norm": 127564.78125, + "learning_rate": 9.809947754644009e-06, + "loss": 107584.8125, + "step": 89620 + }, + { + "epoch": 0.18105827074503975, + "grad_norm": 436511.40625, + "learning_rate": 9.809852417835913e-06, + "loss": 61895.3562, + "step": 89630 + }, + { + "epoch": 0.18107847137772354, + "grad_norm": 28572.412109375, + "learning_rate": 9.80975705758518e-06, + "loss": 47441.1969, + "step": 89640 + }, + { + "epoch": 0.18109867201040736, + "grad_norm": 325980.375, + "learning_rate": 9.809661673892274e-06, + "loss": 155590.5125, + "step": 89650 + }, + { + "epoch": 0.18111887264309118, + "grad_norm": 75501.71875, + "learning_rate": 9.80956626675766e-06, + "loss": 50750.1656, + "step": 89660 + }, + { + "epoch": 0.181139073275775, + "grad_norm": 7890255.5, + "learning_rate": 9.809470836181804e-06, + "loss": 88425.0312, + "step": 89670 + }, + { + "epoch": 0.18115927390845882, + "grad_norm": 178785.90625, + "learning_rate": 9.80937538216517e-06, + "loss": 128509.75, + "step": 89680 + }, + { + "epoch": 0.18117947454114264, + "grad_norm": 798995.5, + "learning_rate": 9.809279904708224e-06, + "loss": 137469.5, + "step": 89690 + }, + { + "epoch": 0.18119967517382646, + "grad_norm": 555256.75, + "learning_rate": 9.809184403811432e-06, + "loss": 90622.3562, + "step": 89700 + }, + { + "epoch": 0.18121987580651025, + "grad_norm": 50101.55859375, + "learning_rate": 9.809088879475257e-06, + "loss": 111030.15, + "step": 89710 + }, + { + "epoch": 0.18124007643919407, + "grad_norm": 415602.8125, + "learning_rate": 9.808993331700167e-06, + "loss": 47470.7438, + "step": 89720 + }, + { + "epoch": 0.1812602770718779, + "grad_norm": 3596961.25, + "learning_rate": 9.808897760486626e-06, + "loss": 169250.625, + "step": 89730 + }, + { + "epoch": 0.1812804777045617, + "grad_norm": 1274203.25, + "learning_rate": 9.808802165835101e-06, + "loss": 73464.075, + "step": 89740 + }, + { + "epoch": 0.18130067833724553, + "grad_norm": 2300204.75, + "learning_rate": 9.808706547746057e-06, + "loss": 67465.7625, + "step": 89750 + }, + { + "epoch": 0.18132087896992935, + "grad_norm": 2159103.75, + "learning_rate": 9.808610906219963e-06, + "loss": 59944.1312, + "step": 89760 + }, + { + "epoch": 0.18134107960261314, + "grad_norm": 1046730.8125, + "learning_rate": 9.80851524125728e-06, + "loss": 89832.475, + "step": 89770 + }, + { + "epoch": 0.18136128023529696, + "grad_norm": 872960.875, + "learning_rate": 9.808419552858477e-06, + "loss": 85610.5625, + "step": 89780 + }, + { + "epoch": 0.18138148086798078, + "grad_norm": 193798.390625, + "learning_rate": 9.808323841024021e-06, + "loss": 225754.15, + "step": 89790 + }, + { + "epoch": 0.1814016815006646, + "grad_norm": 4632880.5, + "learning_rate": 9.808228105754378e-06, + "loss": 60600.7125, + "step": 89800 + }, + { + "epoch": 0.18142188213334842, + "grad_norm": 273577.1875, + "learning_rate": 9.808132347050013e-06, + "loss": 48613.8562, + "step": 89810 + }, + { + "epoch": 0.18144208276603224, + "grad_norm": 1347654.375, + "learning_rate": 9.808036564911396e-06, + "loss": 65930.7312, + "step": 89820 + }, + { + "epoch": 0.18146228339871603, + "grad_norm": 119891.265625, + "learning_rate": 9.80794075933899e-06, + "loss": 46743.0125, + "step": 89830 + }, + { + "epoch": 0.18148248403139985, + "grad_norm": 99357.703125, + "learning_rate": 9.807844930333266e-06, + "loss": 39754.7375, + "step": 89840 + }, + { + "epoch": 0.18150268466408367, + "grad_norm": 3951823.0, + "learning_rate": 9.807749077894686e-06, + "loss": 125785.025, + "step": 89850 + }, + { + "epoch": 0.1815228852967675, + "grad_norm": 1541159.25, + "learning_rate": 9.807653202023723e-06, + "loss": 58028.4812, + "step": 89860 + }, + { + "epoch": 0.18154308592945131, + "grad_norm": 505737.6875, + "learning_rate": 9.80755730272084e-06, + "loss": 145485.9625, + "step": 89870 + }, + { + "epoch": 0.18156328656213513, + "grad_norm": 1038804.9375, + "learning_rate": 9.807461379986506e-06, + "loss": 36515.1687, + "step": 89880 + }, + { + "epoch": 0.18158348719481895, + "grad_norm": 1370274.125, + "learning_rate": 9.807365433821188e-06, + "loss": 35783.9656, + "step": 89890 + }, + { + "epoch": 0.18160368782750275, + "grad_norm": 154300.59375, + "learning_rate": 9.807269464225355e-06, + "loss": 27337.7375, + "step": 89900 + }, + { + "epoch": 0.18162388846018657, + "grad_norm": 2460595.25, + "learning_rate": 9.807173471199474e-06, + "loss": 100298.1187, + "step": 89910 + }, + { + "epoch": 0.1816440890928704, + "grad_norm": 2368849.0, + "learning_rate": 9.80707745474401e-06, + "loss": 57030.075, + "step": 89920 + }, + { + "epoch": 0.1816642897255542, + "grad_norm": 1553646.875, + "learning_rate": 9.806981414859435e-06, + "loss": 84571.7375, + "step": 89930 + }, + { + "epoch": 0.18168449035823803, + "grad_norm": 9750639.0, + "learning_rate": 9.806885351546215e-06, + "loss": 106193.25, + "step": 89940 + }, + { + "epoch": 0.18170469099092185, + "grad_norm": 96626.0078125, + "learning_rate": 9.806789264804821e-06, + "loss": 42854.5594, + "step": 89950 + }, + { + "epoch": 0.18172489162360564, + "grad_norm": 7760990.0, + "learning_rate": 9.806693154635719e-06, + "loss": 177059.175, + "step": 89960 + }, + { + "epoch": 0.18174509225628946, + "grad_norm": 964160.3125, + "learning_rate": 9.806597021039374e-06, + "loss": 52980.4563, + "step": 89970 + }, + { + "epoch": 0.18176529288897328, + "grad_norm": 330916.1875, + "learning_rate": 9.806500864016261e-06, + "loss": 47817.2031, + "step": 89980 + }, + { + "epoch": 0.1817854935216571, + "grad_norm": 2148338.75, + "learning_rate": 9.806404683566845e-06, + "loss": 73165.225, + "step": 89990 + }, + { + "epoch": 0.18180569415434092, + "grad_norm": 276317.28125, + "learning_rate": 9.806308479691595e-06, + "loss": 68712.0063, + "step": 90000 + }, + { + "epoch": 0.18182589478702474, + "grad_norm": 177122.890625, + "learning_rate": 9.80621225239098e-06, + "loss": 49250.7875, + "step": 90010 + }, + { + "epoch": 0.18184609541970856, + "grad_norm": 6888617.5, + "learning_rate": 9.806116001665471e-06, + "loss": 63568.4125, + "step": 90020 + }, + { + "epoch": 0.18186629605239235, + "grad_norm": 378049.84375, + "learning_rate": 9.806019727515534e-06, + "loss": 113221.6875, + "step": 90030 + }, + { + "epoch": 0.18188649668507617, + "grad_norm": 3884096.5, + "learning_rate": 9.805923429941642e-06, + "loss": 74195.2063, + "step": 90040 + }, + { + "epoch": 0.18190669731776, + "grad_norm": 18616810.0, + "learning_rate": 9.80582710894426e-06, + "loss": 168801.0625, + "step": 90050 + }, + { + "epoch": 0.1819268979504438, + "grad_norm": 1561681.875, + "learning_rate": 9.805730764523861e-06, + "loss": 78522.6125, + "step": 90060 + }, + { + "epoch": 0.18194709858312763, + "grad_norm": 1721541.5, + "learning_rate": 9.805634396680912e-06, + "loss": 98558.0375, + "step": 90070 + }, + { + "epoch": 0.18196729921581145, + "grad_norm": 3731703.75, + "learning_rate": 9.805538005415885e-06, + "loss": 61978.8812, + "step": 90080 + }, + { + "epoch": 0.18198749984849524, + "grad_norm": 4172699.25, + "learning_rate": 9.805441590729246e-06, + "loss": 101892.0875, + "step": 90090 + }, + { + "epoch": 0.18200770048117906, + "grad_norm": 679823.3125, + "learning_rate": 9.80534515262147e-06, + "loss": 115403.6125, + "step": 90100 + }, + { + "epoch": 0.18202790111386288, + "grad_norm": 191333.5, + "learning_rate": 9.805248691093023e-06, + "loss": 96589.1062, + "step": 90110 + }, + { + "epoch": 0.1820481017465467, + "grad_norm": 10212229.0, + "learning_rate": 9.805152206144378e-06, + "loss": 177423.9, + "step": 90120 + }, + { + "epoch": 0.18206830237923052, + "grad_norm": 2564155.25, + "learning_rate": 9.805055697776003e-06, + "loss": 70737.525, + "step": 90130 + }, + { + "epoch": 0.18208850301191434, + "grad_norm": 1224478.125, + "learning_rate": 9.80495916598837e-06, + "loss": 133039.525, + "step": 90140 + }, + { + "epoch": 0.18210870364459814, + "grad_norm": 5247399.5, + "learning_rate": 9.804862610781949e-06, + "loss": 90199.225, + "step": 90150 + }, + { + "epoch": 0.18212890427728196, + "grad_norm": 661914.25, + "learning_rate": 9.80476603215721e-06, + "loss": 48875.0375, + "step": 90160 + }, + { + "epoch": 0.18214910490996578, + "grad_norm": 6898221.0, + "learning_rate": 9.804669430114625e-06, + "loss": 100480.7625, + "step": 90170 + }, + { + "epoch": 0.1821693055426496, + "grad_norm": 302275.40625, + "learning_rate": 9.804572804654662e-06, + "loss": 69809.7125, + "step": 90180 + }, + { + "epoch": 0.18218950617533342, + "grad_norm": 2290630.0, + "learning_rate": 9.804476155777796e-06, + "loss": 111008.775, + "step": 90190 + }, + { + "epoch": 0.18220970680801724, + "grad_norm": 1126948.125, + "learning_rate": 9.804379483484493e-06, + "loss": 49432.1719, + "step": 90200 + }, + { + "epoch": 0.18222990744070106, + "grad_norm": 1812689.125, + "learning_rate": 9.80428278777523e-06, + "loss": 97070.1625, + "step": 90210 + }, + { + "epoch": 0.18225010807338485, + "grad_norm": 11493448.0, + "learning_rate": 9.804186068650474e-06, + "loss": 85559.075, + "step": 90220 + }, + { + "epoch": 0.18227030870606867, + "grad_norm": 2139579.25, + "learning_rate": 9.804089326110697e-06, + "loss": 123636.0125, + "step": 90230 + }, + { + "epoch": 0.1822905093387525, + "grad_norm": 1349572.75, + "learning_rate": 9.803992560156372e-06, + "loss": 69189.1, + "step": 90240 + }, + { + "epoch": 0.1823107099714363, + "grad_norm": 2480889.75, + "learning_rate": 9.803895770787972e-06, + "loss": 54369.1125, + "step": 90250 + }, + { + "epoch": 0.18233091060412013, + "grad_norm": 1338643.375, + "learning_rate": 9.803798958005965e-06, + "loss": 72867.675, + "step": 90260 + }, + { + "epoch": 0.18235111123680395, + "grad_norm": 937529.3125, + "learning_rate": 9.803702121810823e-06, + "loss": 145063.975, + "step": 90270 + }, + { + "epoch": 0.18237131186948774, + "grad_norm": 1124285.375, + "learning_rate": 9.803605262203022e-06, + "loss": 56968.1937, + "step": 90280 + }, + { + "epoch": 0.18239151250217156, + "grad_norm": 1166571.5, + "learning_rate": 9.80350837918303e-06, + "loss": 39622.8875, + "step": 90290 + }, + { + "epoch": 0.18241171313485538, + "grad_norm": 181237.9375, + "learning_rate": 9.803411472751321e-06, + "loss": 47841.2875, + "step": 90300 + }, + { + "epoch": 0.1824319137675392, + "grad_norm": 1495976.125, + "learning_rate": 9.803314542908368e-06, + "loss": 45273.0437, + "step": 90310 + }, + { + "epoch": 0.18245211440022302, + "grad_norm": 1875055.875, + "learning_rate": 9.803217589654642e-06, + "loss": 149837.6, + "step": 90320 + }, + { + "epoch": 0.18247231503290684, + "grad_norm": 2091609.375, + "learning_rate": 9.803120612990616e-06, + "loss": 106700.55, + "step": 90330 + }, + { + "epoch": 0.18249251566559066, + "grad_norm": 101923.7265625, + "learning_rate": 9.803023612916763e-06, + "loss": 32910.1937, + "step": 90340 + }, + { + "epoch": 0.18251271629827445, + "grad_norm": 2138162.25, + "learning_rate": 9.802926589433553e-06, + "loss": 62085.6375, + "step": 90350 + }, + { + "epoch": 0.18253291693095827, + "grad_norm": 4014063.5, + "learning_rate": 9.802829542541463e-06, + "loss": 75882.6625, + "step": 90360 + }, + { + "epoch": 0.1825531175636421, + "grad_norm": 2653093.0, + "learning_rate": 9.802732472240966e-06, + "loss": 114632.5625, + "step": 90370 + }, + { + "epoch": 0.1825733181963259, + "grad_norm": 477734.25, + "learning_rate": 9.802635378532531e-06, + "loss": 46731.7688, + "step": 90380 + }, + { + "epoch": 0.18259351882900973, + "grad_norm": 1380472.25, + "learning_rate": 9.802538261416635e-06, + "loss": 48513.7688, + "step": 90390 + }, + { + "epoch": 0.18261371946169355, + "grad_norm": 1267796.375, + "learning_rate": 9.80244112089375e-06, + "loss": 109987.6375, + "step": 90400 + }, + { + "epoch": 0.18263392009437734, + "grad_norm": 1330185.25, + "learning_rate": 9.802343956964348e-06, + "loss": 66577.9875, + "step": 90410 + }, + { + "epoch": 0.18265412072706116, + "grad_norm": 13157866.0, + "learning_rate": 9.802246769628906e-06, + "loss": 103225.5875, + "step": 90420 + }, + { + "epoch": 0.18267432135974498, + "grad_norm": 487976.15625, + "learning_rate": 9.802149558887895e-06, + "loss": 48391.8469, + "step": 90430 + }, + { + "epoch": 0.1826945219924288, + "grad_norm": 1581534.0, + "learning_rate": 9.802052324741789e-06, + "loss": 59603.7812, + "step": 90440 + }, + { + "epoch": 0.18271472262511262, + "grad_norm": 761137.375, + "learning_rate": 9.801955067191062e-06, + "loss": 159476.625, + "step": 90450 + }, + { + "epoch": 0.18273492325779644, + "grad_norm": 731356.3125, + "learning_rate": 9.80185778623619e-06, + "loss": 67294.9125, + "step": 90460 + }, + { + "epoch": 0.18275512389048024, + "grad_norm": 4872016.0, + "learning_rate": 9.801760481877644e-06, + "loss": 52348.7875, + "step": 90470 + }, + { + "epoch": 0.18277532452316406, + "grad_norm": 3101220.25, + "learning_rate": 9.8016631541159e-06, + "loss": 44843.0344, + "step": 90480 + }, + { + "epoch": 0.18279552515584788, + "grad_norm": 354745.4375, + "learning_rate": 9.801565802951432e-06, + "loss": 72601.2125, + "step": 90490 + }, + { + "epoch": 0.1828157257885317, + "grad_norm": 581344.8125, + "learning_rate": 9.801468428384716e-06, + "loss": 85174.3875, + "step": 90500 + }, + { + "epoch": 0.18283592642121552, + "grad_norm": 16300.7099609375, + "learning_rate": 9.801371030416224e-06, + "loss": 69213.3125, + "step": 90510 + }, + { + "epoch": 0.18285612705389934, + "grad_norm": 3565864.5, + "learning_rate": 9.801273609046433e-06, + "loss": 123396.875, + "step": 90520 + }, + { + "epoch": 0.18287632768658316, + "grad_norm": 3584271.25, + "learning_rate": 9.801176164275816e-06, + "loss": 64632.125, + "step": 90530 + }, + { + "epoch": 0.18289652831926695, + "grad_norm": 2582068.75, + "learning_rate": 9.801078696104849e-06, + "loss": 169558.175, + "step": 90540 + }, + { + "epoch": 0.18291672895195077, + "grad_norm": 886623.0, + "learning_rate": 9.800981204534006e-06, + "loss": 142501.3875, + "step": 90550 + }, + { + "epoch": 0.1829369295846346, + "grad_norm": 1265280.0, + "learning_rate": 9.800883689563764e-06, + "loss": 93027.4, + "step": 90560 + }, + { + "epoch": 0.1829571302173184, + "grad_norm": 1542166.125, + "learning_rate": 9.800786151194596e-06, + "loss": 138418.4, + "step": 90570 + }, + { + "epoch": 0.18297733085000223, + "grad_norm": 5147257.0, + "learning_rate": 9.800688589426978e-06, + "loss": 180228.7875, + "step": 90580 + }, + { + "epoch": 0.18299753148268605, + "grad_norm": 1917551.375, + "learning_rate": 9.800591004261388e-06, + "loss": 42883.225, + "step": 90590 + }, + { + "epoch": 0.18301773211536984, + "grad_norm": 157823.53125, + "learning_rate": 9.8004933956983e-06, + "loss": 79349.6125, + "step": 90600 + }, + { + "epoch": 0.18303793274805366, + "grad_norm": 1282760.375, + "learning_rate": 9.800395763738189e-06, + "loss": 42438.4938, + "step": 90610 + }, + { + "epoch": 0.18305813338073748, + "grad_norm": 1631965.875, + "learning_rate": 9.80029810838153e-06, + "loss": 75633.775, + "step": 90620 + }, + { + "epoch": 0.1830783340134213, + "grad_norm": 9916.703125, + "learning_rate": 9.8002004296288e-06, + "loss": 47336.1719, + "step": 90630 + }, + { + "epoch": 0.18309853464610512, + "grad_norm": 1668216.75, + "learning_rate": 9.800102727480476e-06, + "loss": 87262.325, + "step": 90640 + }, + { + "epoch": 0.18311873527878894, + "grad_norm": 1070477.25, + "learning_rate": 9.800005001937034e-06, + "loss": 31128.9875, + "step": 90650 + }, + { + "epoch": 0.18313893591147276, + "grad_norm": 1939638.0, + "learning_rate": 9.79990725299895e-06, + "loss": 106177.5375, + "step": 90660 + }, + { + "epoch": 0.18315913654415655, + "grad_norm": 615412.6875, + "learning_rate": 9.7998094806667e-06, + "loss": 149085.1125, + "step": 90670 + }, + { + "epoch": 0.18317933717684037, + "grad_norm": 171541.6875, + "learning_rate": 9.79971168494076e-06, + "loss": 86899.6438, + "step": 90680 + }, + { + "epoch": 0.1831995378095242, + "grad_norm": 4342648.0, + "learning_rate": 9.799613865821608e-06, + "loss": 143915.175, + "step": 90690 + }, + { + "epoch": 0.183219738442208, + "grad_norm": 160314.96875, + "learning_rate": 9.799516023309719e-06, + "loss": 136139.3, + "step": 90700 + }, + { + "epoch": 0.18323993907489183, + "grad_norm": 390965.9375, + "learning_rate": 9.799418157405571e-06, + "loss": 108490.55, + "step": 90710 + }, + { + "epoch": 0.18326013970757565, + "grad_norm": 2866849.5, + "learning_rate": 9.799320268109644e-06, + "loss": 57908.3, + "step": 90720 + }, + { + "epoch": 0.18328034034025945, + "grad_norm": 943420.875, + "learning_rate": 9.799222355422409e-06, + "loss": 68164.7437, + "step": 90730 + }, + { + "epoch": 0.18330054097294327, + "grad_norm": 100640.0234375, + "learning_rate": 9.799124419344348e-06, + "loss": 70625.2312, + "step": 90740 + }, + { + "epoch": 0.18332074160562709, + "grad_norm": 287711.0, + "learning_rate": 9.799026459875935e-06, + "loss": 123428.9, + "step": 90750 + }, + { + "epoch": 0.1833409422383109, + "grad_norm": 153953.125, + "learning_rate": 9.798928477017651e-06, + "loss": 68207.625, + "step": 90760 + }, + { + "epoch": 0.18336114287099473, + "grad_norm": 470297.65625, + "learning_rate": 9.79883047076997e-06, + "loss": 142000.6875, + "step": 90770 + }, + { + "epoch": 0.18338134350367855, + "grad_norm": 3367302.25, + "learning_rate": 9.798732441133372e-06, + "loss": 143725.5375, + "step": 90780 + }, + { + "epoch": 0.18340154413636234, + "grad_norm": 1319303.75, + "learning_rate": 9.798634388108334e-06, + "loss": 90027.15, + "step": 90790 + }, + { + "epoch": 0.18342174476904616, + "grad_norm": 15759851.0, + "learning_rate": 9.798536311695334e-06, + "loss": 204907.675, + "step": 90800 + }, + { + "epoch": 0.18344194540172998, + "grad_norm": 317452.09375, + "learning_rate": 9.79843821189485e-06, + "loss": 90340.1313, + "step": 90810 + }, + { + "epoch": 0.1834621460344138, + "grad_norm": 6704780.0, + "learning_rate": 9.79834008870736e-06, + "loss": 96107.8687, + "step": 90820 + }, + { + "epoch": 0.18348234666709762, + "grad_norm": 15328.49609375, + "learning_rate": 9.798241942133344e-06, + "loss": 187266.775, + "step": 90830 + }, + { + "epoch": 0.18350254729978144, + "grad_norm": 76590.7890625, + "learning_rate": 9.798143772173276e-06, + "loss": 59414.8187, + "step": 90840 + }, + { + "epoch": 0.18352274793246526, + "grad_norm": 2643271.75, + "learning_rate": 9.79804557882764e-06, + "loss": 67620.2625, + "step": 90850 + }, + { + "epoch": 0.18354294856514905, + "grad_norm": 26092.03125, + "learning_rate": 9.797947362096909e-06, + "loss": 195254.6625, + "step": 90860 + }, + { + "epoch": 0.18356314919783287, + "grad_norm": 15324561.0, + "learning_rate": 9.797849121981566e-06, + "loss": 140534.775, + "step": 90870 + }, + { + "epoch": 0.1835833498305167, + "grad_norm": 132757.53125, + "learning_rate": 9.797750858482088e-06, + "loss": 57883.275, + "step": 90880 + }, + { + "epoch": 0.1836035504632005, + "grad_norm": 1744357.75, + "learning_rate": 9.797652571598954e-06, + "loss": 79155.6375, + "step": 90890 + }, + { + "epoch": 0.18362375109588433, + "grad_norm": 56569.1640625, + "learning_rate": 9.797554261332644e-06, + "loss": 95895.9375, + "step": 90900 + }, + { + "epoch": 0.18364395172856815, + "grad_norm": 1479606.25, + "learning_rate": 9.797455927683637e-06, + "loss": 150393.9375, + "step": 90910 + }, + { + "epoch": 0.18366415236125194, + "grad_norm": 66381.296875, + "learning_rate": 9.79735757065241e-06, + "loss": 118466.9875, + "step": 90920 + }, + { + "epoch": 0.18368435299393576, + "grad_norm": 2106037.75, + "learning_rate": 9.797259190239444e-06, + "loss": 144360.975, + "step": 90930 + }, + { + "epoch": 0.18370455362661958, + "grad_norm": 319415.875, + "learning_rate": 9.797160786445218e-06, + "loss": 45392.4656, + "step": 90940 + }, + { + "epoch": 0.1837247542593034, + "grad_norm": 7134059.0, + "learning_rate": 9.797062359270215e-06, + "loss": 136070.3875, + "step": 90950 + }, + { + "epoch": 0.18374495489198722, + "grad_norm": 15199285.0, + "learning_rate": 9.79696390871491e-06, + "loss": 210360.775, + "step": 90960 + }, + { + "epoch": 0.18376515552467104, + "grad_norm": 29299.00390625, + "learning_rate": 9.796865434779786e-06, + "loss": 101023.7563, + "step": 90970 + }, + { + "epoch": 0.18378535615735486, + "grad_norm": 6669269.0, + "learning_rate": 9.79676693746532e-06, + "loss": 53091.0813, + "step": 90980 + }, + { + "epoch": 0.18380555679003865, + "grad_norm": 11205046.0, + "learning_rate": 9.796668416771996e-06, + "loss": 132021.025, + "step": 90990 + }, + { + "epoch": 0.18382575742272247, + "grad_norm": 5249936.5, + "learning_rate": 9.796569872700287e-06, + "loss": 33454.0563, + "step": 91000 + }, + { + "epoch": 0.1838459580554063, + "grad_norm": 13490079.0, + "learning_rate": 9.796471305250683e-06, + "loss": 130569.7875, + "step": 91010 + }, + { + "epoch": 0.18386615868809011, + "grad_norm": 1194383.0, + "learning_rate": 9.79637271442366e-06, + "loss": 77995.025, + "step": 91020 + }, + { + "epoch": 0.18388635932077393, + "grad_norm": 157337.4375, + "learning_rate": 9.796274100219693e-06, + "loss": 34268.0906, + "step": 91030 + }, + { + "epoch": 0.18390655995345775, + "grad_norm": 738396.0, + "learning_rate": 9.796175462639273e-06, + "loss": 78974.1, + "step": 91040 + }, + { + "epoch": 0.18392676058614155, + "grad_norm": 2003400.875, + "learning_rate": 9.796076801682873e-06, + "loss": 77138.0125, + "step": 91050 + }, + { + "epoch": 0.18394696121882537, + "grad_norm": 3986083.5, + "learning_rate": 9.795978117350976e-06, + "loss": 67070.375, + "step": 91060 + }, + { + "epoch": 0.1839671618515092, + "grad_norm": 333013.65625, + "learning_rate": 9.795879409644064e-06, + "loss": 121485.625, + "step": 91070 + }, + { + "epoch": 0.183987362484193, + "grad_norm": 8210512.5, + "learning_rate": 9.795780678562618e-06, + "loss": 45789.8, + "step": 91080 + }, + { + "epoch": 0.18400756311687683, + "grad_norm": 606906.625, + "learning_rate": 9.79568192410712e-06, + "loss": 28249.1437, + "step": 91090 + }, + { + "epoch": 0.18402776374956065, + "grad_norm": 4376710.5, + "learning_rate": 9.795583146278047e-06, + "loss": 115183.6375, + "step": 91100 + }, + { + "epoch": 0.18404796438224444, + "grad_norm": 1097855.0, + "learning_rate": 9.795484345075882e-06, + "loss": 193242.0625, + "step": 91110 + }, + { + "epoch": 0.18406816501492826, + "grad_norm": 2513875.0, + "learning_rate": 9.795385520501113e-06, + "loss": 105722.0375, + "step": 91120 + }, + { + "epoch": 0.18408836564761208, + "grad_norm": 6217592.5, + "learning_rate": 9.795286672554214e-06, + "loss": 63221.65, + "step": 91130 + }, + { + "epoch": 0.1841085662802959, + "grad_norm": 1994697.75, + "learning_rate": 9.795187801235668e-06, + "loss": 114264.025, + "step": 91140 + }, + { + "epoch": 0.18412876691297972, + "grad_norm": 1862491.875, + "learning_rate": 9.795088906545959e-06, + "loss": 171758.525, + "step": 91150 + }, + { + "epoch": 0.18414896754566354, + "grad_norm": 1576807.5, + "learning_rate": 9.794989988485571e-06, + "loss": 86936.5562, + "step": 91160 + }, + { + "epoch": 0.18416916817834736, + "grad_norm": 278400.90625, + "learning_rate": 9.79489104705498e-06, + "loss": 156206.4875, + "step": 91170 + }, + { + "epoch": 0.18418936881103115, + "grad_norm": 6150995.0, + "learning_rate": 9.794792082254673e-06, + "loss": 108461.7125, + "step": 91180 + }, + { + "epoch": 0.18420956944371497, + "grad_norm": 900132.0625, + "learning_rate": 9.79469309408513e-06, + "loss": 84141.1938, + "step": 91190 + }, + { + "epoch": 0.1842297700763988, + "grad_norm": 11980327.0, + "learning_rate": 9.794594082546835e-06, + "loss": 194429.3125, + "step": 91200 + }, + { + "epoch": 0.1842499707090826, + "grad_norm": 68671.65625, + "learning_rate": 9.794495047640271e-06, + "loss": 152998.4375, + "step": 91210 + }, + { + "epoch": 0.18427017134176643, + "grad_norm": 2179158.25, + "learning_rate": 9.79439598936592e-06, + "loss": 99067.4688, + "step": 91220 + }, + { + "epoch": 0.18429037197445025, + "grad_norm": 69640.546875, + "learning_rate": 9.794296907724262e-06, + "loss": 84047.3625, + "step": 91230 + }, + { + "epoch": 0.18431057260713404, + "grad_norm": 2140523.75, + "learning_rate": 9.794197802715784e-06, + "loss": 91388.5437, + "step": 91240 + }, + { + "epoch": 0.18433077323981786, + "grad_norm": 10707817.0, + "learning_rate": 9.794098674340966e-06, + "loss": 130223.45, + "step": 91250 + }, + { + "epoch": 0.18435097387250168, + "grad_norm": 882873.4375, + "learning_rate": 9.793999522600293e-06, + "loss": 86296.5063, + "step": 91260 + }, + { + "epoch": 0.1843711745051855, + "grad_norm": 448172.65625, + "learning_rate": 9.793900347494248e-06, + "loss": 183297.3125, + "step": 91270 + }, + { + "epoch": 0.18439137513786932, + "grad_norm": 1074699.625, + "learning_rate": 9.793801149023315e-06, + "loss": 213893.625, + "step": 91280 + }, + { + "epoch": 0.18441157577055314, + "grad_norm": 7908159.5, + "learning_rate": 9.793701927187975e-06, + "loss": 81434.1313, + "step": 91290 + }, + { + "epoch": 0.18443177640323694, + "grad_norm": 286417.0, + "learning_rate": 9.793602681988714e-06, + "loss": 58155.5625, + "step": 91300 + }, + { + "epoch": 0.18445197703592076, + "grad_norm": 298880.875, + "learning_rate": 9.793503413426016e-06, + "loss": 78367.3813, + "step": 91310 + }, + { + "epoch": 0.18447217766860458, + "grad_norm": 609210.6875, + "learning_rate": 9.793404121500362e-06, + "loss": 123494.525, + "step": 91320 + }, + { + "epoch": 0.1844923783012884, + "grad_norm": 17218.26171875, + "learning_rate": 9.79330480621224e-06, + "loss": 110779.5875, + "step": 91330 + }, + { + "epoch": 0.18451257893397222, + "grad_norm": 4225872.0, + "learning_rate": 9.793205467562131e-06, + "loss": 141835.6125, + "step": 91340 + }, + { + "epoch": 0.18453277956665604, + "grad_norm": 1161477.75, + "learning_rate": 9.793106105550518e-06, + "loss": 51913.2812, + "step": 91350 + }, + { + "epoch": 0.18455298019933986, + "grad_norm": 1267203.0, + "learning_rate": 9.793006720177887e-06, + "loss": 90595.375, + "step": 91360 + }, + { + "epoch": 0.18457318083202365, + "grad_norm": 134007.953125, + "learning_rate": 9.792907311444724e-06, + "loss": 108553.0375, + "step": 91370 + }, + { + "epoch": 0.18459338146470747, + "grad_norm": 20010370.0, + "learning_rate": 9.792807879351513e-06, + "loss": 135632.575, + "step": 91380 + }, + { + "epoch": 0.1846135820973913, + "grad_norm": 344817.65625, + "learning_rate": 9.792708423898735e-06, + "loss": 59495.6937, + "step": 91390 + }, + { + "epoch": 0.1846337827300751, + "grad_norm": 1555779.75, + "learning_rate": 9.79260894508688e-06, + "loss": 97189.025, + "step": 91400 + }, + { + "epoch": 0.18465398336275893, + "grad_norm": 26366776.0, + "learning_rate": 9.79250944291643e-06, + "loss": 114780.3625, + "step": 91410 + }, + { + "epoch": 0.18467418399544275, + "grad_norm": 3561967.25, + "learning_rate": 9.792409917387869e-06, + "loss": 30735.1281, + "step": 91420 + }, + { + "epoch": 0.18469438462812654, + "grad_norm": 843804.5625, + "learning_rate": 9.792310368501684e-06, + "loss": 75427.6125, + "step": 91430 + }, + { + "epoch": 0.18471458526081036, + "grad_norm": 572538.1875, + "learning_rate": 9.792210796258358e-06, + "loss": 82701.6187, + "step": 91440 + }, + { + "epoch": 0.18473478589349418, + "grad_norm": 340465.90625, + "learning_rate": 9.79211120065838e-06, + "loss": 27222.9188, + "step": 91450 + }, + { + "epoch": 0.184754986526178, + "grad_norm": 970686.1875, + "learning_rate": 9.792011581702234e-06, + "loss": 88046.9062, + "step": 91460 + }, + { + "epoch": 0.18477518715886182, + "grad_norm": 23288.26171875, + "learning_rate": 9.791911939390401e-06, + "loss": 58283.6813, + "step": 91470 + }, + { + "epoch": 0.18479538779154564, + "grad_norm": 120390.71875, + "learning_rate": 9.791812273723374e-06, + "loss": 55077.3, + "step": 91480 + }, + { + "epoch": 0.18481558842422946, + "grad_norm": 679356.4375, + "learning_rate": 9.791712584701634e-06, + "loss": 105238.425, + "step": 91490 + }, + { + "epoch": 0.18483578905691325, + "grad_norm": 399435.84375, + "learning_rate": 9.791612872325667e-06, + "loss": 140183.525, + "step": 91500 + }, + { + "epoch": 0.18485598968959707, + "grad_norm": 6828486.0, + "learning_rate": 9.79151313659596e-06, + "loss": 97640.1125, + "step": 91510 + }, + { + "epoch": 0.1848761903222809, + "grad_norm": 78349.40625, + "learning_rate": 9.791413377513001e-06, + "loss": 116202.4375, + "step": 91520 + }, + { + "epoch": 0.1848963909549647, + "grad_norm": 800034.1875, + "learning_rate": 9.791313595077272e-06, + "loss": 72724.1625, + "step": 91530 + }, + { + "epoch": 0.18491659158764853, + "grad_norm": 29616.888671875, + "learning_rate": 9.791213789289264e-06, + "loss": 77092.9563, + "step": 91540 + }, + { + "epoch": 0.18493679222033235, + "grad_norm": 463307.875, + "learning_rate": 9.791113960149458e-06, + "loss": 71911.3188, + "step": 91550 + }, + { + "epoch": 0.18495699285301614, + "grad_norm": 502367.75, + "learning_rate": 9.791014107658348e-06, + "loss": 99806.0375, + "step": 91560 + }, + { + "epoch": 0.18497719348569996, + "grad_norm": 872782.1875, + "learning_rate": 9.790914231816414e-06, + "loss": 60132.775, + "step": 91570 + }, + { + "epoch": 0.18499739411838378, + "grad_norm": 22496.666015625, + "learning_rate": 9.790814332624144e-06, + "loss": 65296.3438, + "step": 91580 + }, + { + "epoch": 0.1850175947510676, + "grad_norm": 134701.328125, + "learning_rate": 9.790714410082027e-06, + "loss": 89924.2937, + "step": 91590 + }, + { + "epoch": 0.18503779538375142, + "grad_norm": 216438.640625, + "learning_rate": 9.79061446419055e-06, + "loss": 61145.3812, + "step": 91600 + }, + { + "epoch": 0.18505799601643524, + "grad_norm": 0.0, + "learning_rate": 9.790514494950196e-06, + "loss": 128118.525, + "step": 91610 + }, + { + "epoch": 0.18507819664911904, + "grad_norm": 294918.375, + "learning_rate": 9.790414502361458e-06, + "loss": 81501.125, + "step": 91620 + }, + { + "epoch": 0.18509839728180286, + "grad_norm": 6607256.0, + "learning_rate": 9.790314486424821e-06, + "loss": 128108.65, + "step": 91630 + }, + { + "epoch": 0.18511859791448668, + "grad_norm": 3548280.25, + "learning_rate": 9.790214447140771e-06, + "loss": 202347.65, + "step": 91640 + }, + { + "epoch": 0.1851387985471705, + "grad_norm": 379223.5, + "learning_rate": 9.790114384509796e-06, + "loss": 89967.9563, + "step": 91650 + }, + { + "epoch": 0.18515899917985432, + "grad_norm": 111693.59375, + "learning_rate": 9.790014298532386e-06, + "loss": 25946.2031, + "step": 91660 + }, + { + "epoch": 0.18517919981253814, + "grad_norm": 460151.53125, + "learning_rate": 9.789914189209028e-06, + "loss": 60741.6875, + "step": 91670 + }, + { + "epoch": 0.18519940044522196, + "grad_norm": 1461656.375, + "learning_rate": 9.789814056540207e-06, + "loss": 88255.95, + "step": 91680 + }, + { + "epoch": 0.18521960107790575, + "grad_norm": 0.0, + "learning_rate": 9.789713900526415e-06, + "loss": 54066.5563, + "step": 91690 + }, + { + "epoch": 0.18523980171058957, + "grad_norm": 2063105.125, + "learning_rate": 9.789613721168138e-06, + "loss": 116289.6, + "step": 91700 + }, + { + "epoch": 0.1852600023432734, + "grad_norm": 8267698.5, + "learning_rate": 9.789513518465866e-06, + "loss": 128047.025, + "step": 91710 + }, + { + "epoch": 0.1852802029759572, + "grad_norm": 5418409.5, + "learning_rate": 9.789413292420082e-06, + "loss": 97211.2625, + "step": 91720 + }, + { + "epoch": 0.18530040360864103, + "grad_norm": 12017473.0, + "learning_rate": 9.789313043031281e-06, + "loss": 54574.5, + "step": 91730 + }, + { + "epoch": 0.18532060424132485, + "grad_norm": 5901526.5, + "learning_rate": 9.78921277029995e-06, + "loss": 56295.1312, + "step": 91740 + }, + { + "epoch": 0.18534080487400864, + "grad_norm": 1534066.625, + "learning_rate": 9.789112474226575e-06, + "loss": 52210.0, + "step": 91750 + }, + { + "epoch": 0.18536100550669246, + "grad_norm": 81650.546875, + "learning_rate": 9.789012154811648e-06, + "loss": 26376.2406, + "step": 91760 + }, + { + "epoch": 0.18538120613937628, + "grad_norm": 1188312.0, + "learning_rate": 9.788911812055656e-06, + "loss": 45950.8969, + "step": 91770 + }, + { + "epoch": 0.1854014067720601, + "grad_norm": 110608.78125, + "learning_rate": 9.788811445959088e-06, + "loss": 105016.1625, + "step": 91780 + }, + { + "epoch": 0.18542160740474392, + "grad_norm": 1636863.25, + "learning_rate": 9.788711056522436e-06, + "loss": 135114.4875, + "step": 91790 + }, + { + "epoch": 0.18544180803742774, + "grad_norm": 1109733.375, + "learning_rate": 9.788610643746184e-06, + "loss": 57940.0062, + "step": 91800 + }, + { + "epoch": 0.18546200867011156, + "grad_norm": 313134.4375, + "learning_rate": 9.788510207630825e-06, + "loss": 73589.6625, + "step": 91810 + }, + { + "epoch": 0.18548220930279535, + "grad_norm": 684389.5, + "learning_rate": 9.78840974817685e-06, + "loss": 151044.7125, + "step": 91820 + }, + { + "epoch": 0.18550240993547917, + "grad_norm": 3574390.75, + "learning_rate": 9.788309265384745e-06, + "loss": 50912.775, + "step": 91830 + }, + { + "epoch": 0.185522610568163, + "grad_norm": 111858.671875, + "learning_rate": 9.788208759255003e-06, + "loss": 57507.8187, + "step": 91840 + }, + { + "epoch": 0.1855428112008468, + "grad_norm": 539452.3125, + "learning_rate": 9.788108229788111e-06, + "loss": 62765.2688, + "step": 91850 + }, + { + "epoch": 0.18556301183353063, + "grad_norm": 1788965.75, + "learning_rate": 9.788007676984562e-06, + "loss": 95013.15, + "step": 91860 + }, + { + "epoch": 0.18558321246621445, + "grad_norm": 182377.328125, + "learning_rate": 9.787907100844842e-06, + "loss": 119060.65, + "step": 91870 + }, + { + "epoch": 0.18560341309889825, + "grad_norm": 8524628.0, + "learning_rate": 9.787806501369446e-06, + "loss": 60023.625, + "step": 91880 + }, + { + "epoch": 0.18562361373158207, + "grad_norm": 1440722.0, + "learning_rate": 9.78770587855886e-06, + "loss": 116118.95, + "step": 91890 + }, + { + "epoch": 0.18564381436426589, + "grad_norm": 356104.15625, + "learning_rate": 9.787605232413575e-06, + "loss": 123584.2875, + "step": 91900 + }, + { + "epoch": 0.1856640149969497, + "grad_norm": 706609.5625, + "learning_rate": 9.787504562934085e-06, + "loss": 141602.3375, + "step": 91910 + }, + { + "epoch": 0.18568421562963353, + "grad_norm": 391109.6875, + "learning_rate": 9.787403870120877e-06, + "loss": 31702.5125, + "step": 91920 + }, + { + "epoch": 0.18570441626231735, + "grad_norm": 2100031.5, + "learning_rate": 9.787303153974444e-06, + "loss": 141225.975, + "step": 91930 + }, + { + "epoch": 0.18572461689500114, + "grad_norm": 712101.1875, + "learning_rate": 9.787202414495275e-06, + "loss": 72854.5875, + "step": 91940 + }, + { + "epoch": 0.18574481752768496, + "grad_norm": 425663.5, + "learning_rate": 9.787101651683864e-06, + "loss": 29961.9844, + "step": 91950 + }, + { + "epoch": 0.18576501816036878, + "grad_norm": 8389161.0, + "learning_rate": 9.787000865540698e-06, + "loss": 167638.8375, + "step": 91960 + }, + { + "epoch": 0.1857852187930526, + "grad_norm": 161710.96875, + "learning_rate": 9.786900056066272e-06, + "loss": 42259.8187, + "step": 91970 + }, + { + "epoch": 0.18580541942573642, + "grad_norm": 83854.8046875, + "learning_rate": 9.786799223261076e-06, + "loss": 117986.5375, + "step": 91980 + }, + { + "epoch": 0.18582562005842024, + "grad_norm": 292768.53125, + "learning_rate": 9.7866983671256e-06, + "loss": 86745.4187, + "step": 91990 + }, + { + "epoch": 0.18584582069110406, + "grad_norm": 6339637.5, + "learning_rate": 9.786597487660336e-06, + "loss": 76770.8125, + "step": 92000 } ], "logging_steps": 10,