{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.997899649941656, "eval_steps": 536, "global_step": 19278, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004667444574095683, "grad_norm": 4.125, "learning_rate": 2e-05, "loss": 2.581, "step": 1 }, { "epoch": 0.0004667444574095683, "eval_loss": 2.3898825645446777, "eval_runtime": 93.0875, "eval_samples_per_second": 19.38, "eval_steps_per_second": 2.428, "step": 1 }, { "epoch": 0.0009334889148191366, "grad_norm": 4.15625, "learning_rate": 4e-05, "loss": 2.5575, "step": 2 }, { "epoch": 0.0014002333722287048, "grad_norm": 4.5625, "learning_rate": 6e-05, "loss": 2.4575, "step": 3 }, { "epoch": 0.0018669778296382731, "grad_norm": 3.015625, "learning_rate": 8e-05, "loss": 2.1761, "step": 4 }, { "epoch": 0.002333722287047841, "grad_norm": 2.171875, "learning_rate": 0.0001, "loss": 2.0576, "step": 5 }, { "epoch": 0.0028004667444574095, "grad_norm": 1.1171875, "learning_rate": 0.00012, "loss": 1.7047, "step": 6 }, { "epoch": 0.003267211201866978, "grad_norm": 0.92578125, "learning_rate": 0.00014, "loss": 1.8031, "step": 7 }, { "epoch": 0.0037339556592765463, "grad_norm": 0.96484375, "learning_rate": 0.00016, "loss": 1.5404, "step": 8 }, { "epoch": 0.004200700116686115, "grad_norm": 0.921875, "learning_rate": 0.00018, "loss": 1.5804, "step": 9 }, { "epoch": 0.004667444574095682, "grad_norm": 0.75, "learning_rate": 0.0002, "loss": 1.4808, "step": 10 }, { "epoch": 0.005134189031505251, "grad_norm": 0.71484375, "learning_rate": 0.00019999999892344455, "loss": 1.4612, "step": 11 }, { "epoch": 0.005600933488914819, "grad_norm": 0.7578125, "learning_rate": 0.00019999999569377815, "loss": 1.4376, "step": 12 }, { "epoch": 0.006067677946324387, "grad_norm": 0.74609375, "learning_rate": 0.00019999999031100084, "loss": 1.4622, "step": 13 }, { "epoch": 0.006534422403733956, "grad_norm": 0.828125, "learning_rate": 0.00019999998277511286, "loss": 1.3237, "step": 14 }, { "epoch": 0.007001166861143524, "grad_norm": 0.76953125, "learning_rate": 0.00019999997308611426, "loss": 1.4007, "step": 15 }, { "epoch": 0.007467911318553093, "grad_norm": 0.74609375, "learning_rate": 0.0001999999612440053, "loss": 1.349, "step": 16 }, { "epoch": 0.007934655775962661, "grad_norm": 0.70703125, "learning_rate": 0.00019999994724878624, "loss": 1.2734, "step": 17 }, { "epoch": 0.00840140023337223, "grad_norm": 0.7890625, "learning_rate": 0.00019999993110045734, "loss": 1.3, "step": 18 }, { "epoch": 0.008868144690781798, "grad_norm": 0.75390625, "learning_rate": 0.00019999991279901898, "loss": 1.387, "step": 19 }, { "epoch": 0.009334889148191364, "grad_norm": 0.69140625, "learning_rate": 0.00019999989234447153, "loss": 1.2229, "step": 20 }, { "epoch": 0.009801633605600933, "grad_norm": 0.609375, "learning_rate": 0.00019999986973681547, "loss": 1.1438, "step": 21 }, { "epoch": 0.010268378063010501, "grad_norm": 0.85546875, "learning_rate": 0.00019999984497605125, "loss": 1.3672, "step": 22 }, { "epoch": 0.01073512252042007, "grad_norm": 0.6953125, "learning_rate": 0.00019999981806217943, "loss": 1.2266, "step": 23 }, { "epoch": 0.011201866977829638, "grad_norm": 0.69140625, "learning_rate": 0.00019999978899520055, "loss": 1.1961, "step": 24 }, { "epoch": 0.011668611435239206, "grad_norm": 0.703125, "learning_rate": 0.00019999975777511527, "loss": 1.2069, "step": 25 }, { "epoch": 0.012135355892648775, "grad_norm": 0.625, "learning_rate": 0.00019999972440192426, "loss": 1.0946, "step": 26 }, { "epoch": 0.012602100350058343, "grad_norm": 0.67578125, "learning_rate": 0.00019999968887562822, "loss": 1.2584, "step": 27 }, { "epoch": 0.013068844807467912, "grad_norm": 0.625, "learning_rate": 0.00019999965119622796, "loss": 1.1032, "step": 28 }, { "epoch": 0.01353558926487748, "grad_norm": 0.5859375, "learning_rate": 0.00019999961136372423, "loss": 0.9899, "step": 29 }, { "epoch": 0.014002333722287048, "grad_norm": 0.65625, "learning_rate": 0.0001999995693781179, "loss": 1.1128, "step": 30 }, { "epoch": 0.014469078179696617, "grad_norm": 0.69921875, "learning_rate": 0.00019999952523940993, "loss": 1.2303, "step": 31 }, { "epoch": 0.014935822637106185, "grad_norm": 0.6015625, "learning_rate": 0.0001999994789476012, "loss": 1.0485, "step": 32 }, { "epoch": 0.015402567094515752, "grad_norm": 0.5703125, "learning_rate": 0.00019999943050269273, "loss": 0.9912, "step": 33 }, { "epoch": 0.015869311551925322, "grad_norm": 0.6015625, "learning_rate": 0.0001999993799046856, "loss": 1.1316, "step": 34 }, { "epoch": 0.01633605600933489, "grad_norm": 0.65234375, "learning_rate": 0.00019999932715358086, "loss": 1.1705, "step": 35 }, { "epoch": 0.01680280046674446, "grad_norm": 0.578125, "learning_rate": 0.00019999927224937966, "loss": 1.0543, "step": 36 }, { "epoch": 0.017269544924154025, "grad_norm": 0.65625, "learning_rate": 0.00019999921519208317, "loss": 1.1632, "step": 37 }, { "epoch": 0.017736289381563596, "grad_norm": 0.6015625, "learning_rate": 0.00019999915598169264, "loss": 1.086, "step": 38 }, { "epoch": 0.018203033838973162, "grad_norm": 0.625, "learning_rate": 0.0001999990946182093, "loss": 1.2955, "step": 39 }, { "epoch": 0.01866977829638273, "grad_norm": 0.59375, "learning_rate": 0.00019999903110163453, "loss": 1.1241, "step": 40 }, { "epoch": 0.0191365227537923, "grad_norm": 0.61328125, "learning_rate": 0.00019999896543196964, "loss": 1.1581, "step": 41 }, { "epoch": 0.019603267211201866, "grad_norm": 0.6328125, "learning_rate": 0.0001999988976092161, "loss": 1.0129, "step": 42 }, { "epoch": 0.020070011668611436, "grad_norm": 0.703125, "learning_rate": 0.00019999882763337533, "loss": 1.258, "step": 43 }, { "epoch": 0.020536756126021002, "grad_norm": 0.63671875, "learning_rate": 0.00019999875550444886, "loss": 1.1702, "step": 44 }, { "epoch": 0.021003500583430573, "grad_norm": 0.6015625, "learning_rate": 0.00019999868122243825, "loss": 1.1404, "step": 45 }, { "epoch": 0.02147024504084014, "grad_norm": 0.55859375, "learning_rate": 0.00019999860478734505, "loss": 0.9923, "step": 46 }, { "epoch": 0.02193698949824971, "grad_norm": 0.58203125, "learning_rate": 0.00019999852619917094, "loss": 1.0718, "step": 47 }, { "epoch": 0.022403733955659276, "grad_norm": 0.66796875, "learning_rate": 0.00019999844545791763, "loss": 1.2165, "step": 48 }, { "epoch": 0.022870478413068846, "grad_norm": 0.5390625, "learning_rate": 0.00019999836256358685, "loss": 0.9337, "step": 49 }, { "epoch": 0.023337222870478413, "grad_norm": 0.57421875, "learning_rate": 0.00019999827751618032, "loss": 1.067, "step": 50 }, { "epoch": 0.023803967327887983, "grad_norm": 0.625, "learning_rate": 0.00019999819031569995, "loss": 1.0405, "step": 51 }, { "epoch": 0.02427071178529755, "grad_norm": 0.578125, "learning_rate": 0.0001999981009621476, "loss": 1.1471, "step": 52 }, { "epoch": 0.024737456242707116, "grad_norm": 0.578125, "learning_rate": 0.0001999980094555252, "loss": 1.0263, "step": 53 }, { "epoch": 0.025204200700116686, "grad_norm": 0.60546875, "learning_rate": 0.00019999791579583472, "loss": 1.0821, "step": 54 }, { "epoch": 0.025670945157526253, "grad_norm": 0.51171875, "learning_rate": 0.00019999781998307812, "loss": 0.9545, "step": 55 }, { "epoch": 0.026137689614935823, "grad_norm": 0.57421875, "learning_rate": 0.00019999772201725752, "loss": 1.1339, "step": 56 }, { "epoch": 0.02660443407234539, "grad_norm": 0.59375, "learning_rate": 0.00019999762189837502, "loss": 1.1229, "step": 57 }, { "epoch": 0.02707117852975496, "grad_norm": 0.60546875, "learning_rate": 0.00019999751962643277, "loss": 1.1392, "step": 58 }, { "epoch": 0.027537922987164527, "grad_norm": 0.56640625, "learning_rate": 0.00019999741520143298, "loss": 1.0464, "step": 59 }, { "epoch": 0.028004667444574097, "grad_norm": 0.56640625, "learning_rate": 0.0001999973086233779, "loss": 0.9927, "step": 60 }, { "epoch": 0.028471411901983663, "grad_norm": 0.57421875, "learning_rate": 0.0001999971998922698, "loss": 1.0365, "step": 61 }, { "epoch": 0.028938156359393234, "grad_norm": 0.5703125, "learning_rate": 0.00019999708900811104, "loss": 1.0833, "step": 62 }, { "epoch": 0.0294049008168028, "grad_norm": 0.5546875, "learning_rate": 0.00019999697597090398, "loss": 1.0158, "step": 63 }, { "epoch": 0.02987164527421237, "grad_norm": 0.6015625, "learning_rate": 0.00019999686078065113, "loss": 1.1899, "step": 64 }, { "epoch": 0.030338389731621937, "grad_norm": 0.5625, "learning_rate": 0.0001999967434373549, "loss": 1.0692, "step": 65 }, { "epoch": 0.030805134189031504, "grad_norm": 0.58203125, "learning_rate": 0.0001999966239410178, "loss": 1.0199, "step": 66 }, { "epoch": 0.03127187864644107, "grad_norm": 0.474609375, "learning_rate": 0.0001999965022916425, "loss": 0.9763, "step": 67 }, { "epoch": 0.031738623103850644, "grad_norm": 0.5390625, "learning_rate": 0.0001999963784892315, "loss": 1.002, "step": 68 }, { "epoch": 0.03220536756126021, "grad_norm": 0.59375, "learning_rate": 0.00019999625253378756, "loss": 1.1227, "step": 69 }, { "epoch": 0.03267211201866978, "grad_norm": 0.5390625, "learning_rate": 0.00019999612442531332, "loss": 1.0688, "step": 70 }, { "epoch": 0.033138856476079344, "grad_norm": 0.58203125, "learning_rate": 0.00019999599416381162, "loss": 1.0535, "step": 71 }, { "epoch": 0.03360560093348892, "grad_norm": 0.60546875, "learning_rate": 0.0001999958617492852, "loss": 1.2168, "step": 72 }, { "epoch": 0.034072345390898484, "grad_norm": 0.625, "learning_rate": 0.0001999957271817369, "loss": 1.0747, "step": 73 }, { "epoch": 0.03453908984830805, "grad_norm": 0.55859375, "learning_rate": 0.00019999559046116966, "loss": 1.0446, "step": 74 }, { "epoch": 0.03500583430571762, "grad_norm": 0.54296875, "learning_rate": 0.00019999545158758643, "loss": 1.0294, "step": 75 }, { "epoch": 0.03547257876312719, "grad_norm": 0.5078125, "learning_rate": 0.00019999531056099019, "loss": 1.0653, "step": 76 }, { "epoch": 0.03593932322053676, "grad_norm": 0.51171875, "learning_rate": 0.00019999516738138394, "loss": 0.9742, "step": 77 }, { "epoch": 0.036406067677946324, "grad_norm": 0.53125, "learning_rate": 0.00019999502204877078, "loss": 0.9658, "step": 78 }, { "epoch": 0.03687281213535589, "grad_norm": 0.5390625, "learning_rate": 0.00019999487456315385, "loss": 1.1283, "step": 79 }, { "epoch": 0.03733955659276546, "grad_norm": 0.55078125, "learning_rate": 0.00019999472492453636, "loss": 1.0825, "step": 80 }, { "epoch": 0.03780630105017503, "grad_norm": 0.5625, "learning_rate": 0.0001999945731329215, "loss": 1.1282, "step": 81 }, { "epoch": 0.0382730455075846, "grad_norm": 0.53125, "learning_rate": 0.0001999944191883125, "loss": 1.0473, "step": 82 }, { "epoch": 0.038739789964994165, "grad_norm": 0.55078125, "learning_rate": 0.00019999426309071273, "loss": 1.0766, "step": 83 }, { "epoch": 0.03920653442240373, "grad_norm": 0.53125, "learning_rate": 0.00019999410484012554, "loss": 1.0673, "step": 84 }, { "epoch": 0.039673278879813305, "grad_norm": 0.5625, "learning_rate": 0.00019999394443655428, "loss": 0.9415, "step": 85 }, { "epoch": 0.04014002333722287, "grad_norm": 0.9296875, "learning_rate": 0.00019999378188000248, "loss": 1.0481, "step": 86 }, { "epoch": 0.04060676779463244, "grad_norm": 0.5078125, "learning_rate": 0.00019999361717047362, "loss": 0.9316, "step": 87 }, { "epoch": 0.041073512252042005, "grad_norm": 0.625, "learning_rate": 0.00019999345030797123, "loss": 1.0445, "step": 88 }, { "epoch": 0.04154025670945158, "grad_norm": 0.515625, "learning_rate": 0.0001999932812924989, "loss": 0.9591, "step": 89 }, { "epoch": 0.042007001166861145, "grad_norm": 0.58203125, "learning_rate": 0.00019999311012406027, "loss": 1.0437, "step": 90 }, { "epoch": 0.04247374562427071, "grad_norm": 0.54296875, "learning_rate": 0.00019999293680265907, "loss": 1.0453, "step": 91 }, { "epoch": 0.04294049008168028, "grad_norm": 0.55859375, "learning_rate": 0.00019999276132829898, "loss": 1.1028, "step": 92 }, { "epoch": 0.043407234539089845, "grad_norm": 0.5390625, "learning_rate": 0.0001999925837009838, "loss": 0.9184, "step": 93 }, { "epoch": 0.04387397899649942, "grad_norm": 0.59375, "learning_rate": 0.00019999240392071731, "loss": 1.1142, "step": 94 }, { "epoch": 0.044340723453908985, "grad_norm": 0.5625, "learning_rate": 0.00019999222198750349, "loss": 1.0435, "step": 95 }, { "epoch": 0.04480746791131855, "grad_norm": 0.51953125, "learning_rate": 0.00019999203790134614, "loss": 0.9208, "step": 96 }, { "epoch": 0.04527421236872812, "grad_norm": 0.52734375, "learning_rate": 0.0001999918516622493, "loss": 1.0377, "step": 97 }, { "epoch": 0.04574095682613769, "grad_norm": 0.57421875, "learning_rate": 0.0001999916632702169, "loss": 1.0346, "step": 98 }, { "epoch": 0.04620770128354726, "grad_norm": 0.498046875, "learning_rate": 0.0001999914727252531, "loss": 0.8648, "step": 99 }, { "epoch": 0.046674445740956826, "grad_norm": 0.5234375, "learning_rate": 0.00019999128002736192, "loss": 0.9425, "step": 100 }, { "epoch": 0.04714119019836639, "grad_norm": 0.5234375, "learning_rate": 0.00019999108517654754, "loss": 0.9718, "step": 101 }, { "epoch": 0.047607934655775966, "grad_norm": 0.55859375, "learning_rate": 0.00019999088817281417, "loss": 0.9913, "step": 102 }, { "epoch": 0.04807467911318553, "grad_norm": 0.53125, "learning_rate": 0.00019999068901616602, "loss": 1.0457, "step": 103 }, { "epoch": 0.0485414235705951, "grad_norm": 0.53515625, "learning_rate": 0.0001999904877066074, "loss": 0.9852, "step": 104 }, { "epoch": 0.049008168028004666, "grad_norm": 0.55859375, "learning_rate": 0.00019999028424414267, "loss": 1.0041, "step": 105 }, { "epoch": 0.04947491248541423, "grad_norm": 0.578125, "learning_rate": 0.00019999007862877614, "loss": 1.0784, "step": 106 }, { "epoch": 0.049941656942823806, "grad_norm": 0.5, "learning_rate": 0.00019998987086051226, "loss": 1.0024, "step": 107 }, { "epoch": 0.05040840140023337, "grad_norm": 0.49609375, "learning_rate": 0.00019998966093935557, "loss": 0.9716, "step": 108 }, { "epoch": 0.05087514585764294, "grad_norm": 0.5625, "learning_rate": 0.0001999894488653105, "loss": 1.0776, "step": 109 }, { "epoch": 0.051341890315052506, "grad_norm": 0.50390625, "learning_rate": 0.0001999892346383817, "loss": 0.9575, "step": 110 }, { "epoch": 0.05180863477246208, "grad_norm": 0.54296875, "learning_rate": 0.0001999890182585737, "loss": 0.9859, "step": 111 }, { "epoch": 0.052275379229871646, "grad_norm": 0.60546875, "learning_rate": 0.00019998879972589118, "loss": 1.065, "step": 112 }, { "epoch": 0.05274212368728121, "grad_norm": 0.478515625, "learning_rate": 0.0001999885790403389, "loss": 0.9501, "step": 113 }, { "epoch": 0.05320886814469078, "grad_norm": 0.55078125, "learning_rate": 0.00019998835620192157, "loss": 1.0011, "step": 114 }, { "epoch": 0.05367561260210035, "grad_norm": 0.57421875, "learning_rate": 0.00019998813121064398, "loss": 1.0163, "step": 115 }, { "epoch": 0.05414235705950992, "grad_norm": 0.515625, "learning_rate": 0.000199987904066511, "loss": 0.9972, "step": 116 }, { "epoch": 0.05460910151691949, "grad_norm": 0.5546875, "learning_rate": 0.00019998767476952747, "loss": 0.9394, "step": 117 }, { "epoch": 0.05507584597432905, "grad_norm": 0.51171875, "learning_rate": 0.0001999874433196984, "loss": 0.9621, "step": 118 }, { "epoch": 0.05554259043173862, "grad_norm": 0.5703125, "learning_rate": 0.00019998720971702873, "loss": 0.9837, "step": 119 }, { "epoch": 0.056009334889148193, "grad_norm": 0.58203125, "learning_rate": 0.00019998697396152347, "loss": 1.1347, "step": 120 }, { "epoch": 0.05647607934655776, "grad_norm": 0.5234375, "learning_rate": 0.00019998673605318778, "loss": 0.902, "step": 121 }, { "epoch": 0.05694282380396733, "grad_norm": 0.55859375, "learning_rate": 0.0001999864959920267, "loss": 1.0605, "step": 122 }, { "epoch": 0.05740956826137689, "grad_norm": 0.546875, "learning_rate": 0.0001999862537780454, "loss": 1.0244, "step": 123 }, { "epoch": 0.05787631271878647, "grad_norm": 0.625, "learning_rate": 0.00019998600941124916, "loss": 1.2095, "step": 124 }, { "epoch": 0.058343057176196034, "grad_norm": 0.5078125, "learning_rate": 0.00019998576289164316, "loss": 0.9162, "step": 125 }, { "epoch": 0.0588098016336056, "grad_norm": 0.515625, "learning_rate": 0.00019998551421923278, "loss": 1.0131, "step": 126 }, { "epoch": 0.05927654609101517, "grad_norm": 0.49609375, "learning_rate": 0.00019998526339402332, "loss": 0.9307, "step": 127 }, { "epoch": 0.05974329054842474, "grad_norm": 0.451171875, "learning_rate": 0.00019998501041602022, "loss": 0.8748, "step": 128 }, { "epoch": 0.06021003500583431, "grad_norm": 0.578125, "learning_rate": 0.00019998475528522892, "loss": 1.088, "step": 129 }, { "epoch": 0.060676779463243874, "grad_norm": 0.5546875, "learning_rate": 0.0001999844980016549, "loss": 1.1424, "step": 130 }, { "epoch": 0.06114352392065344, "grad_norm": 0.470703125, "learning_rate": 0.0001999842385653037, "loss": 0.9336, "step": 131 }, { "epoch": 0.06161026837806301, "grad_norm": 0.53125, "learning_rate": 0.00019998397697618088, "loss": 1.0417, "step": 132 }, { "epoch": 0.06207701283547258, "grad_norm": 0.490234375, "learning_rate": 0.00019998371323429212, "loss": 0.9003, "step": 133 }, { "epoch": 0.06254375729288214, "grad_norm": 0.5703125, "learning_rate": 0.0001999834473396431, "loss": 1.0819, "step": 134 }, { "epoch": 0.06301050175029171, "grad_norm": 0.53125, "learning_rate": 0.0001999831792922395, "loss": 0.9295, "step": 135 }, { "epoch": 0.06347724620770129, "grad_norm": 0.51171875, "learning_rate": 0.0001999829090920871, "loss": 0.9579, "step": 136 }, { "epoch": 0.06394399066511085, "grad_norm": 0.50390625, "learning_rate": 0.00019998263673919175, "loss": 0.9949, "step": 137 }, { "epoch": 0.06441073512252042, "grad_norm": 0.546875, "learning_rate": 0.00019998236223355933, "loss": 0.9855, "step": 138 }, { "epoch": 0.06487747957993, "grad_norm": 0.5859375, "learning_rate": 0.00019998208557519572, "loss": 1.0609, "step": 139 }, { "epoch": 0.06534422403733955, "grad_norm": 0.55859375, "learning_rate": 0.00019998180676410684, "loss": 1.0431, "step": 140 }, { "epoch": 0.06581096849474913, "grad_norm": 0.53125, "learning_rate": 0.00019998152580029874, "loss": 0.9369, "step": 141 }, { "epoch": 0.06627771295215869, "grad_norm": 0.51171875, "learning_rate": 0.00019998124268377746, "loss": 0.8901, "step": 142 }, { "epoch": 0.06674445740956826, "grad_norm": 0.53515625, "learning_rate": 0.00019998095741454908, "loss": 0.9857, "step": 143 }, { "epoch": 0.06721120186697783, "grad_norm": 0.50390625, "learning_rate": 0.00019998066999261975, "loss": 0.924, "step": 144 }, { "epoch": 0.0676779463243874, "grad_norm": 0.53125, "learning_rate": 0.00019998038041799568, "loss": 1.0101, "step": 145 }, { "epoch": 0.06814469078179697, "grad_norm": 0.59375, "learning_rate": 0.00019998008869068313, "loss": 1.0248, "step": 146 }, { "epoch": 0.06861143523920653, "grad_norm": 0.5234375, "learning_rate": 0.0001999797948106883, "loss": 0.9731, "step": 147 }, { "epoch": 0.0690781796966161, "grad_norm": 0.46875, "learning_rate": 0.00019997949877801756, "loss": 0.8834, "step": 148 }, { "epoch": 0.06954492415402568, "grad_norm": 0.515625, "learning_rate": 0.00019997920059267727, "loss": 0.9599, "step": 149 }, { "epoch": 0.07001166861143523, "grad_norm": 0.546875, "learning_rate": 0.00019997890025467388, "loss": 1.0223, "step": 150 }, { "epoch": 0.07047841306884481, "grad_norm": 0.46875, "learning_rate": 0.00019997859776401384, "loss": 0.92, "step": 151 }, { "epoch": 0.07094515752625438, "grad_norm": 0.490234375, "learning_rate": 0.00019997829312070367, "loss": 0.9074, "step": 152 }, { "epoch": 0.07141190198366394, "grad_norm": 0.515625, "learning_rate": 0.0001999779863247499, "loss": 1.0465, "step": 153 }, { "epoch": 0.07187864644107352, "grad_norm": 0.51171875, "learning_rate": 0.00019997767737615915, "loss": 0.8792, "step": 154 }, { "epoch": 0.07234539089848308, "grad_norm": 0.51953125, "learning_rate": 0.00019997736627493811, "loss": 0.9099, "step": 155 }, { "epoch": 0.07281213535589265, "grad_norm": 0.5078125, "learning_rate": 0.00019997705302109343, "loss": 1.0126, "step": 156 }, { "epoch": 0.07327887981330222, "grad_norm": 0.5078125, "learning_rate": 0.00019997673761463185, "loss": 0.9207, "step": 157 }, { "epoch": 0.07374562427071178, "grad_norm": 0.5234375, "learning_rate": 0.00019997642005556022, "loss": 0.9632, "step": 158 }, { "epoch": 0.07421236872812136, "grad_norm": 0.48046875, "learning_rate": 0.0001999761003438853, "loss": 0.8723, "step": 159 }, { "epoch": 0.07467911318553092, "grad_norm": 0.5078125, "learning_rate": 0.00019997577847961407, "loss": 0.8775, "step": 160 }, { "epoch": 0.07514585764294049, "grad_norm": 0.482421875, "learning_rate": 0.00019997545446275337, "loss": 0.8926, "step": 161 }, { "epoch": 0.07561260210035006, "grad_norm": 0.50390625, "learning_rate": 0.00019997512829331022, "loss": 1.0073, "step": 162 }, { "epoch": 0.07607934655775962, "grad_norm": 0.51171875, "learning_rate": 0.00019997479997129164, "loss": 0.9234, "step": 163 }, { "epoch": 0.0765460910151692, "grad_norm": 0.5390625, "learning_rate": 0.00019997446949670468, "loss": 1.0193, "step": 164 }, { "epoch": 0.07701283547257877, "grad_norm": 0.4765625, "learning_rate": 0.00019997413686955647, "loss": 0.8789, "step": 165 }, { "epoch": 0.07747957992998833, "grad_norm": 0.474609375, "learning_rate": 0.00019997380208985416, "loss": 0.9006, "step": 166 }, { "epoch": 0.0779463243873979, "grad_norm": 0.5078125, "learning_rate": 0.00019997346515760497, "loss": 1.0905, "step": 167 }, { "epoch": 0.07841306884480746, "grad_norm": 0.53125, "learning_rate": 0.00019997312607281619, "loss": 1.023, "step": 168 }, { "epoch": 0.07887981330221704, "grad_norm": 0.5390625, "learning_rate": 0.00019997278483549504, "loss": 1.0344, "step": 169 }, { "epoch": 0.07934655775962661, "grad_norm": 0.494140625, "learning_rate": 0.00019997244144564892, "loss": 0.9687, "step": 170 }, { "epoch": 0.07981330221703617, "grad_norm": 0.52734375, "learning_rate": 0.00019997209590328523, "loss": 0.9775, "step": 171 }, { "epoch": 0.08028004667444574, "grad_norm": 0.5546875, "learning_rate": 0.0001999717482084114, "loss": 0.9761, "step": 172 }, { "epoch": 0.0807467911318553, "grad_norm": 0.53125, "learning_rate": 0.00019997139836103487, "loss": 0.9741, "step": 173 }, { "epoch": 0.08121353558926488, "grad_norm": 0.5, "learning_rate": 0.00019997104636116324, "loss": 0.9491, "step": 174 }, { "epoch": 0.08168028004667445, "grad_norm": 0.458984375, "learning_rate": 0.00019997069220880405, "loss": 0.9257, "step": 175 }, { "epoch": 0.08214702450408401, "grad_norm": 0.482421875, "learning_rate": 0.00019997033590396493, "loss": 0.9054, "step": 176 }, { "epoch": 0.08261376896149358, "grad_norm": 0.49609375, "learning_rate": 0.0001999699774466536, "loss": 1.0, "step": 177 }, { "epoch": 0.08308051341890316, "grad_norm": 0.5859375, "learning_rate": 0.00019996961683687773, "loss": 1.0593, "step": 178 }, { "epoch": 0.08354725787631272, "grad_norm": 0.546875, "learning_rate": 0.00019996925407464507, "loss": 0.949, "step": 179 }, { "epoch": 0.08401400233372229, "grad_norm": 0.515625, "learning_rate": 0.00019996888915996345, "loss": 0.9735, "step": 180 }, { "epoch": 0.08448074679113185, "grad_norm": 0.50390625, "learning_rate": 0.00019996852209284074, "loss": 0.9375, "step": 181 }, { "epoch": 0.08494749124854142, "grad_norm": 0.59375, "learning_rate": 0.00019996815287328483, "loss": 0.9325, "step": 182 }, { "epoch": 0.085414235705951, "grad_norm": 0.4921875, "learning_rate": 0.00019996778150130367, "loss": 0.8899, "step": 183 }, { "epoch": 0.08588098016336056, "grad_norm": 0.51171875, "learning_rate": 0.00019996740797690525, "loss": 1.0411, "step": 184 }, { "epoch": 0.08634772462077013, "grad_norm": 0.50390625, "learning_rate": 0.00019996703230009763, "loss": 0.962, "step": 185 }, { "epoch": 0.08681446907817969, "grad_norm": 0.50390625, "learning_rate": 0.0001999666544708889, "loss": 0.8849, "step": 186 }, { "epoch": 0.08728121353558926, "grad_norm": 0.478515625, "learning_rate": 0.00019996627448928716, "loss": 0.9275, "step": 187 }, { "epoch": 0.08774795799299884, "grad_norm": 0.50390625, "learning_rate": 0.00019996589235530063, "loss": 1.0102, "step": 188 }, { "epoch": 0.0882147024504084, "grad_norm": 0.6171875, "learning_rate": 0.00019996550806893754, "loss": 0.971, "step": 189 }, { "epoch": 0.08868144690781797, "grad_norm": 0.490234375, "learning_rate": 0.0001999651216302061, "loss": 0.8787, "step": 190 }, { "epoch": 0.08914819136522754, "grad_norm": 0.5546875, "learning_rate": 0.00019996473303911472, "loss": 1.032, "step": 191 }, { "epoch": 0.0896149358226371, "grad_norm": 0.48828125, "learning_rate": 0.00019996434229567172, "loss": 0.9348, "step": 192 }, { "epoch": 0.09008168028004668, "grad_norm": 0.51171875, "learning_rate": 0.0001999639493998855, "loss": 0.9619, "step": 193 }, { "epoch": 0.09054842473745624, "grad_norm": 0.486328125, "learning_rate": 0.00019996355435176455, "loss": 0.8818, "step": 194 }, { "epoch": 0.09101516919486581, "grad_norm": 0.50390625, "learning_rate": 0.00019996315715131737, "loss": 0.9511, "step": 195 }, { "epoch": 0.09148191365227538, "grad_norm": 0.53515625, "learning_rate": 0.00019996275779855249, "loss": 0.9715, "step": 196 }, { "epoch": 0.09194865810968494, "grad_norm": 0.53125, "learning_rate": 0.00019996235629347853, "loss": 0.9739, "step": 197 }, { "epoch": 0.09241540256709452, "grad_norm": 0.4765625, "learning_rate": 0.0001999619526361041, "loss": 0.8739, "step": 198 }, { "epoch": 0.09288214702450408, "grad_norm": 0.5, "learning_rate": 0.00019996154682643796, "loss": 0.9189, "step": 199 }, { "epoch": 0.09334889148191365, "grad_norm": 0.490234375, "learning_rate": 0.00019996113886448883, "loss": 0.9125, "step": 200 }, { "epoch": 0.09381563593932322, "grad_norm": 0.5078125, "learning_rate": 0.00019996072875026543, "loss": 0.8406, "step": 201 }, { "epoch": 0.09428238039673278, "grad_norm": 0.54296875, "learning_rate": 0.00019996031648377663, "loss": 0.8843, "step": 202 }, { "epoch": 0.09474912485414236, "grad_norm": 0.5234375, "learning_rate": 0.00019995990206503133, "loss": 1.0226, "step": 203 }, { "epoch": 0.09521586931155193, "grad_norm": 0.546875, "learning_rate": 0.0001999594854940384, "loss": 1.0359, "step": 204 }, { "epoch": 0.09568261376896149, "grad_norm": 0.51171875, "learning_rate": 0.00019995906677080686, "loss": 0.974, "step": 205 }, { "epoch": 0.09614935822637106, "grad_norm": 0.50390625, "learning_rate": 0.0001999586458953457, "loss": 0.8993, "step": 206 }, { "epoch": 0.09661610268378062, "grad_norm": 0.4609375, "learning_rate": 0.00019995822286766402, "loss": 0.9165, "step": 207 }, { "epoch": 0.0970828471411902, "grad_norm": 0.4921875, "learning_rate": 0.00019995779768777085, "loss": 0.8766, "step": 208 }, { "epoch": 0.09754959159859977, "grad_norm": 0.486328125, "learning_rate": 0.00019995737035567542, "loss": 0.9438, "step": 209 }, { "epoch": 0.09801633605600933, "grad_norm": 0.5546875, "learning_rate": 0.00019995694087138687, "loss": 0.9538, "step": 210 }, { "epoch": 0.0984830805134189, "grad_norm": 0.47265625, "learning_rate": 0.00019995650923491454, "loss": 0.8677, "step": 211 }, { "epoch": 0.09894982497082846, "grad_norm": 0.51171875, "learning_rate": 0.0001999560754462676, "loss": 0.9725, "step": 212 }, { "epoch": 0.09941656942823804, "grad_norm": 0.55078125, "learning_rate": 0.0001999556395054555, "loss": 0.9733, "step": 213 }, { "epoch": 0.09988331388564761, "grad_norm": 0.515625, "learning_rate": 0.00019995520141248752, "loss": 0.9579, "step": 214 }, { "epoch": 0.10035005834305717, "grad_norm": 0.5078125, "learning_rate": 0.0001999547611673732, "loss": 0.9401, "step": 215 }, { "epoch": 0.10081680280046675, "grad_norm": 0.56640625, "learning_rate": 0.00019995431877012197, "loss": 1.0415, "step": 216 }, { "epoch": 0.10128354725787632, "grad_norm": 0.55859375, "learning_rate": 0.00019995387422074332, "loss": 1.0081, "step": 217 }, { "epoch": 0.10175029171528588, "grad_norm": 0.47265625, "learning_rate": 0.0001999534275192469, "loss": 0.8797, "step": 218 }, { "epoch": 0.10221703617269545, "grad_norm": 0.5390625, "learning_rate": 0.00019995297866564227, "loss": 0.9835, "step": 219 }, { "epoch": 0.10268378063010501, "grad_norm": 0.474609375, "learning_rate": 0.00019995252765993908, "loss": 0.918, "step": 220 }, { "epoch": 0.10315052508751459, "grad_norm": 0.451171875, "learning_rate": 0.0001999520745021471, "loss": 0.824, "step": 221 }, { "epoch": 0.10361726954492416, "grad_norm": 0.49609375, "learning_rate": 0.00019995161919227606, "loss": 0.8907, "step": 222 }, { "epoch": 0.10408401400233372, "grad_norm": 0.54296875, "learning_rate": 0.00019995116173033575, "loss": 0.9877, "step": 223 }, { "epoch": 0.10455075845974329, "grad_norm": 0.52734375, "learning_rate": 0.00019995070211633603, "loss": 0.9514, "step": 224 }, { "epoch": 0.10501750291715285, "grad_norm": 0.4765625, "learning_rate": 0.00019995024035028677, "loss": 0.7733, "step": 225 }, { "epoch": 0.10548424737456243, "grad_norm": 0.625, "learning_rate": 0.000199949776432198, "loss": 0.9043, "step": 226 }, { "epoch": 0.105950991831972, "grad_norm": 0.46875, "learning_rate": 0.0001999493103620796, "loss": 0.7945, "step": 227 }, { "epoch": 0.10641773628938156, "grad_norm": 0.55078125, "learning_rate": 0.00019994884213994166, "loss": 0.9175, "step": 228 }, { "epoch": 0.10688448074679113, "grad_norm": 0.5, "learning_rate": 0.00019994837176579426, "loss": 0.8977, "step": 229 }, { "epoch": 0.1073512252042007, "grad_norm": 0.52734375, "learning_rate": 0.00019994789923964752, "loss": 0.9744, "step": 230 }, { "epoch": 0.10781796966161027, "grad_norm": 0.5, "learning_rate": 0.00019994742456151164, "loss": 0.9331, "step": 231 }, { "epoch": 0.10828471411901984, "grad_norm": 0.49609375, "learning_rate": 0.0001999469477313968, "loss": 1.0236, "step": 232 }, { "epoch": 0.1087514585764294, "grad_norm": 0.51953125, "learning_rate": 0.00019994646874931324, "loss": 0.9392, "step": 233 }, { "epoch": 0.10921820303383897, "grad_norm": 0.53515625, "learning_rate": 0.00019994598761527137, "loss": 1.0308, "step": 234 }, { "epoch": 0.10968494749124855, "grad_norm": 0.478515625, "learning_rate": 0.00019994550432928145, "loss": 0.9339, "step": 235 }, { "epoch": 0.1101516919486581, "grad_norm": 0.5078125, "learning_rate": 0.000199945018891354, "loss": 0.9856, "step": 236 }, { "epoch": 0.11061843640606768, "grad_norm": 0.484375, "learning_rate": 0.00019994453130149932, "loss": 0.754, "step": 237 }, { "epoch": 0.11108518086347724, "grad_norm": 0.498046875, "learning_rate": 0.00019994404155972804, "loss": 0.8992, "step": 238 }, { "epoch": 0.11155192532088681, "grad_norm": 0.59765625, "learning_rate": 0.00019994354966605064, "loss": 1.0772, "step": 239 }, { "epoch": 0.11201866977829639, "grad_norm": 0.55078125, "learning_rate": 0.0001999430556204777, "loss": 0.9704, "step": 240 }, { "epoch": 0.11248541423570595, "grad_norm": 0.6015625, "learning_rate": 0.00019994255942301994, "loss": 1.0512, "step": 241 }, { "epoch": 0.11295215869311552, "grad_norm": 0.4921875, "learning_rate": 0.00019994206107368795, "loss": 0.7931, "step": 242 }, { "epoch": 0.1134189031505251, "grad_norm": 0.515625, "learning_rate": 0.00019994156057249253, "loss": 0.7867, "step": 243 }, { "epoch": 0.11388564760793465, "grad_norm": 0.54296875, "learning_rate": 0.0001999410579194444, "loss": 0.9791, "step": 244 }, { "epoch": 0.11435239206534423, "grad_norm": 0.578125, "learning_rate": 0.0001999405531145544, "loss": 0.79, "step": 245 }, { "epoch": 0.11481913652275379, "grad_norm": 0.49609375, "learning_rate": 0.00019994004615783343, "loss": 0.8584, "step": 246 }, { "epoch": 0.11528588098016336, "grad_norm": 0.52734375, "learning_rate": 0.00019993953704929235, "loss": 0.8858, "step": 247 }, { "epoch": 0.11575262543757293, "grad_norm": 0.53515625, "learning_rate": 0.00019993902578894216, "loss": 0.9514, "step": 248 }, { "epoch": 0.1162193698949825, "grad_norm": 0.62890625, "learning_rate": 0.0001999385123767939, "loss": 0.9232, "step": 249 }, { "epoch": 0.11668611435239207, "grad_norm": 0.59375, "learning_rate": 0.0001999379968128585, "loss": 1.0322, "step": 250 }, { "epoch": 0.11715285880980163, "grad_norm": 0.482421875, "learning_rate": 0.00019993747909714723, "loss": 0.9013, "step": 251 }, { "epoch": 0.1176196032672112, "grad_norm": 0.52734375, "learning_rate": 0.0001999369592296711, "loss": 0.8958, "step": 252 }, { "epoch": 0.11808634772462077, "grad_norm": 0.515625, "learning_rate": 0.00019993643721044139, "loss": 0.9049, "step": 253 }, { "epoch": 0.11855309218203033, "grad_norm": 0.515625, "learning_rate": 0.0001999359130394693, "loss": 0.8687, "step": 254 }, { "epoch": 0.11901983663943991, "grad_norm": 0.54296875, "learning_rate": 0.0001999353867167661, "loss": 0.9216, "step": 255 }, { "epoch": 0.11948658109684948, "grad_norm": 0.5, "learning_rate": 0.00019993485824234316, "loss": 0.9323, "step": 256 }, { "epoch": 0.11995332555425904, "grad_norm": 0.498046875, "learning_rate": 0.00019993432761621186, "loss": 0.9418, "step": 257 }, { "epoch": 0.12042007001166861, "grad_norm": 0.4609375, "learning_rate": 0.00019993379483838357, "loss": 0.8182, "step": 258 }, { "epoch": 0.12088681446907817, "grad_norm": 0.466796875, "learning_rate": 0.00019993325990886983, "loss": 0.8715, "step": 259 }, { "epoch": 0.12135355892648775, "grad_norm": 0.453125, "learning_rate": 0.00019993272282768212, "loss": 0.7782, "step": 260 }, { "epoch": 0.12182030338389732, "grad_norm": 0.490234375, "learning_rate": 0.00019993218359483196, "loss": 0.8634, "step": 261 }, { "epoch": 0.12228704784130688, "grad_norm": 0.48828125, "learning_rate": 0.00019993164221033106, "loss": 0.7912, "step": 262 }, { "epoch": 0.12275379229871645, "grad_norm": 0.494140625, "learning_rate": 0.00019993109867419103, "loss": 0.8787, "step": 263 }, { "epoch": 0.12322053675612601, "grad_norm": 0.5, "learning_rate": 0.00019993055298642357, "loss": 0.8288, "step": 264 }, { "epoch": 0.12368728121353559, "grad_norm": 0.51171875, "learning_rate": 0.00019993000514704042, "loss": 0.8709, "step": 265 }, { "epoch": 0.12415402567094516, "grad_norm": 0.49609375, "learning_rate": 0.00019992945515605337, "loss": 0.8269, "step": 266 }, { "epoch": 0.12462077012835472, "grad_norm": 0.55859375, "learning_rate": 0.00019992890301347429, "loss": 0.9573, "step": 267 }, { "epoch": 0.12508751458576428, "grad_norm": 0.5234375, "learning_rate": 0.00019992834871931505, "loss": 0.9278, "step": 268 }, { "epoch": 0.12555425904317385, "grad_norm": 0.51953125, "learning_rate": 0.0001999277922735876, "loss": 0.8542, "step": 269 }, { "epoch": 0.12602100350058343, "grad_norm": 0.53125, "learning_rate": 0.00019992723367630392, "loss": 0.8666, "step": 270 }, { "epoch": 0.126487747957993, "grad_norm": 0.486328125, "learning_rate": 0.000199926672927476, "loss": 0.8194, "step": 271 }, { "epoch": 0.12695449241540258, "grad_norm": 0.5078125, "learning_rate": 0.00019992611002711595, "loss": 0.925, "step": 272 }, { "epoch": 0.12742123687281215, "grad_norm": 0.50390625, "learning_rate": 0.0001999255449752359, "loss": 0.8912, "step": 273 }, { "epoch": 0.1278879813302217, "grad_norm": 0.48828125, "learning_rate": 0.00019992497777184798, "loss": 0.8968, "step": 274 }, { "epoch": 0.12835472578763127, "grad_norm": 0.51171875, "learning_rate": 0.00019992440841696442, "loss": 0.9033, "step": 275 }, { "epoch": 0.12882147024504084, "grad_norm": 0.58203125, "learning_rate": 0.00019992383691059748, "loss": 1.0323, "step": 276 }, { "epoch": 0.12928821470245042, "grad_norm": 0.515625, "learning_rate": 0.0001999232632527595, "loss": 0.8634, "step": 277 }, { "epoch": 0.12975495915986, "grad_norm": 0.53125, "learning_rate": 0.00019992268744346275, "loss": 0.9324, "step": 278 }, { "epoch": 0.13022170361726954, "grad_norm": 0.498046875, "learning_rate": 0.00019992210948271968, "loss": 0.9209, "step": 279 }, { "epoch": 0.1306884480746791, "grad_norm": 0.5078125, "learning_rate": 0.00019992152937054272, "loss": 0.9396, "step": 280 }, { "epoch": 0.13115519253208868, "grad_norm": 0.48046875, "learning_rate": 0.0001999209471069444, "loss": 0.9412, "step": 281 }, { "epoch": 0.13162193698949826, "grad_norm": 0.44921875, "learning_rate": 0.0001999203626919372, "loss": 0.8208, "step": 282 }, { "epoch": 0.13208868144690783, "grad_norm": 0.5, "learning_rate": 0.00019991977612553372, "loss": 0.8783, "step": 283 }, { "epoch": 0.13255542590431738, "grad_norm": 0.453125, "learning_rate": 0.0001999191874077466, "loss": 0.8393, "step": 284 }, { "epoch": 0.13302217036172695, "grad_norm": 0.5703125, "learning_rate": 0.00019991859653858854, "loss": 1.0623, "step": 285 }, { "epoch": 0.13348891481913652, "grad_norm": 0.5625, "learning_rate": 0.00019991800351807222, "loss": 1.002, "step": 286 }, { "epoch": 0.1339556592765461, "grad_norm": 0.51171875, "learning_rate": 0.00019991740834621042, "loss": 0.8858, "step": 287 }, { "epoch": 0.13442240373395567, "grad_norm": 0.52734375, "learning_rate": 0.00019991681102301596, "loss": 0.8736, "step": 288 }, { "epoch": 0.13488914819136522, "grad_norm": 0.46875, "learning_rate": 0.00019991621154850169, "loss": 0.8057, "step": 289 }, { "epoch": 0.1353558926487748, "grad_norm": 0.53125, "learning_rate": 0.00019991560992268053, "loss": 0.9185, "step": 290 }, { "epoch": 0.13582263710618436, "grad_norm": 0.490234375, "learning_rate": 0.00019991500614556545, "loss": 0.8324, "step": 291 }, { "epoch": 0.13628938156359394, "grad_norm": 0.51953125, "learning_rate": 0.0001999144002171694, "loss": 0.9509, "step": 292 }, { "epoch": 0.1367561260210035, "grad_norm": 0.486328125, "learning_rate": 0.00019991379213750546, "loss": 0.7803, "step": 293 }, { "epoch": 0.13722287047841306, "grad_norm": 0.478515625, "learning_rate": 0.00019991318190658674, "loss": 0.9608, "step": 294 }, { "epoch": 0.13768961493582263, "grad_norm": 0.498046875, "learning_rate": 0.00019991256952442635, "loss": 0.8689, "step": 295 }, { "epoch": 0.1381563593932322, "grad_norm": 0.52734375, "learning_rate": 0.0001999119549910375, "loss": 0.8479, "step": 296 }, { "epoch": 0.13862310385064178, "grad_norm": 0.4921875, "learning_rate": 0.00019991133830643337, "loss": 0.8892, "step": 297 }, { "epoch": 0.13908984830805135, "grad_norm": 0.5234375, "learning_rate": 0.00019991071947062728, "loss": 0.8905, "step": 298 }, { "epoch": 0.13955659276546092, "grad_norm": 0.5390625, "learning_rate": 0.00019991009848363257, "loss": 0.9355, "step": 299 }, { "epoch": 0.14002333722287047, "grad_norm": 0.51953125, "learning_rate": 0.00019990947534546258, "loss": 0.871, "step": 300 }, { "epoch": 0.14049008168028004, "grad_norm": 0.51171875, "learning_rate": 0.0001999088500561307, "loss": 0.8625, "step": 301 }, { "epoch": 0.14095682613768962, "grad_norm": 0.52734375, "learning_rate": 0.00019990822261565047, "loss": 0.9275, "step": 302 }, { "epoch": 0.1414235705950992, "grad_norm": 0.5234375, "learning_rate": 0.00019990759302403535, "loss": 0.8776, "step": 303 }, { "epoch": 0.14189031505250876, "grad_norm": 0.5078125, "learning_rate": 0.00019990696128129888, "loss": 0.843, "step": 304 }, { "epoch": 0.1423570595099183, "grad_norm": 0.5703125, "learning_rate": 0.0001999063273874547, "loss": 1.0007, "step": 305 }, { "epoch": 0.14282380396732788, "grad_norm": 0.53515625, "learning_rate": 0.00019990569134251644, "loss": 0.9292, "step": 306 }, { "epoch": 0.14329054842473746, "grad_norm": 0.478515625, "learning_rate": 0.00019990505314649781, "loss": 0.8729, "step": 307 }, { "epoch": 0.14375729288214703, "grad_norm": 0.5078125, "learning_rate": 0.00019990441279941253, "loss": 0.9855, "step": 308 }, { "epoch": 0.1442240373395566, "grad_norm": 0.5, "learning_rate": 0.0001999037703012744, "loss": 0.9412, "step": 309 }, { "epoch": 0.14469078179696615, "grad_norm": 0.5859375, "learning_rate": 0.00019990312565209726, "loss": 1.0191, "step": 310 }, { "epoch": 0.14515752625437572, "grad_norm": 0.47265625, "learning_rate": 0.00019990247885189494, "loss": 0.7305, "step": 311 }, { "epoch": 0.1456242707117853, "grad_norm": 0.4609375, "learning_rate": 0.00019990182990068146, "loss": 0.8149, "step": 312 }, { "epoch": 0.14609101516919487, "grad_norm": 0.470703125, "learning_rate": 0.0001999011787984707, "loss": 0.885, "step": 313 }, { "epoch": 0.14655775962660444, "grad_norm": 0.59765625, "learning_rate": 0.00019990052554527674, "loss": 0.9428, "step": 314 }, { "epoch": 0.147024504084014, "grad_norm": 0.546875, "learning_rate": 0.00019989987014111359, "loss": 0.9553, "step": 315 }, { "epoch": 0.14749124854142356, "grad_norm": 0.5078125, "learning_rate": 0.00019989921258599546, "loss": 0.9251, "step": 316 }, { "epoch": 0.14795799299883314, "grad_norm": 0.51171875, "learning_rate": 0.00019989855287993636, "loss": 0.8842, "step": 317 }, { "epoch": 0.1484247374562427, "grad_norm": 0.486328125, "learning_rate": 0.0001998978910229506, "loss": 0.8508, "step": 318 }, { "epoch": 0.14889148191365228, "grad_norm": 0.470703125, "learning_rate": 0.00019989722701505244, "loss": 0.8295, "step": 319 }, { "epoch": 0.14935822637106183, "grad_norm": 0.4609375, "learning_rate": 0.00019989656085625613, "loss": 0.8384, "step": 320 }, { "epoch": 0.1498249708284714, "grad_norm": 0.4765625, "learning_rate": 0.00019989589254657605, "loss": 0.7348, "step": 321 }, { "epoch": 0.15029171528588098, "grad_norm": 0.55078125, "learning_rate": 0.00019989522208602653, "loss": 0.9145, "step": 322 }, { "epoch": 0.15075845974329055, "grad_norm": 0.53515625, "learning_rate": 0.0001998945494746221, "loss": 0.9695, "step": 323 }, { "epoch": 0.15122520420070013, "grad_norm": 0.5390625, "learning_rate": 0.00019989387471237713, "loss": 0.9439, "step": 324 }, { "epoch": 0.1516919486581097, "grad_norm": 0.59375, "learning_rate": 0.0001998931977993062, "loss": 0.9206, "step": 325 }, { "epoch": 0.15215869311551924, "grad_norm": 0.5234375, "learning_rate": 0.0001998925187354239, "loss": 0.813, "step": 326 }, { "epoch": 0.15262543757292882, "grad_norm": 0.5546875, "learning_rate": 0.00019989183752074484, "loss": 1.0323, "step": 327 }, { "epoch": 0.1530921820303384, "grad_norm": 0.4609375, "learning_rate": 0.0001998911541552837, "loss": 0.7331, "step": 328 }, { "epoch": 0.15355892648774797, "grad_norm": 0.50390625, "learning_rate": 0.00019989046863905517, "loss": 0.8732, "step": 329 }, { "epoch": 0.15402567094515754, "grad_norm": 0.54296875, "learning_rate": 0.00019988978097207403, "loss": 0.9539, "step": 330 }, { "epoch": 0.15449241540256708, "grad_norm": 0.546875, "learning_rate": 0.00019988909115435508, "loss": 0.9257, "step": 331 }, { "epoch": 0.15495915985997666, "grad_norm": 0.490234375, "learning_rate": 0.00019988839918591315, "loss": 0.7343, "step": 332 }, { "epoch": 0.15542590431738623, "grad_norm": 0.48046875, "learning_rate": 0.00019988770506676316, "loss": 0.8209, "step": 333 }, { "epoch": 0.1558926487747958, "grad_norm": 0.5, "learning_rate": 0.00019988700879692005, "loss": 0.8491, "step": 334 }, { "epoch": 0.15635939323220538, "grad_norm": 0.458984375, "learning_rate": 0.00019988631037639882, "loss": 0.8388, "step": 335 }, { "epoch": 0.15682613768961493, "grad_norm": 0.44921875, "learning_rate": 0.0001998856098052145, "loss": 0.837, "step": 336 }, { "epoch": 0.1572928821470245, "grad_norm": 0.50390625, "learning_rate": 0.0001998849070833822, "loss": 0.8748, "step": 337 }, { "epoch": 0.15775962660443407, "grad_norm": 0.51171875, "learning_rate": 0.00019988420221091698, "loss": 0.8861, "step": 338 }, { "epoch": 0.15822637106184365, "grad_norm": 0.5078125, "learning_rate": 0.00019988349518783408, "loss": 0.8853, "step": 339 }, { "epoch": 0.15869311551925322, "grad_norm": 0.515625, "learning_rate": 0.00019988278601414873, "loss": 0.8224, "step": 340 }, { "epoch": 0.15915985997666277, "grad_norm": 0.515625, "learning_rate": 0.00019988207468987613, "loss": 0.7861, "step": 341 }, { "epoch": 0.15962660443407234, "grad_norm": 0.48046875, "learning_rate": 0.0001998813612150317, "loss": 0.8256, "step": 342 }, { "epoch": 0.1600933488914819, "grad_norm": 0.44140625, "learning_rate": 0.0001998806455896307, "loss": 0.7111, "step": 343 }, { "epoch": 0.16056009334889149, "grad_norm": 0.6875, "learning_rate": 0.00019987992781368862, "loss": 1.1183, "step": 344 }, { "epoch": 0.16102683780630106, "grad_norm": 0.54296875, "learning_rate": 0.00019987920788722084, "loss": 0.8174, "step": 345 }, { "epoch": 0.1614935822637106, "grad_norm": 0.5390625, "learning_rate": 0.00019987848581024288, "loss": 0.916, "step": 346 }, { "epoch": 0.16196032672112018, "grad_norm": 0.515625, "learning_rate": 0.00019987776158277032, "loss": 0.7234, "step": 347 }, { "epoch": 0.16242707117852975, "grad_norm": 0.5546875, "learning_rate": 0.00019987703520481874, "loss": 0.9876, "step": 348 }, { "epoch": 0.16289381563593933, "grad_norm": 0.6015625, "learning_rate": 0.0001998763066764038, "loss": 1.0152, "step": 349 }, { "epoch": 0.1633605600933489, "grad_norm": 0.546875, "learning_rate": 0.00019987557599754113, "loss": 0.9657, "step": 350 }, { "epoch": 0.16382730455075847, "grad_norm": 0.478515625, "learning_rate": 0.00019987484316824652, "loss": 0.8207, "step": 351 }, { "epoch": 0.16429404900816802, "grad_norm": 0.515625, "learning_rate": 0.00019987410818853572, "loss": 0.9195, "step": 352 }, { "epoch": 0.1647607934655776, "grad_norm": 0.5, "learning_rate": 0.00019987337105842453, "loss": 0.9246, "step": 353 }, { "epoch": 0.16522753792298717, "grad_norm": 0.498046875, "learning_rate": 0.00019987263177792888, "loss": 0.8218, "step": 354 }, { "epoch": 0.16569428238039674, "grad_norm": 0.5078125, "learning_rate": 0.00019987189034706464, "loss": 0.8671, "step": 355 }, { "epoch": 0.1661610268378063, "grad_norm": 0.5, "learning_rate": 0.0001998711467658478, "loss": 0.8414, "step": 356 }, { "epoch": 0.16662777129521586, "grad_norm": 0.53515625, "learning_rate": 0.00019987040103429436, "loss": 0.8736, "step": 357 }, { "epoch": 0.16709451575262543, "grad_norm": 0.494140625, "learning_rate": 0.00019986965315242037, "loss": 0.7668, "step": 358 }, { "epoch": 0.167561260210035, "grad_norm": 0.5625, "learning_rate": 0.00019986890312024197, "loss": 0.8707, "step": 359 }, { "epoch": 0.16802800466744458, "grad_norm": 0.451171875, "learning_rate": 0.00019986815093777524, "loss": 0.708, "step": 360 }, { "epoch": 0.16849474912485415, "grad_norm": 0.51953125, "learning_rate": 0.00019986739660503644, "loss": 0.8333, "step": 361 }, { "epoch": 0.1689614935822637, "grad_norm": 0.5390625, "learning_rate": 0.00019986664012204176, "loss": 0.8402, "step": 362 }, { "epoch": 0.16942823803967327, "grad_norm": 0.55078125, "learning_rate": 0.00019986588148880754, "loss": 0.8683, "step": 363 }, { "epoch": 0.16989498249708285, "grad_norm": 0.62890625, "learning_rate": 0.0001998651207053501, "loss": 1.028, "step": 364 }, { "epoch": 0.17036172695449242, "grad_norm": 0.52734375, "learning_rate": 0.00019986435777168577, "loss": 0.8522, "step": 365 }, { "epoch": 0.170828471411902, "grad_norm": 0.53125, "learning_rate": 0.00019986359268783104, "loss": 0.8638, "step": 366 }, { "epoch": 0.17129521586931154, "grad_norm": 0.486328125, "learning_rate": 0.00019986282545380235, "loss": 0.7876, "step": 367 }, { "epoch": 0.1717619603267211, "grad_norm": 0.474609375, "learning_rate": 0.00019986205606961624, "loss": 0.7971, "step": 368 }, { "epoch": 0.1722287047841307, "grad_norm": 0.515625, "learning_rate": 0.00019986128453528925, "loss": 0.8204, "step": 369 }, { "epoch": 0.17269544924154026, "grad_norm": 0.51953125, "learning_rate": 0.000199860510850838, "loss": 0.8482, "step": 370 }, { "epoch": 0.17316219369894983, "grad_norm": 0.515625, "learning_rate": 0.00019985973501627916, "loss": 0.9228, "step": 371 }, { "epoch": 0.17362893815635938, "grad_norm": 0.6171875, "learning_rate": 0.00019985895703162945, "loss": 1.0125, "step": 372 }, { "epoch": 0.17409568261376895, "grad_norm": 0.5625, "learning_rate": 0.00019985817689690557, "loss": 0.9471, "step": 373 }, { "epoch": 0.17456242707117853, "grad_norm": 0.482421875, "learning_rate": 0.00019985739461212437, "loss": 0.8284, "step": 374 }, { "epoch": 0.1750291715285881, "grad_norm": 0.48828125, "learning_rate": 0.00019985661017730267, "loss": 0.8444, "step": 375 }, { "epoch": 0.17549591598599767, "grad_norm": 0.55859375, "learning_rate": 0.00019985582359245734, "loss": 1.0169, "step": 376 }, { "epoch": 0.17596266044340725, "grad_norm": 0.466796875, "learning_rate": 0.00019985503485760536, "loss": 0.8166, "step": 377 }, { "epoch": 0.1764294049008168, "grad_norm": 0.474609375, "learning_rate": 0.00019985424397276366, "loss": 0.7685, "step": 378 }, { "epoch": 0.17689614935822637, "grad_norm": 0.470703125, "learning_rate": 0.0001998534509379493, "loss": 0.8422, "step": 379 }, { "epoch": 0.17736289381563594, "grad_norm": 0.498046875, "learning_rate": 0.00019985265575317936, "loss": 0.8165, "step": 380 }, { "epoch": 0.17782963827304551, "grad_norm": 0.46875, "learning_rate": 0.00019985185841847098, "loss": 0.7526, "step": 381 }, { "epoch": 0.1782963827304551, "grad_norm": 0.56640625, "learning_rate": 0.00019985105893384128, "loss": 0.9166, "step": 382 }, { "epoch": 0.17876312718786463, "grad_norm": 0.466796875, "learning_rate": 0.00019985025729930752, "loss": 0.7758, "step": 383 }, { "epoch": 0.1792298716452742, "grad_norm": 0.51171875, "learning_rate": 0.00019984945351488688, "loss": 0.7748, "step": 384 }, { "epoch": 0.17969661610268378, "grad_norm": 0.578125, "learning_rate": 0.00019984864758059676, "loss": 0.9146, "step": 385 }, { "epoch": 0.18016336056009336, "grad_norm": 0.50390625, "learning_rate": 0.00019984783949645446, "loss": 0.7409, "step": 386 }, { "epoch": 0.18063010501750293, "grad_norm": 0.54296875, "learning_rate": 0.0001998470292624774, "loss": 0.88, "step": 387 }, { "epoch": 0.18109684947491247, "grad_norm": 0.578125, "learning_rate": 0.000199846216878683, "loss": 1.0229, "step": 388 }, { "epoch": 0.18156359393232205, "grad_norm": 0.462890625, "learning_rate": 0.00019984540234508879, "loss": 0.8394, "step": 389 }, { "epoch": 0.18203033838973162, "grad_norm": 0.515625, "learning_rate": 0.0001998445856617123, "loss": 0.8287, "step": 390 }, { "epoch": 0.1824970828471412, "grad_norm": 0.515625, "learning_rate": 0.00019984376682857108, "loss": 0.8967, "step": 391 }, { "epoch": 0.18296382730455077, "grad_norm": 0.52734375, "learning_rate": 0.00019984294584568277, "loss": 0.8842, "step": 392 }, { "epoch": 0.18343057176196031, "grad_norm": 0.447265625, "learning_rate": 0.0001998421227130651, "loss": 0.7919, "step": 393 }, { "epoch": 0.1838973162193699, "grad_norm": 0.48046875, "learning_rate": 0.00019984129743073567, "loss": 0.8026, "step": 394 }, { "epoch": 0.18436406067677946, "grad_norm": 0.5, "learning_rate": 0.0001998404699987124, "loss": 0.8767, "step": 395 }, { "epoch": 0.18483080513418904, "grad_norm": 0.5, "learning_rate": 0.000199839640417013, "loss": 0.8463, "step": 396 }, { "epoch": 0.1852975495915986, "grad_norm": 0.6328125, "learning_rate": 0.00019983880868565542, "loss": 0.9201, "step": 397 }, { "epoch": 0.18576429404900816, "grad_norm": 0.54296875, "learning_rate": 0.00019983797480465748, "loss": 0.782, "step": 398 }, { "epoch": 0.18623103850641773, "grad_norm": 0.52734375, "learning_rate": 0.00019983713877403715, "loss": 0.8767, "step": 399 }, { "epoch": 0.1866977829638273, "grad_norm": 0.55078125, "learning_rate": 0.00019983630059381248, "loss": 0.9837, "step": 400 }, { "epoch": 0.18716452742123688, "grad_norm": 0.52734375, "learning_rate": 0.00019983546026400149, "loss": 0.7993, "step": 401 }, { "epoch": 0.18763127187864645, "grad_norm": 0.486328125, "learning_rate": 0.00019983461778462226, "loss": 0.874, "step": 402 }, { "epoch": 0.18809801633605602, "grad_norm": 0.53515625, "learning_rate": 0.00019983377315569293, "loss": 0.9171, "step": 403 }, { "epoch": 0.18856476079346557, "grad_norm": 0.49609375, "learning_rate": 0.00019983292637723173, "loss": 0.832, "step": 404 }, { "epoch": 0.18903150525087514, "grad_norm": 0.48046875, "learning_rate": 0.00019983207744925683, "loss": 0.821, "step": 405 }, { "epoch": 0.18949824970828472, "grad_norm": 0.546875, "learning_rate": 0.00019983122637178656, "loss": 0.9555, "step": 406 }, { "epoch": 0.1899649941656943, "grad_norm": 0.51953125, "learning_rate": 0.00019983037314483918, "loss": 0.9288, "step": 407 }, { "epoch": 0.19043173862310386, "grad_norm": 0.49609375, "learning_rate": 0.00019982951776843316, "loss": 0.8557, "step": 408 }, { "epoch": 0.1908984830805134, "grad_norm": 0.53125, "learning_rate": 0.00019982866024258682, "loss": 0.8824, "step": 409 }, { "epoch": 0.19136522753792298, "grad_norm": 0.5078125, "learning_rate": 0.00019982780056731868, "loss": 0.8061, "step": 410 }, { "epoch": 0.19183197199533256, "grad_norm": 0.5234375, "learning_rate": 0.0001998269387426472, "loss": 0.9368, "step": 411 }, { "epoch": 0.19229871645274213, "grad_norm": 0.54296875, "learning_rate": 0.00019982607476859098, "loss": 0.8888, "step": 412 }, { "epoch": 0.1927654609101517, "grad_norm": 0.60546875, "learning_rate": 0.00019982520864516865, "loss": 0.9224, "step": 413 }, { "epoch": 0.19323220536756125, "grad_norm": 0.515625, "learning_rate": 0.00019982434037239878, "loss": 0.8144, "step": 414 }, { "epoch": 0.19369894982497082, "grad_norm": 0.52734375, "learning_rate": 0.00019982346995030015, "loss": 0.8414, "step": 415 }, { "epoch": 0.1941656942823804, "grad_norm": 0.5078125, "learning_rate": 0.00019982259737889142, "loss": 0.8562, "step": 416 }, { "epoch": 0.19463243873978997, "grad_norm": 0.48046875, "learning_rate": 0.0001998217226581914, "loss": 0.7829, "step": 417 }, { "epoch": 0.19509918319719954, "grad_norm": 0.5, "learning_rate": 0.00019982084578821897, "loss": 0.8173, "step": 418 }, { "epoch": 0.1955659276546091, "grad_norm": 0.60546875, "learning_rate": 0.00019981996676899296, "loss": 0.8386, "step": 419 }, { "epoch": 0.19603267211201866, "grad_norm": 0.6015625, "learning_rate": 0.00019981908560053234, "loss": 0.9298, "step": 420 }, { "epoch": 0.19649941656942824, "grad_norm": 0.5390625, "learning_rate": 0.00019981820228285598, "loss": 0.9126, "step": 421 }, { "epoch": 0.1969661610268378, "grad_norm": 0.423828125, "learning_rate": 0.00019981731681598306, "loss": 0.6823, "step": 422 }, { "epoch": 0.19743290548424738, "grad_norm": 0.51171875, "learning_rate": 0.00019981642919993252, "loss": 0.7723, "step": 423 }, { "epoch": 0.19789964994165693, "grad_norm": 0.486328125, "learning_rate": 0.0001998155394347235, "loss": 0.8236, "step": 424 }, { "epoch": 0.1983663943990665, "grad_norm": 0.46484375, "learning_rate": 0.00019981464752037522, "loss": 0.7825, "step": 425 }, { "epoch": 0.19883313885647608, "grad_norm": 0.53125, "learning_rate": 0.00019981375345690678, "loss": 0.9205, "step": 426 }, { "epoch": 0.19929988331388565, "grad_norm": 0.5, "learning_rate": 0.0001998128572443375, "loss": 0.9017, "step": 427 }, { "epoch": 0.19976662777129522, "grad_norm": 0.53125, "learning_rate": 0.00019981195888268667, "loss": 0.8884, "step": 428 }, { "epoch": 0.2002333722287048, "grad_norm": 0.515625, "learning_rate": 0.00019981105837197364, "loss": 0.8075, "step": 429 }, { "epoch": 0.20070011668611434, "grad_norm": 0.55078125, "learning_rate": 0.00019981015571221772, "loss": 0.9266, "step": 430 }, { "epoch": 0.20116686114352392, "grad_norm": 0.58984375, "learning_rate": 0.00019980925090343844, "loss": 0.8949, "step": 431 }, { "epoch": 0.2016336056009335, "grad_norm": 0.494140625, "learning_rate": 0.00019980834394565524, "loss": 0.7104, "step": 432 }, { "epoch": 0.20210035005834306, "grad_norm": 0.5546875, "learning_rate": 0.0001998074348388877, "loss": 0.8944, "step": 433 }, { "epoch": 0.20256709451575264, "grad_norm": 0.51171875, "learning_rate": 0.0001998065235831553, "loss": 0.7776, "step": 434 }, { "epoch": 0.20303383897316218, "grad_norm": 0.609375, "learning_rate": 0.0001998056101784777, "loss": 0.8335, "step": 435 }, { "epoch": 0.20350058343057176, "grad_norm": 0.546875, "learning_rate": 0.00019980469462487461, "loss": 0.8517, "step": 436 }, { "epoch": 0.20396732788798133, "grad_norm": 0.5546875, "learning_rate": 0.00019980377692236569, "loss": 0.9135, "step": 437 }, { "epoch": 0.2044340723453909, "grad_norm": 0.53515625, "learning_rate": 0.00019980285707097074, "loss": 0.8213, "step": 438 }, { "epoch": 0.20490081680280048, "grad_norm": 0.55078125, "learning_rate": 0.00019980193507070952, "loss": 0.8519, "step": 439 }, { "epoch": 0.20536756126021002, "grad_norm": 0.58203125, "learning_rate": 0.00019980101092160194, "loss": 0.9009, "step": 440 }, { "epoch": 0.2058343057176196, "grad_norm": 0.62890625, "learning_rate": 0.00019980008462366783, "loss": 1.0034, "step": 441 }, { "epoch": 0.20630105017502917, "grad_norm": 0.5546875, "learning_rate": 0.0001997991561769272, "loss": 0.8987, "step": 442 }, { "epoch": 0.20676779463243875, "grad_norm": 0.546875, "learning_rate": 0.00019979822558139997, "loss": 0.9159, "step": 443 }, { "epoch": 0.20723453908984832, "grad_norm": 0.46484375, "learning_rate": 0.00019979729283710622, "loss": 0.779, "step": 444 }, { "epoch": 0.20770128354725786, "grad_norm": 0.482421875, "learning_rate": 0.00019979635794406607, "loss": 0.7709, "step": 445 }, { "epoch": 0.20816802800466744, "grad_norm": 0.5390625, "learning_rate": 0.00019979542090229957, "loss": 0.9629, "step": 446 }, { "epoch": 0.208634772462077, "grad_norm": 0.55859375, "learning_rate": 0.00019979448171182694, "loss": 0.9416, "step": 447 }, { "epoch": 0.20910151691948659, "grad_norm": 0.466796875, "learning_rate": 0.0001997935403726684, "loss": 0.8382, "step": 448 }, { "epoch": 0.20956826137689616, "grad_norm": 0.5234375, "learning_rate": 0.0001997925968848442, "loss": 0.8457, "step": 449 }, { "epoch": 0.2100350058343057, "grad_norm": 0.5078125, "learning_rate": 0.00019979165124837467, "loss": 0.7761, "step": 450 }, { "epoch": 0.21050175029171528, "grad_norm": 0.52734375, "learning_rate": 0.0001997907034632802, "loss": 0.8188, "step": 451 }, { "epoch": 0.21096849474912485, "grad_norm": 0.5390625, "learning_rate": 0.00019978975352958115, "loss": 0.8768, "step": 452 }, { "epoch": 0.21143523920653443, "grad_norm": 0.5703125, "learning_rate": 0.00019978880144729794, "loss": 1.0063, "step": 453 }, { "epoch": 0.211901983663944, "grad_norm": 0.515625, "learning_rate": 0.00019978784721645117, "loss": 0.8357, "step": 454 }, { "epoch": 0.21236872812135357, "grad_norm": 0.56640625, "learning_rate": 0.0001997868908370613, "loss": 0.8385, "step": 455 }, { "epoch": 0.21283547257876312, "grad_norm": 0.466796875, "learning_rate": 0.000199785932309149, "loss": 0.7696, "step": 456 }, { "epoch": 0.2133022170361727, "grad_norm": 0.5078125, "learning_rate": 0.00019978497163273482, "loss": 0.8369, "step": 457 }, { "epoch": 0.21376896149358227, "grad_norm": 0.58984375, "learning_rate": 0.0001997840088078395, "loss": 1.0143, "step": 458 }, { "epoch": 0.21423570595099184, "grad_norm": 0.49609375, "learning_rate": 0.0001997830438344838, "loss": 0.7826, "step": 459 }, { "epoch": 0.2147024504084014, "grad_norm": 0.54296875, "learning_rate": 0.00019978207671268839, "loss": 0.9136, "step": 460 }, { "epoch": 0.21516919486581096, "grad_norm": 0.546875, "learning_rate": 0.0001997811074424742, "loss": 0.845, "step": 461 }, { "epoch": 0.21563593932322053, "grad_norm": 0.55078125, "learning_rate": 0.00019978013602386203, "loss": 0.9591, "step": 462 }, { "epoch": 0.2161026837806301, "grad_norm": 0.470703125, "learning_rate": 0.00019977916245687282, "loss": 0.818, "step": 463 }, { "epoch": 0.21656942823803968, "grad_norm": 0.462890625, "learning_rate": 0.00019977818674152755, "loss": 0.7291, "step": 464 }, { "epoch": 0.21703617269544925, "grad_norm": 0.490234375, "learning_rate": 0.00019977720887784722, "loss": 0.7468, "step": 465 }, { "epoch": 0.2175029171528588, "grad_norm": 0.48828125, "learning_rate": 0.00019977622886585287, "loss": 0.7187, "step": 466 }, { "epoch": 0.21796966161026837, "grad_norm": 0.53125, "learning_rate": 0.00019977524670556563, "loss": 0.8585, "step": 467 }, { "epoch": 0.21843640606767795, "grad_norm": 0.5234375, "learning_rate": 0.0001997742623970066, "loss": 0.7945, "step": 468 }, { "epoch": 0.21890315052508752, "grad_norm": 0.515625, "learning_rate": 0.000199773275940197, "loss": 0.8108, "step": 469 }, { "epoch": 0.2193698949824971, "grad_norm": 0.56640625, "learning_rate": 0.0001997722873351581, "loss": 0.7664, "step": 470 }, { "epoch": 0.21983663943990664, "grad_norm": 0.51171875, "learning_rate": 0.00019977129658191108, "loss": 0.8279, "step": 471 }, { "epoch": 0.2203033838973162, "grad_norm": 0.5703125, "learning_rate": 0.00019977030368047743, "loss": 0.9911, "step": 472 }, { "epoch": 0.2207701283547258, "grad_norm": 0.52734375, "learning_rate": 0.00019976930863087843, "loss": 0.8442, "step": 473 }, { "epoch": 0.22123687281213536, "grad_norm": 0.53515625, "learning_rate": 0.00019976831143313548, "loss": 0.8565, "step": 474 }, { "epoch": 0.22170361726954493, "grad_norm": 0.484375, "learning_rate": 0.00019976731208727012, "loss": 0.7531, "step": 475 }, { "epoch": 0.22217036172695448, "grad_norm": 0.51171875, "learning_rate": 0.00019976631059330383, "loss": 0.7538, "step": 476 }, { "epoch": 0.22263710618436405, "grad_norm": 0.5625, "learning_rate": 0.00019976530695125816, "loss": 0.9925, "step": 477 }, { "epoch": 0.22310385064177363, "grad_norm": 0.515625, "learning_rate": 0.00019976430116115475, "loss": 0.8312, "step": 478 }, { "epoch": 0.2235705950991832, "grad_norm": 0.55078125, "learning_rate": 0.00019976329322301525, "loss": 0.856, "step": 479 }, { "epoch": 0.22403733955659277, "grad_norm": 0.61328125, "learning_rate": 0.00019976228313686137, "loss": 0.964, "step": 480 }, { "epoch": 0.22450408401400235, "grad_norm": 0.5546875, "learning_rate": 0.00019976127090271483, "loss": 0.8208, "step": 481 }, { "epoch": 0.2249708284714119, "grad_norm": 0.5234375, "learning_rate": 0.00019976025652059746, "loss": 0.8248, "step": 482 }, { "epoch": 0.22543757292882147, "grad_norm": 0.51171875, "learning_rate": 0.00019975923999053107, "loss": 0.7991, "step": 483 }, { "epoch": 0.22590431738623104, "grad_norm": 0.486328125, "learning_rate": 0.00019975822131253757, "loss": 0.715, "step": 484 }, { "epoch": 0.22637106184364061, "grad_norm": 0.52734375, "learning_rate": 0.00019975720048663885, "loss": 0.7423, "step": 485 }, { "epoch": 0.2268378063010502, "grad_norm": 0.5078125, "learning_rate": 0.00019975617751285694, "loss": 0.8253, "step": 486 }, { "epoch": 0.22730455075845973, "grad_norm": 0.546875, "learning_rate": 0.00019975515239121383, "loss": 0.7883, "step": 487 }, { "epoch": 0.2277712952158693, "grad_norm": 0.49609375, "learning_rate": 0.00019975412512173165, "loss": 0.7187, "step": 488 }, { "epoch": 0.22823803967327888, "grad_norm": 0.54296875, "learning_rate": 0.00019975309570443244, "loss": 0.8448, "step": 489 }, { "epoch": 0.22870478413068845, "grad_norm": 0.55859375, "learning_rate": 0.0001997520641393384, "loss": 0.8407, "step": 490 }, { "epoch": 0.22917152858809803, "grad_norm": 0.609375, "learning_rate": 0.00019975103042647177, "loss": 0.9334, "step": 491 }, { "epoch": 0.22963827304550757, "grad_norm": 0.515625, "learning_rate": 0.00019974999456585474, "loss": 0.7595, "step": 492 }, { "epoch": 0.23010501750291715, "grad_norm": 0.52734375, "learning_rate": 0.0001997489565575097, "loss": 0.8393, "step": 493 }, { "epoch": 0.23057176196032672, "grad_norm": 0.54296875, "learning_rate": 0.00019974791640145893, "loss": 0.7795, "step": 494 }, { "epoch": 0.2310385064177363, "grad_norm": 0.56640625, "learning_rate": 0.00019974687409772483, "loss": 0.8779, "step": 495 }, { "epoch": 0.23150525087514587, "grad_norm": 0.53515625, "learning_rate": 0.0001997458296463299, "loss": 0.7768, "step": 496 }, { "epoch": 0.23197199533255541, "grad_norm": 0.52734375, "learning_rate": 0.00019974478304729654, "loss": 0.8143, "step": 497 }, { "epoch": 0.232438739789965, "grad_norm": 0.515625, "learning_rate": 0.00019974373430064736, "loss": 0.8656, "step": 498 }, { "epoch": 0.23290548424737456, "grad_norm": 0.625, "learning_rate": 0.00019974268340640488, "loss": 0.9543, "step": 499 }, { "epoch": 0.23337222870478413, "grad_norm": 0.61328125, "learning_rate": 0.00019974163036459182, "loss": 0.9469, "step": 500 }, { "epoch": 0.2338389731621937, "grad_norm": 0.52734375, "learning_rate": 0.00019974057517523078, "loss": 0.8112, "step": 501 }, { "epoch": 0.23430571761960325, "grad_norm": 0.5078125, "learning_rate": 0.00019973951783834446, "loss": 0.6788, "step": 502 }, { "epoch": 0.23477246207701283, "grad_norm": 0.54296875, "learning_rate": 0.00019973845835395565, "loss": 0.8324, "step": 503 }, { "epoch": 0.2352392065344224, "grad_norm": 0.64453125, "learning_rate": 0.0001997373967220872, "loss": 0.9217, "step": 504 }, { "epoch": 0.23570595099183198, "grad_norm": 0.60546875, "learning_rate": 0.00019973633294276196, "loss": 0.8814, "step": 505 }, { "epoch": 0.23617269544924155, "grad_norm": 0.51953125, "learning_rate": 0.00019973526701600277, "loss": 0.766, "step": 506 }, { "epoch": 0.23663943990665112, "grad_norm": 0.5, "learning_rate": 0.00019973419894183262, "loss": 0.7433, "step": 507 }, { "epoch": 0.23710618436406067, "grad_norm": 0.5, "learning_rate": 0.00019973312872027452, "loss": 0.7257, "step": 508 }, { "epoch": 0.23757292882147024, "grad_norm": 0.5234375, "learning_rate": 0.00019973205635135153, "loss": 0.7668, "step": 509 }, { "epoch": 0.23803967327887982, "grad_norm": 0.5390625, "learning_rate": 0.00019973098183508666, "loss": 0.8133, "step": 510 }, { "epoch": 0.2385064177362894, "grad_norm": 0.546875, "learning_rate": 0.00019972990517150315, "loss": 0.7507, "step": 511 }, { "epoch": 0.23897316219369896, "grad_norm": 0.53515625, "learning_rate": 0.00019972882636062412, "loss": 0.7349, "step": 512 }, { "epoch": 0.2394399066511085, "grad_norm": 0.56640625, "learning_rate": 0.00019972774540247276, "loss": 0.8308, "step": 513 }, { "epoch": 0.23990665110851808, "grad_norm": 0.546875, "learning_rate": 0.00019972666229707244, "loss": 0.7733, "step": 514 }, { "epoch": 0.24037339556592766, "grad_norm": 0.5625, "learning_rate": 0.00019972557704444642, "loss": 0.8146, "step": 515 }, { "epoch": 0.24084014002333723, "grad_norm": 0.62109375, "learning_rate": 0.00019972448964461806, "loss": 0.9102, "step": 516 }, { "epoch": 0.2413068844807468, "grad_norm": 0.58984375, "learning_rate": 0.00019972340009761084, "loss": 0.7334, "step": 517 }, { "epoch": 0.24177362893815635, "grad_norm": 0.48046875, "learning_rate": 0.00019972230840344812, "loss": 0.7908, "step": 518 }, { "epoch": 0.24224037339556592, "grad_norm": 0.54296875, "learning_rate": 0.00019972121456215348, "loss": 0.7758, "step": 519 }, { "epoch": 0.2427071178529755, "grad_norm": 0.54296875, "learning_rate": 0.00019972011857375045, "loss": 0.8219, "step": 520 }, { "epoch": 0.24317386231038507, "grad_norm": 0.55078125, "learning_rate": 0.00019971902043826263, "loss": 0.8146, "step": 521 }, { "epoch": 0.24364060676779464, "grad_norm": 0.625, "learning_rate": 0.00019971792015571364, "loss": 0.9178, "step": 522 }, { "epoch": 0.2441073512252042, "grad_norm": 0.60546875, "learning_rate": 0.0001997168177261272, "loss": 0.8689, "step": 523 }, { "epoch": 0.24457409568261376, "grad_norm": 0.478515625, "learning_rate": 0.00019971571314952703, "loss": 0.756, "step": 524 }, { "epoch": 0.24504084014002334, "grad_norm": 0.62890625, "learning_rate": 0.00019971460642593692, "loss": 0.8528, "step": 525 }, { "epoch": 0.2455075845974329, "grad_norm": 0.55078125, "learning_rate": 0.0001997134975553807, "loss": 0.8385, "step": 526 }, { "epoch": 0.24597432905484248, "grad_norm": 0.51171875, "learning_rate": 0.00019971238653788227, "loss": 0.7451, "step": 527 }, { "epoch": 0.24644107351225203, "grad_norm": 0.54296875, "learning_rate": 0.00019971127337346548, "loss": 0.8731, "step": 528 }, { "epoch": 0.2469078179696616, "grad_norm": 0.50390625, "learning_rate": 0.00019971015806215437, "loss": 0.7527, "step": 529 }, { "epoch": 0.24737456242707118, "grad_norm": 0.578125, "learning_rate": 0.00019970904060397294, "loss": 0.9084, "step": 530 }, { "epoch": 0.24784130688448075, "grad_norm": 0.47265625, "learning_rate": 0.0001997079209989452, "loss": 0.6927, "step": 531 }, { "epoch": 0.24830805134189032, "grad_norm": 0.5703125, "learning_rate": 0.00019970679924709533, "loss": 0.8436, "step": 532 }, { "epoch": 0.2487747957992999, "grad_norm": 0.5, "learning_rate": 0.0001997056753484474, "loss": 0.7204, "step": 533 }, { "epoch": 0.24924154025670944, "grad_norm": 0.56640625, "learning_rate": 0.0001997045493030257, "loss": 0.7987, "step": 534 }, { "epoch": 0.24970828471411902, "grad_norm": 0.5234375, "learning_rate": 0.0001997034211108544, "loss": 0.7463, "step": 535 }, { "epoch": 0.25017502917152856, "grad_norm": 0.515625, "learning_rate": 0.00019970229077195787, "loss": 0.7439, "step": 536 }, { "epoch": 0.25017502917152856, "eval_loss": 0.9957383871078491, "eval_runtime": 96.4538, "eval_samples_per_second": 18.703, "eval_steps_per_second": 2.343, "step": 536 }, { "epoch": 0.25064177362893814, "grad_norm": 0.5859375, "learning_rate": 0.00019970115828636033, "loss": 0.8223, "step": 537 }, { "epoch": 0.2511085180863477, "grad_norm": 0.59765625, "learning_rate": 0.00019970002365408626, "loss": 0.8557, "step": 538 }, { "epoch": 0.2515752625437573, "grad_norm": 0.5703125, "learning_rate": 0.00019969888687516006, "loss": 0.8198, "step": 539 }, { "epoch": 0.25204200700116686, "grad_norm": 0.52734375, "learning_rate": 0.00019969774794960623, "loss": 0.7595, "step": 540 }, { "epoch": 0.25250875145857643, "grad_norm": 0.578125, "learning_rate": 0.00019969660687744925, "loss": 0.8335, "step": 541 }, { "epoch": 0.252975495915986, "grad_norm": 0.609375, "learning_rate": 0.0001996954636587137, "loss": 0.8795, "step": 542 }, { "epoch": 0.2534422403733956, "grad_norm": 0.58984375, "learning_rate": 0.00019969431829342419, "loss": 0.8579, "step": 543 }, { "epoch": 0.25390898483080515, "grad_norm": 0.55859375, "learning_rate": 0.00019969317078160544, "loss": 0.7966, "step": 544 }, { "epoch": 0.2543757292882147, "grad_norm": 0.58984375, "learning_rate": 0.0001996920211232821, "loss": 0.9128, "step": 545 }, { "epoch": 0.2548424737456243, "grad_norm": 0.515625, "learning_rate": 0.00019969086931847892, "loss": 0.7504, "step": 546 }, { "epoch": 0.2553092182030338, "grad_norm": 0.498046875, "learning_rate": 0.00019968971536722072, "loss": 0.7172, "step": 547 }, { "epoch": 0.2557759626604434, "grad_norm": 0.48828125, "learning_rate": 0.00019968855926953233, "loss": 0.7656, "step": 548 }, { "epoch": 0.25624270711785296, "grad_norm": 0.515625, "learning_rate": 0.00019968740102543865, "loss": 0.7943, "step": 549 }, { "epoch": 0.25670945157526254, "grad_norm": 0.51953125, "learning_rate": 0.00019968624063496464, "loss": 0.7665, "step": 550 }, { "epoch": 0.2571761960326721, "grad_norm": 0.478515625, "learning_rate": 0.00019968507809813526, "loss": 0.748, "step": 551 }, { "epoch": 0.2576429404900817, "grad_norm": 0.6015625, "learning_rate": 0.00019968391341497555, "loss": 0.8829, "step": 552 }, { "epoch": 0.25810968494749126, "grad_norm": 0.546875, "learning_rate": 0.00019968274658551058, "loss": 0.7753, "step": 553 }, { "epoch": 0.25857642940490083, "grad_norm": 0.6328125, "learning_rate": 0.00019968157760976544, "loss": 0.8457, "step": 554 }, { "epoch": 0.2590431738623104, "grad_norm": 0.671875, "learning_rate": 0.00019968040648776542, "loss": 0.8644, "step": 555 }, { "epoch": 0.25950991831972, "grad_norm": 0.5703125, "learning_rate": 0.00019967923321953556, "loss": 0.827, "step": 556 }, { "epoch": 0.2599766627771295, "grad_norm": 0.5390625, "learning_rate": 0.00019967805780510126, "loss": 0.7913, "step": 557 }, { "epoch": 0.26044340723453907, "grad_norm": 0.50390625, "learning_rate": 0.00019967688024448776, "loss": 0.771, "step": 558 }, { "epoch": 0.26091015169194864, "grad_norm": 0.498046875, "learning_rate": 0.00019967570053772046, "loss": 0.7563, "step": 559 }, { "epoch": 0.2613768961493582, "grad_norm": 0.5625, "learning_rate": 0.00019967451868482475, "loss": 0.8177, "step": 560 }, { "epoch": 0.2618436406067678, "grad_norm": 0.56640625, "learning_rate": 0.00019967333468582602, "loss": 0.8346, "step": 561 }, { "epoch": 0.26231038506417736, "grad_norm": 0.5234375, "learning_rate": 0.00019967214854074982, "loss": 0.8214, "step": 562 }, { "epoch": 0.26277712952158694, "grad_norm": 0.59375, "learning_rate": 0.00019967096024962167, "loss": 0.9256, "step": 563 }, { "epoch": 0.2632438739789965, "grad_norm": 0.50390625, "learning_rate": 0.00019966976981246716, "loss": 0.7873, "step": 564 }, { "epoch": 0.2637106184364061, "grad_norm": 0.455078125, "learning_rate": 0.00019966857722931192, "loss": 0.7267, "step": 565 }, { "epoch": 0.26417736289381566, "grad_norm": 0.58984375, "learning_rate": 0.00019966738250018162, "loss": 0.9068, "step": 566 }, { "epoch": 0.2646441073512252, "grad_norm": 0.447265625, "learning_rate": 0.000199666185625102, "loss": 0.7013, "step": 567 }, { "epoch": 0.26511085180863475, "grad_norm": 0.486328125, "learning_rate": 0.00019966498660409885, "loss": 0.724, "step": 568 }, { "epoch": 0.2655775962660443, "grad_norm": 0.53125, "learning_rate": 0.00019966378543719794, "loss": 0.9003, "step": 569 }, { "epoch": 0.2660443407234539, "grad_norm": 0.52734375, "learning_rate": 0.00019966258212442513, "loss": 0.7738, "step": 570 }, { "epoch": 0.26651108518086347, "grad_norm": 0.5234375, "learning_rate": 0.00019966137666580636, "loss": 0.7163, "step": 571 }, { "epoch": 0.26697782963827305, "grad_norm": 0.55859375, "learning_rate": 0.00019966016906136758, "loss": 0.8256, "step": 572 }, { "epoch": 0.2674445740956826, "grad_norm": 0.49609375, "learning_rate": 0.00019965895931113478, "loss": 0.6719, "step": 573 }, { "epoch": 0.2679113185530922, "grad_norm": 0.64453125, "learning_rate": 0.00019965774741513402, "loss": 0.8816, "step": 574 }, { "epoch": 0.26837806301050177, "grad_norm": 0.515625, "learning_rate": 0.0001996565333733914, "loss": 0.7635, "step": 575 }, { "epoch": 0.26884480746791134, "grad_norm": 0.53125, "learning_rate": 0.000199655317185933, "loss": 0.8282, "step": 576 }, { "epoch": 0.2693115519253209, "grad_norm": 0.52734375, "learning_rate": 0.00019965409885278507, "loss": 0.7184, "step": 577 }, { "epoch": 0.26977829638273043, "grad_norm": 0.53515625, "learning_rate": 0.00019965287837397385, "loss": 0.7891, "step": 578 }, { "epoch": 0.27024504084014, "grad_norm": 0.59765625, "learning_rate": 0.00019965165574952555, "loss": 0.8856, "step": 579 }, { "epoch": 0.2707117852975496, "grad_norm": 0.5078125, "learning_rate": 0.00019965043097946658, "loss": 0.7419, "step": 580 }, { "epoch": 0.27117852975495915, "grad_norm": 0.482421875, "learning_rate": 0.00019964920406382322, "loss": 0.6847, "step": 581 }, { "epoch": 0.2716452742123687, "grad_norm": 0.5078125, "learning_rate": 0.00019964797500262197, "loss": 0.719, "step": 582 }, { "epoch": 0.2721120186697783, "grad_norm": 0.578125, "learning_rate": 0.00019964674379588927, "loss": 0.8563, "step": 583 }, { "epoch": 0.2725787631271879, "grad_norm": 0.5703125, "learning_rate": 0.00019964551044365157, "loss": 0.8969, "step": 584 }, { "epoch": 0.27304550758459745, "grad_norm": 0.53125, "learning_rate": 0.00019964427494593553, "loss": 0.8194, "step": 585 }, { "epoch": 0.273512252042007, "grad_norm": 0.54296875, "learning_rate": 0.00019964303730276764, "loss": 0.8075, "step": 586 }, { "epoch": 0.2739789964994166, "grad_norm": 0.474609375, "learning_rate": 0.00019964179751417462, "loss": 0.7459, "step": 587 }, { "epoch": 0.2744457409568261, "grad_norm": 0.515625, "learning_rate": 0.00019964055558018313, "loss": 0.7756, "step": 588 }, { "epoch": 0.2749124854142357, "grad_norm": 0.5859375, "learning_rate": 0.00019963931150081995, "loss": 0.8687, "step": 589 }, { "epoch": 0.27537922987164526, "grad_norm": 0.53125, "learning_rate": 0.00019963806527611183, "loss": 0.785, "step": 590 }, { "epoch": 0.27584597432905483, "grad_norm": 0.55859375, "learning_rate": 0.0001996368169060856, "loss": 0.8049, "step": 591 }, { "epoch": 0.2763127187864644, "grad_norm": 0.51953125, "learning_rate": 0.00019963556639076817, "loss": 0.7738, "step": 592 }, { "epoch": 0.276779463243874, "grad_norm": 0.61328125, "learning_rate": 0.00019963431373018646, "loss": 0.8263, "step": 593 }, { "epoch": 0.27724620770128355, "grad_norm": 0.484375, "learning_rate": 0.00019963305892436742, "loss": 0.6784, "step": 594 }, { "epoch": 0.2777129521586931, "grad_norm": 0.515625, "learning_rate": 0.00019963180197333805, "loss": 0.7123, "step": 595 }, { "epoch": 0.2781796966161027, "grad_norm": 0.546875, "learning_rate": 0.00019963054287712545, "loss": 0.7788, "step": 596 }, { "epoch": 0.2786464410735123, "grad_norm": 0.55859375, "learning_rate": 0.00019962928163575676, "loss": 0.8286, "step": 597 }, { "epoch": 0.27911318553092185, "grad_norm": 0.6328125, "learning_rate": 0.00019962801824925904, "loss": 0.7915, "step": 598 }, { "epoch": 0.27957992998833137, "grad_norm": 0.59375, "learning_rate": 0.0001996267527176596, "loss": 0.7893, "step": 599 }, { "epoch": 0.28004667444574094, "grad_norm": 0.70703125, "learning_rate": 0.0001996254850409856, "loss": 0.9239, "step": 600 }, { "epoch": 0.2805134189031505, "grad_norm": 0.70703125, "learning_rate": 0.0001996242152192644, "loss": 0.8571, "step": 601 }, { "epoch": 0.2809801633605601, "grad_norm": 0.484375, "learning_rate": 0.00019962294325252328, "loss": 0.7047, "step": 602 }, { "epoch": 0.28144690781796966, "grad_norm": 0.5078125, "learning_rate": 0.00019962166914078968, "loss": 0.6812, "step": 603 }, { "epoch": 0.28191365227537923, "grad_norm": 0.51953125, "learning_rate": 0.000199620392884091, "loss": 0.7156, "step": 604 }, { "epoch": 0.2823803967327888, "grad_norm": 0.55078125, "learning_rate": 0.00019961911448245474, "loss": 0.76, "step": 605 }, { "epoch": 0.2828471411901984, "grad_norm": 0.51953125, "learning_rate": 0.00019961783393590842, "loss": 0.7147, "step": 606 }, { "epoch": 0.28331388564760795, "grad_norm": 0.55859375, "learning_rate": 0.0001996165512444796, "loss": 0.7387, "step": 607 }, { "epoch": 0.28378063010501753, "grad_norm": 0.546875, "learning_rate": 0.00019961526640819587, "loss": 0.715, "step": 608 }, { "epoch": 0.28424737456242705, "grad_norm": 0.64453125, "learning_rate": 0.000199613979427085, "loss": 0.8067, "step": 609 }, { "epoch": 0.2847141190198366, "grad_norm": 0.65234375, "learning_rate": 0.00019961269030117457, "loss": 0.808, "step": 610 }, { "epoch": 0.2851808634772462, "grad_norm": 0.6171875, "learning_rate": 0.00019961139903049242, "loss": 0.7604, "step": 611 }, { "epoch": 0.28564760793465577, "grad_norm": 0.66015625, "learning_rate": 0.00019961010561506633, "loss": 0.7635, "step": 612 }, { "epoch": 0.28611435239206534, "grad_norm": 0.58203125, "learning_rate": 0.00019960881005492414, "loss": 0.7656, "step": 613 }, { "epoch": 0.2865810968494749, "grad_norm": 0.59375, "learning_rate": 0.00019960751235009375, "loss": 0.7342, "step": 614 }, { "epoch": 0.2870478413068845, "grad_norm": 0.59765625, "learning_rate": 0.00019960621250060312, "loss": 0.8269, "step": 615 }, { "epoch": 0.28751458576429406, "grad_norm": 0.55078125, "learning_rate": 0.0001996049105064802, "loss": 0.7303, "step": 616 }, { "epoch": 0.28798133022170364, "grad_norm": 0.5546875, "learning_rate": 0.00019960360636775304, "loss": 0.71, "step": 617 }, { "epoch": 0.2884480746791132, "grad_norm": 0.6640625, "learning_rate": 0.00019960230008444973, "loss": 0.9302, "step": 618 }, { "epoch": 0.2889148191365227, "grad_norm": 0.58984375, "learning_rate": 0.0001996009916565984, "loss": 0.7775, "step": 619 }, { "epoch": 0.2893815635939323, "grad_norm": 0.6328125, "learning_rate": 0.0001995996810842272, "loss": 0.8243, "step": 620 }, { "epoch": 0.2898483080513419, "grad_norm": 0.59765625, "learning_rate": 0.00019959836836736433, "loss": 0.7412, "step": 621 }, { "epoch": 0.29031505250875145, "grad_norm": 0.50390625, "learning_rate": 0.00019959705350603812, "loss": 0.65, "step": 622 }, { "epoch": 0.290781796966161, "grad_norm": 0.5859375, "learning_rate": 0.00019959573650027687, "loss": 0.77, "step": 623 }, { "epoch": 0.2912485414235706, "grad_norm": 0.57421875, "learning_rate": 0.00019959441735010883, "loss": 0.7805, "step": 624 }, { "epoch": 0.29171528588098017, "grad_norm": 0.58203125, "learning_rate": 0.00019959309605556256, "loss": 0.7656, "step": 625 }, { "epoch": 0.29218203033838974, "grad_norm": 0.58203125, "learning_rate": 0.00019959177261666638, "loss": 0.8074, "step": 626 }, { "epoch": 0.2926487747957993, "grad_norm": 0.60546875, "learning_rate": 0.00019959044703344886, "loss": 0.8004, "step": 627 }, { "epoch": 0.2931155192532089, "grad_norm": 0.55859375, "learning_rate": 0.00019958911930593854, "loss": 0.7589, "step": 628 }, { "epoch": 0.29358226371061846, "grad_norm": 0.54296875, "learning_rate": 0.00019958778943416394, "loss": 0.7345, "step": 629 }, { "epoch": 0.294049008168028, "grad_norm": 0.5078125, "learning_rate": 0.00019958645741815375, "loss": 0.7486, "step": 630 }, { "epoch": 0.29451575262543755, "grad_norm": 0.58984375, "learning_rate": 0.00019958512325793666, "loss": 0.7811, "step": 631 }, { "epoch": 0.29498249708284713, "grad_norm": 0.546875, "learning_rate": 0.00019958378695354134, "loss": 0.713, "step": 632 }, { "epoch": 0.2954492415402567, "grad_norm": 0.51953125, "learning_rate": 0.00019958244850499664, "loss": 0.7191, "step": 633 }, { "epoch": 0.2959159859976663, "grad_norm": 0.6328125, "learning_rate": 0.0001995811079123313, "loss": 0.8741, "step": 634 }, { "epoch": 0.29638273045507585, "grad_norm": 0.5625, "learning_rate": 0.00019957976517557424, "loss": 0.7818, "step": 635 }, { "epoch": 0.2968494749124854, "grad_norm": 0.54296875, "learning_rate": 0.00019957842029475433, "loss": 0.6942, "step": 636 }, { "epoch": 0.297316219369895, "grad_norm": 0.57421875, "learning_rate": 0.00019957707326990058, "loss": 0.7754, "step": 637 }, { "epoch": 0.29778296382730457, "grad_norm": 0.62109375, "learning_rate": 0.00019957572410104192, "loss": 0.7771, "step": 638 }, { "epoch": 0.29824970828471414, "grad_norm": 0.5703125, "learning_rate": 0.00019957437278820747, "loss": 0.74, "step": 639 }, { "epoch": 0.29871645274212366, "grad_norm": 0.55859375, "learning_rate": 0.00019957301933142628, "loss": 0.7339, "step": 640 }, { "epoch": 0.29918319719953324, "grad_norm": 0.625, "learning_rate": 0.0001995716637307275, "loss": 0.874, "step": 641 }, { "epoch": 0.2996499416569428, "grad_norm": 0.58984375, "learning_rate": 0.00019957030598614033, "loss": 0.8613, "step": 642 }, { "epoch": 0.3001166861143524, "grad_norm": 0.5546875, "learning_rate": 0.00019956894609769401, "loss": 0.7766, "step": 643 }, { "epoch": 0.30058343057176196, "grad_norm": 0.5859375, "learning_rate": 0.00019956758406541777, "loss": 0.8244, "step": 644 }, { "epoch": 0.30105017502917153, "grad_norm": 0.546875, "learning_rate": 0.000199566219889341, "loss": 0.804, "step": 645 }, { "epoch": 0.3015169194865811, "grad_norm": 0.58203125, "learning_rate": 0.00019956485356949306, "loss": 0.8804, "step": 646 }, { "epoch": 0.3019836639439907, "grad_norm": 0.486328125, "learning_rate": 0.00019956348510590335, "loss": 0.7264, "step": 647 }, { "epoch": 0.30245040840140025, "grad_norm": 0.6640625, "learning_rate": 0.00019956211449860133, "loss": 0.9112, "step": 648 }, { "epoch": 0.3029171528588098, "grad_norm": 0.5234375, "learning_rate": 0.0001995607417476165, "loss": 0.6976, "step": 649 }, { "epoch": 0.3033838973162194, "grad_norm": 0.53125, "learning_rate": 0.00019955936685297845, "loss": 0.685, "step": 650 }, { "epoch": 0.3038506417736289, "grad_norm": 0.55859375, "learning_rate": 0.00019955798981471676, "loss": 0.8505, "step": 651 }, { "epoch": 0.3043173862310385, "grad_norm": 0.58984375, "learning_rate": 0.0001995566106328611, "loss": 0.8052, "step": 652 }, { "epoch": 0.30478413068844806, "grad_norm": 0.5546875, "learning_rate": 0.00019955522930744114, "loss": 0.7442, "step": 653 }, { "epoch": 0.30525087514585764, "grad_norm": 0.57421875, "learning_rate": 0.00019955384583848665, "loss": 0.7123, "step": 654 }, { "epoch": 0.3057176196032672, "grad_norm": 0.55078125, "learning_rate": 0.00019955246022602738, "loss": 0.7227, "step": 655 }, { "epoch": 0.3061843640606768, "grad_norm": 0.5625, "learning_rate": 0.00019955107247009323, "loss": 0.724, "step": 656 }, { "epoch": 0.30665110851808636, "grad_norm": 0.7109375, "learning_rate": 0.000199549682570714, "loss": 0.7437, "step": 657 }, { "epoch": 0.30711785297549593, "grad_norm": 0.57421875, "learning_rate": 0.00019954829052791967, "loss": 0.7436, "step": 658 }, { "epoch": 0.3075845974329055, "grad_norm": 0.6171875, "learning_rate": 0.0001995468963417402, "loss": 0.7072, "step": 659 }, { "epoch": 0.3080513418903151, "grad_norm": 0.69921875, "learning_rate": 0.00019954550001220555, "loss": 0.9228, "step": 660 }, { "epoch": 0.3085180863477246, "grad_norm": 0.65625, "learning_rate": 0.00019954410153934587, "loss": 0.8562, "step": 661 }, { "epoch": 0.30898483080513417, "grad_norm": 0.57421875, "learning_rate": 0.00019954270092319124, "loss": 0.7602, "step": 662 }, { "epoch": 0.30945157526254374, "grad_norm": 0.5859375, "learning_rate": 0.00019954129816377182, "loss": 0.7952, "step": 663 }, { "epoch": 0.3099183197199533, "grad_norm": 0.5390625, "learning_rate": 0.00019953989326111779, "loss": 0.7912, "step": 664 }, { "epoch": 0.3103850641773629, "grad_norm": 0.51953125, "learning_rate": 0.00019953848621525943, "loss": 0.6111, "step": 665 }, { "epoch": 0.31085180863477246, "grad_norm": 0.57421875, "learning_rate": 0.00019953707702622704, "loss": 0.8098, "step": 666 }, { "epoch": 0.31131855309218204, "grad_norm": 0.63671875, "learning_rate": 0.00019953566569405094, "loss": 0.8148, "step": 667 }, { "epoch": 0.3117852975495916, "grad_norm": 0.62890625, "learning_rate": 0.0001995342522187615, "loss": 0.7623, "step": 668 }, { "epoch": 0.3122520420070012, "grad_norm": 0.578125, "learning_rate": 0.00019953283660038916, "loss": 0.7721, "step": 669 }, { "epoch": 0.31271878646441076, "grad_norm": 0.546875, "learning_rate": 0.00019953141883896446, "loss": 0.711, "step": 670 }, { "epoch": 0.3131855309218203, "grad_norm": 0.5625, "learning_rate": 0.00019952999893451786, "loss": 0.6888, "step": 671 }, { "epoch": 0.31365227537922985, "grad_norm": 0.703125, "learning_rate": 0.0001995285768870799, "loss": 0.7593, "step": 672 }, { "epoch": 0.3141190198366394, "grad_norm": 0.640625, "learning_rate": 0.00019952715269668132, "loss": 0.7696, "step": 673 }, { "epoch": 0.314585764294049, "grad_norm": 0.58984375, "learning_rate": 0.0001995257263633527, "loss": 0.7612, "step": 674 }, { "epoch": 0.31505250875145857, "grad_norm": 0.53125, "learning_rate": 0.00019952429788712474, "loss": 0.6959, "step": 675 }, { "epoch": 0.31551925320886814, "grad_norm": 0.5546875, "learning_rate": 0.00019952286726802827, "loss": 0.7807, "step": 676 }, { "epoch": 0.3159859976662777, "grad_norm": 0.5625, "learning_rate": 0.00019952143450609403, "loss": 0.7659, "step": 677 }, { "epoch": 0.3164527421236873, "grad_norm": 0.490234375, "learning_rate": 0.00019951999960135286, "loss": 0.69, "step": 678 }, { "epoch": 0.31691948658109687, "grad_norm": 0.5390625, "learning_rate": 0.00019951856255383566, "loss": 0.742, "step": 679 }, { "epoch": 0.31738623103850644, "grad_norm": 0.52734375, "learning_rate": 0.00019951712336357345, "loss": 0.7583, "step": 680 }, { "epoch": 0.317852975495916, "grad_norm": 0.52734375, "learning_rate": 0.0001995156820305971, "loss": 0.7252, "step": 681 }, { "epoch": 0.31831971995332553, "grad_norm": 0.5625, "learning_rate": 0.00019951423855493774, "loss": 0.746, "step": 682 }, { "epoch": 0.3187864644107351, "grad_norm": 0.55078125, "learning_rate": 0.0001995127929366264, "loss": 0.7351, "step": 683 }, { "epoch": 0.3192532088681447, "grad_norm": 0.5390625, "learning_rate": 0.00019951134517569421, "loss": 0.6398, "step": 684 }, { "epoch": 0.31971995332555425, "grad_norm": 0.5078125, "learning_rate": 0.00019950989527217237, "loss": 0.5848, "step": 685 }, { "epoch": 0.3201866977829638, "grad_norm": 0.59375, "learning_rate": 0.00019950844322609205, "loss": 0.6832, "step": 686 }, { "epoch": 0.3206534422403734, "grad_norm": 0.75, "learning_rate": 0.00019950698903748454, "loss": 0.8341, "step": 687 }, { "epoch": 0.32112018669778297, "grad_norm": 0.609375, "learning_rate": 0.0001995055327063812, "loss": 0.6931, "step": 688 }, { "epoch": 0.32158693115519255, "grad_norm": 0.6328125, "learning_rate": 0.0001995040742328133, "loss": 0.7179, "step": 689 }, { "epoch": 0.3220536756126021, "grad_norm": 0.64453125, "learning_rate": 0.00019950261361681228, "loss": 0.7363, "step": 690 }, { "epoch": 0.3225204200700117, "grad_norm": 0.6484375, "learning_rate": 0.00019950115085840957, "loss": 0.7609, "step": 691 }, { "epoch": 0.3229871645274212, "grad_norm": 0.625, "learning_rate": 0.0001994996859576367, "loss": 0.776, "step": 692 }, { "epoch": 0.3234539089848308, "grad_norm": 0.58203125, "learning_rate": 0.00019949821891452523, "loss": 0.7253, "step": 693 }, { "epoch": 0.32392065344224036, "grad_norm": 0.61328125, "learning_rate": 0.00019949674972910667, "loss": 0.7126, "step": 694 }, { "epoch": 0.32438739789964993, "grad_norm": 0.55078125, "learning_rate": 0.0001994952784014127, "loss": 0.7645, "step": 695 }, { "epoch": 0.3248541423570595, "grad_norm": 0.60546875, "learning_rate": 0.000199493804931475, "loss": 0.7993, "step": 696 }, { "epoch": 0.3253208868144691, "grad_norm": 0.54296875, "learning_rate": 0.0001994923293193253, "loss": 0.6651, "step": 697 }, { "epoch": 0.32578763127187865, "grad_norm": 0.58984375, "learning_rate": 0.00019949085156499533, "loss": 0.8008, "step": 698 }, { "epoch": 0.3262543757292882, "grad_norm": 0.59375, "learning_rate": 0.00019948937166851697, "loss": 0.7117, "step": 699 }, { "epoch": 0.3267211201866978, "grad_norm": 0.56640625, "learning_rate": 0.00019948788962992204, "loss": 0.7917, "step": 700 }, { "epoch": 0.3271878646441074, "grad_norm": 0.55078125, "learning_rate": 0.00019948640544924245, "loss": 0.6921, "step": 701 }, { "epoch": 0.32765460910151695, "grad_norm": 0.6171875, "learning_rate": 0.0001994849191265102, "loss": 0.7896, "step": 702 }, { "epoch": 0.32812135355892647, "grad_norm": 0.58984375, "learning_rate": 0.00019948343066175723, "loss": 0.7888, "step": 703 }, { "epoch": 0.32858809801633604, "grad_norm": 0.58203125, "learning_rate": 0.00019948194005501562, "loss": 0.7162, "step": 704 }, { "epoch": 0.3290548424737456, "grad_norm": 0.58203125, "learning_rate": 0.0001994804473063175, "loss": 0.7838, "step": 705 }, { "epoch": 0.3295215869311552, "grad_norm": 0.59765625, "learning_rate": 0.00019947895241569495, "loss": 0.7563, "step": 706 }, { "epoch": 0.32998833138856476, "grad_norm": 0.486328125, "learning_rate": 0.00019947745538318018, "loss": 0.556, "step": 707 }, { "epoch": 0.33045507584597433, "grad_norm": 0.5390625, "learning_rate": 0.00019947595620880542, "loss": 0.6932, "step": 708 }, { "epoch": 0.3309218203033839, "grad_norm": 0.61328125, "learning_rate": 0.00019947445489260296, "loss": 0.8337, "step": 709 }, { "epoch": 0.3313885647607935, "grad_norm": 0.57421875, "learning_rate": 0.0001994729514346051, "loss": 0.7472, "step": 710 }, { "epoch": 0.33185530921820305, "grad_norm": 0.56640625, "learning_rate": 0.00019947144583484425, "loss": 0.7849, "step": 711 }, { "epoch": 0.3323220536756126, "grad_norm": 0.59375, "learning_rate": 0.0001994699380933528, "loss": 0.8254, "step": 712 }, { "epoch": 0.33278879813302215, "grad_norm": 0.55078125, "learning_rate": 0.00019946842821016322, "loss": 0.7681, "step": 713 }, { "epoch": 0.3332555425904317, "grad_norm": 0.5234375, "learning_rate": 0.00019946691618530804, "loss": 0.7437, "step": 714 }, { "epoch": 0.3337222870478413, "grad_norm": 0.58203125, "learning_rate": 0.00019946540201881975, "loss": 0.783, "step": 715 }, { "epoch": 0.33418903150525087, "grad_norm": 0.55859375, "learning_rate": 0.00019946388571073103, "loss": 0.7149, "step": 716 }, { "epoch": 0.33465577596266044, "grad_norm": 0.4921875, "learning_rate": 0.00019946236726107447, "loss": 0.6135, "step": 717 }, { "epoch": 0.33512252042007, "grad_norm": 0.6328125, "learning_rate": 0.0001994608466698828, "loss": 0.705, "step": 718 }, { "epoch": 0.3355892648774796, "grad_norm": 0.70703125, "learning_rate": 0.00019945932393718873, "loss": 0.7772, "step": 719 }, { "epoch": 0.33605600933488916, "grad_norm": 0.62109375, "learning_rate": 0.00019945779906302508, "loss": 0.7351, "step": 720 }, { "epoch": 0.33652275379229873, "grad_norm": 0.67578125, "learning_rate": 0.00019945627204742466, "loss": 0.8505, "step": 721 }, { "epoch": 0.3369894982497083, "grad_norm": 0.61328125, "learning_rate": 0.00019945474289042035, "loss": 0.7008, "step": 722 }, { "epoch": 0.3374562427071178, "grad_norm": 0.6171875, "learning_rate": 0.00019945321159204512, "loss": 0.7036, "step": 723 }, { "epoch": 0.3379229871645274, "grad_norm": 0.63671875, "learning_rate": 0.00019945167815233186, "loss": 0.8273, "step": 724 }, { "epoch": 0.338389731621937, "grad_norm": 0.5703125, "learning_rate": 0.00019945014257131364, "loss": 0.6616, "step": 725 }, { "epoch": 0.33885647607934655, "grad_norm": 0.5703125, "learning_rate": 0.00019944860484902348, "loss": 0.6831, "step": 726 }, { "epoch": 0.3393232205367561, "grad_norm": 0.5390625, "learning_rate": 0.00019944706498549453, "loss": 0.7227, "step": 727 }, { "epoch": 0.3397899649941657, "grad_norm": 0.54296875, "learning_rate": 0.00019944552298075996, "loss": 0.7427, "step": 728 }, { "epoch": 0.34025670945157527, "grad_norm": 0.6796875, "learning_rate": 0.0001994439788348529, "loss": 0.8514, "step": 729 }, { "epoch": 0.34072345390898484, "grad_norm": 0.54296875, "learning_rate": 0.0001994424325478067, "loss": 0.6433, "step": 730 }, { "epoch": 0.3411901983663944, "grad_norm": 0.57421875, "learning_rate": 0.00019944088411965452, "loss": 0.78, "step": 731 }, { "epoch": 0.341656942823804, "grad_norm": 0.625, "learning_rate": 0.00019943933355042983, "loss": 0.8557, "step": 732 }, { "epoch": 0.34212368728121356, "grad_norm": 0.6171875, "learning_rate": 0.00019943778084016594, "loss": 0.7974, "step": 733 }, { "epoch": 0.3425904317386231, "grad_norm": 0.60546875, "learning_rate": 0.00019943622598889632, "loss": 0.7358, "step": 734 }, { "epoch": 0.34305717619603265, "grad_norm": 0.640625, "learning_rate": 0.00019943466899665437, "loss": 0.8237, "step": 735 }, { "epoch": 0.3435239206534422, "grad_norm": 0.5546875, "learning_rate": 0.00019943310986347373, "loss": 0.6492, "step": 736 }, { "epoch": 0.3439906651108518, "grad_norm": 0.59375, "learning_rate": 0.0001994315485893879, "loss": 0.7173, "step": 737 }, { "epoch": 0.3444574095682614, "grad_norm": 0.61328125, "learning_rate": 0.00019942998517443047, "loss": 0.8168, "step": 738 }, { "epoch": 0.34492415402567095, "grad_norm": 0.58984375, "learning_rate": 0.0001994284196186352, "loss": 0.6831, "step": 739 }, { "epoch": 0.3453908984830805, "grad_norm": 0.515625, "learning_rate": 0.0001994268519220357, "loss": 0.6027, "step": 740 }, { "epoch": 0.3458576429404901, "grad_norm": 0.57421875, "learning_rate": 0.00019942528208466577, "loss": 0.6888, "step": 741 }, { "epoch": 0.34632438739789967, "grad_norm": 0.6796875, "learning_rate": 0.0001994237101065592, "loss": 0.771, "step": 742 }, { "epoch": 0.34679113185530924, "grad_norm": 0.6015625, "learning_rate": 0.00019942213598774983, "loss": 0.6988, "step": 743 }, { "epoch": 0.34725787631271876, "grad_norm": 0.6484375, "learning_rate": 0.00019942055972827158, "loss": 0.7537, "step": 744 }, { "epoch": 0.34772462077012833, "grad_norm": 0.6015625, "learning_rate": 0.00019941898132815836, "loss": 0.6787, "step": 745 }, { "epoch": 0.3481913652275379, "grad_norm": 0.75, "learning_rate": 0.00019941740078744419, "loss": 0.8621, "step": 746 }, { "epoch": 0.3486581096849475, "grad_norm": 0.62890625, "learning_rate": 0.00019941581810616304, "loss": 0.7556, "step": 747 }, { "epoch": 0.34912485414235706, "grad_norm": 0.57421875, "learning_rate": 0.00019941423328434903, "loss": 0.6841, "step": 748 }, { "epoch": 0.34959159859976663, "grad_norm": 0.67578125, "learning_rate": 0.00019941264632203628, "loss": 0.7186, "step": 749 }, { "epoch": 0.3500583430571762, "grad_norm": 0.64453125, "learning_rate": 0.00019941105721925896, "loss": 0.7302, "step": 750 }, { "epoch": 0.3505250875145858, "grad_norm": 0.625, "learning_rate": 0.00019940946597605126, "loss": 0.794, "step": 751 }, { "epoch": 0.35099183197199535, "grad_norm": 0.6171875, "learning_rate": 0.0001994078725924475, "loss": 0.724, "step": 752 }, { "epoch": 0.3514585764294049, "grad_norm": 0.5234375, "learning_rate": 0.0001994062770684819, "loss": 0.6629, "step": 753 }, { "epoch": 0.3519253208868145, "grad_norm": 0.59375, "learning_rate": 0.0001994046794041889, "loss": 0.7525, "step": 754 }, { "epoch": 0.352392065344224, "grad_norm": 0.65625, "learning_rate": 0.00019940307959960283, "loss": 0.7591, "step": 755 }, { "epoch": 0.3528588098016336, "grad_norm": 0.65234375, "learning_rate": 0.00019940147765475818, "loss": 0.8073, "step": 756 }, { "epoch": 0.35332555425904316, "grad_norm": 0.7109375, "learning_rate": 0.00019939987356968947, "loss": 0.8765, "step": 757 }, { "epoch": 0.35379229871645274, "grad_norm": 0.58203125, "learning_rate": 0.00019939826734443114, "loss": 0.6879, "step": 758 }, { "epoch": 0.3542590431738623, "grad_norm": 0.62890625, "learning_rate": 0.00019939665897901785, "loss": 0.7468, "step": 759 }, { "epoch": 0.3547257876312719, "grad_norm": 0.64453125, "learning_rate": 0.0001993950484734842, "loss": 0.7073, "step": 760 }, { "epoch": 0.35519253208868146, "grad_norm": 0.61328125, "learning_rate": 0.00019939343582786487, "loss": 0.7915, "step": 761 }, { "epoch": 0.35565927654609103, "grad_norm": 0.6484375, "learning_rate": 0.00019939182104219462, "loss": 0.7578, "step": 762 }, { "epoch": 0.3561260210035006, "grad_norm": 0.65234375, "learning_rate": 0.00019939020411650815, "loss": 0.7697, "step": 763 }, { "epoch": 0.3565927654609102, "grad_norm": 0.546875, "learning_rate": 0.00019938858505084032, "loss": 0.7542, "step": 764 }, { "epoch": 0.3570595099183197, "grad_norm": 0.65625, "learning_rate": 0.00019938696384522598, "loss": 0.8836, "step": 765 }, { "epoch": 0.35752625437572927, "grad_norm": 0.671875, "learning_rate": 0.00019938534049970005, "loss": 0.7789, "step": 766 }, { "epoch": 0.35799299883313884, "grad_norm": 0.55078125, "learning_rate": 0.00019938371501429747, "loss": 0.6261, "step": 767 }, { "epoch": 0.3584597432905484, "grad_norm": 0.65625, "learning_rate": 0.00019938208738905325, "loss": 0.7938, "step": 768 }, { "epoch": 0.358926487747958, "grad_norm": 0.578125, "learning_rate": 0.00019938045762400236, "loss": 0.659, "step": 769 }, { "epoch": 0.35939323220536756, "grad_norm": 0.59765625, "learning_rate": 0.00019937882571918002, "loss": 0.6463, "step": 770 }, { "epoch": 0.35985997666277714, "grad_norm": 0.6640625, "learning_rate": 0.00019937719167462126, "loss": 0.8015, "step": 771 }, { "epoch": 0.3603267211201867, "grad_norm": 0.57421875, "learning_rate": 0.00019937555549036132, "loss": 0.6138, "step": 772 }, { "epoch": 0.3607934655775963, "grad_norm": 0.6015625, "learning_rate": 0.0001993739171664354, "loss": 0.7856, "step": 773 }, { "epoch": 0.36126021003500586, "grad_norm": 0.61328125, "learning_rate": 0.0001993722767028788, "loss": 0.7126, "step": 774 }, { "epoch": 0.3617269544924154, "grad_norm": 0.82421875, "learning_rate": 0.00019937063409972684, "loss": 0.8082, "step": 775 }, { "epoch": 0.36219369894982495, "grad_norm": 0.6171875, "learning_rate": 0.00019936898935701486, "loss": 0.7173, "step": 776 }, { "epoch": 0.3626604434072345, "grad_norm": 0.6015625, "learning_rate": 0.00019936734247477826, "loss": 0.6554, "step": 777 }, { "epoch": 0.3631271878646441, "grad_norm": 0.73828125, "learning_rate": 0.00019936569345305256, "loss": 0.6865, "step": 778 }, { "epoch": 0.36359393232205367, "grad_norm": 0.63671875, "learning_rate": 0.00019936404229187323, "loss": 0.7052, "step": 779 }, { "epoch": 0.36406067677946324, "grad_norm": 0.6796875, "learning_rate": 0.0001993623889912758, "loss": 0.8104, "step": 780 }, { "epoch": 0.3645274212368728, "grad_norm": 0.58984375, "learning_rate": 0.0001993607335512959, "loss": 0.6942, "step": 781 }, { "epoch": 0.3649941656942824, "grad_norm": 0.5625, "learning_rate": 0.00019935907597196917, "loss": 0.638, "step": 782 }, { "epoch": 0.36546091015169196, "grad_norm": 0.65625, "learning_rate": 0.00019935741625333128, "loss": 0.8416, "step": 783 }, { "epoch": 0.36592765460910154, "grad_norm": 0.66015625, "learning_rate": 0.000199355754395418, "loss": 0.7029, "step": 784 }, { "epoch": 0.3663943990665111, "grad_norm": 0.66796875, "learning_rate": 0.0001993540903982651, "loss": 0.8415, "step": 785 }, { "epoch": 0.36686114352392063, "grad_norm": 0.69140625, "learning_rate": 0.00019935242426190837, "loss": 0.7105, "step": 786 }, { "epoch": 0.3673278879813302, "grad_norm": 0.6796875, "learning_rate": 0.00019935075598638373, "loss": 0.8521, "step": 787 }, { "epoch": 0.3677946324387398, "grad_norm": 0.6484375, "learning_rate": 0.00019934908557172704, "loss": 0.796, "step": 788 }, { "epoch": 0.36826137689614935, "grad_norm": 0.6484375, "learning_rate": 0.00019934741301797434, "loss": 0.6624, "step": 789 }, { "epoch": 0.3687281213535589, "grad_norm": 0.59765625, "learning_rate": 0.00019934573832516164, "loss": 0.7359, "step": 790 }, { "epoch": 0.3691948658109685, "grad_norm": 0.5859375, "learning_rate": 0.00019934406149332495, "loss": 0.5849, "step": 791 }, { "epoch": 0.36966161026837807, "grad_norm": 0.62890625, "learning_rate": 0.00019934238252250036, "loss": 0.7446, "step": 792 }, { "epoch": 0.37012835472578764, "grad_norm": 0.62109375, "learning_rate": 0.00019934070141272407, "loss": 0.6786, "step": 793 }, { "epoch": 0.3705950991831972, "grad_norm": 0.6328125, "learning_rate": 0.00019933901816403227, "loss": 0.7116, "step": 794 }, { "epoch": 0.3710618436406068, "grad_norm": 0.7109375, "learning_rate": 0.00019933733277646118, "loss": 0.7427, "step": 795 }, { "epoch": 0.3715285880980163, "grad_norm": 0.61328125, "learning_rate": 0.0001993356452500471, "loss": 0.6392, "step": 796 }, { "epoch": 0.3719953325554259, "grad_norm": 0.671875, "learning_rate": 0.00019933395558482638, "loss": 0.7705, "step": 797 }, { "epoch": 0.37246207701283546, "grad_norm": 0.640625, "learning_rate": 0.00019933226378083537, "loss": 0.7715, "step": 798 }, { "epoch": 0.37292882147024503, "grad_norm": 0.6484375, "learning_rate": 0.00019933056983811049, "loss": 0.6703, "step": 799 }, { "epoch": 0.3733955659276546, "grad_norm": 0.5703125, "learning_rate": 0.00019932887375668826, "loss": 0.6387, "step": 800 }, { "epoch": 0.3738623103850642, "grad_norm": 0.671875, "learning_rate": 0.00019932717553660515, "loss": 0.7242, "step": 801 }, { "epoch": 0.37432905484247375, "grad_norm": 0.6640625, "learning_rate": 0.00019932547517789777, "loss": 0.7549, "step": 802 }, { "epoch": 0.3747957992998833, "grad_norm": 0.8203125, "learning_rate": 0.00019932377268060266, "loss": 0.7823, "step": 803 }, { "epoch": 0.3752625437572929, "grad_norm": 0.6328125, "learning_rate": 0.00019932206804475658, "loss": 0.689, "step": 804 }, { "epoch": 0.3757292882147025, "grad_norm": 0.6640625, "learning_rate": 0.00019932036127039615, "loss": 0.7811, "step": 805 }, { "epoch": 0.37619603267211205, "grad_norm": 0.5859375, "learning_rate": 0.00019931865235755814, "loss": 0.5949, "step": 806 }, { "epoch": 0.37666277712952156, "grad_norm": 0.546875, "learning_rate": 0.00019931694130627934, "loss": 0.6188, "step": 807 }, { "epoch": 0.37712952158693114, "grad_norm": 0.66015625, "learning_rate": 0.00019931522811659661, "loss": 0.6804, "step": 808 }, { "epoch": 0.3775962660443407, "grad_norm": 0.6875, "learning_rate": 0.00019931351278854685, "loss": 0.6732, "step": 809 }, { "epoch": 0.3780630105017503, "grad_norm": 0.63671875, "learning_rate": 0.00019931179532216696, "loss": 0.6789, "step": 810 }, { "epoch": 0.37852975495915986, "grad_norm": 0.71875, "learning_rate": 0.0001993100757174939, "loss": 0.6879, "step": 811 }, { "epoch": 0.37899649941656943, "grad_norm": 0.6875, "learning_rate": 0.00019930835397456473, "loss": 0.6632, "step": 812 }, { "epoch": 0.379463243873979, "grad_norm": 0.56640625, "learning_rate": 0.00019930663009341652, "loss": 0.57, "step": 813 }, { "epoch": 0.3799299883313886, "grad_norm": 0.765625, "learning_rate": 0.0001993049040740864, "loss": 0.8101, "step": 814 }, { "epoch": 0.38039673278879815, "grad_norm": 0.73046875, "learning_rate": 0.0001993031759166115, "loss": 0.7654, "step": 815 }, { "epoch": 0.3808634772462077, "grad_norm": 0.70703125, "learning_rate": 0.00019930144562102904, "loss": 0.7981, "step": 816 }, { "epoch": 0.38133022170361724, "grad_norm": 0.63671875, "learning_rate": 0.0001992997131873763, "loss": 0.7095, "step": 817 }, { "epoch": 0.3817969661610268, "grad_norm": 0.58984375, "learning_rate": 0.00019929797861569052, "loss": 0.7036, "step": 818 }, { "epoch": 0.3822637106184364, "grad_norm": 0.60546875, "learning_rate": 0.0001992962419060091, "loss": 0.7372, "step": 819 }, { "epoch": 0.38273045507584597, "grad_norm": 0.56640625, "learning_rate": 0.00019929450305836946, "loss": 0.7175, "step": 820 }, { "epoch": 0.38319719953325554, "grad_norm": 0.5546875, "learning_rate": 0.00019929276207280894, "loss": 0.5719, "step": 821 }, { "epoch": 0.3836639439906651, "grad_norm": 0.55078125, "learning_rate": 0.00019929101894936512, "loss": 0.6493, "step": 822 }, { "epoch": 0.3841306884480747, "grad_norm": 0.61328125, "learning_rate": 0.0001992892736880755, "loss": 0.7137, "step": 823 }, { "epoch": 0.38459743290548426, "grad_norm": 0.7109375, "learning_rate": 0.00019928752628897765, "loss": 0.8295, "step": 824 }, { "epoch": 0.38506417736289383, "grad_norm": 0.609375, "learning_rate": 0.0001992857767521092, "loss": 0.7852, "step": 825 }, { "epoch": 0.3855309218203034, "grad_norm": 0.58984375, "learning_rate": 0.00019928402507750782, "loss": 0.6636, "step": 826 }, { "epoch": 0.3859976662777129, "grad_norm": 0.546875, "learning_rate": 0.0001992822712652112, "loss": 0.6628, "step": 827 }, { "epoch": 0.3864644107351225, "grad_norm": 0.71484375, "learning_rate": 0.00019928051531525713, "loss": 0.754, "step": 828 }, { "epoch": 0.3869311551925321, "grad_norm": 0.609375, "learning_rate": 0.00019927875722768344, "loss": 0.699, "step": 829 }, { "epoch": 0.38739789964994165, "grad_norm": 0.625, "learning_rate": 0.00019927699700252792, "loss": 0.7438, "step": 830 }, { "epoch": 0.3878646441073512, "grad_norm": 0.65625, "learning_rate": 0.00019927523463982851, "loss": 0.6645, "step": 831 }, { "epoch": 0.3883313885647608, "grad_norm": 0.59375, "learning_rate": 0.0001992734701396232, "loss": 0.7631, "step": 832 }, { "epoch": 0.38879813302217037, "grad_norm": 0.65234375, "learning_rate": 0.0001992717035019499, "loss": 0.734, "step": 833 }, { "epoch": 0.38926487747957994, "grad_norm": 0.6796875, "learning_rate": 0.00019926993472684664, "loss": 0.7883, "step": 834 }, { "epoch": 0.3897316219369895, "grad_norm": 0.7109375, "learning_rate": 0.00019926816381435157, "loss": 0.6986, "step": 835 }, { "epoch": 0.3901983663943991, "grad_norm": 0.71484375, "learning_rate": 0.00019926639076450279, "loss": 0.7749, "step": 836 }, { "epoch": 0.39066511085180866, "grad_norm": 0.640625, "learning_rate": 0.0001992646155773385, "loss": 0.7334, "step": 837 }, { "epoch": 0.3911318553092182, "grad_norm": 0.6875, "learning_rate": 0.00019926283825289688, "loss": 0.6619, "step": 838 }, { "epoch": 0.39159859976662775, "grad_norm": 0.63671875, "learning_rate": 0.00019926105879121623, "loss": 0.7252, "step": 839 }, { "epoch": 0.3920653442240373, "grad_norm": 0.76171875, "learning_rate": 0.00019925927719233487, "loss": 0.7034, "step": 840 }, { "epoch": 0.3925320886814469, "grad_norm": 0.86328125, "learning_rate": 0.0001992574934562911, "loss": 0.9161, "step": 841 }, { "epoch": 0.3929988331388565, "grad_norm": 0.72265625, "learning_rate": 0.0001992557075831234, "loss": 0.907, "step": 842 }, { "epoch": 0.39346557759626605, "grad_norm": 0.6640625, "learning_rate": 0.00019925391957287017, "loss": 0.7525, "step": 843 }, { "epoch": 0.3939323220536756, "grad_norm": 0.625, "learning_rate": 0.00019925212942556995, "loss": 0.7048, "step": 844 }, { "epoch": 0.3943990665110852, "grad_norm": 0.59765625, "learning_rate": 0.00019925033714126124, "loss": 0.6646, "step": 845 }, { "epoch": 0.39486581096849477, "grad_norm": 0.5625, "learning_rate": 0.00019924854271998266, "loss": 0.6329, "step": 846 }, { "epoch": 0.39533255542590434, "grad_norm": 0.59765625, "learning_rate": 0.00019924674616177281, "loss": 0.7151, "step": 847 }, { "epoch": 0.39579929988331386, "grad_norm": 0.62890625, "learning_rate": 0.00019924494746667045, "loss": 0.7011, "step": 848 }, { "epoch": 0.39626604434072343, "grad_norm": 0.703125, "learning_rate": 0.0001992431466347142, "loss": 0.7077, "step": 849 }, { "epoch": 0.396732788798133, "grad_norm": 0.65625, "learning_rate": 0.00019924134366594294, "loss": 0.7014, "step": 850 }, { "epoch": 0.3971995332555426, "grad_norm": 0.75, "learning_rate": 0.00019923953856039543, "loss": 0.7786, "step": 851 }, { "epoch": 0.39766627771295215, "grad_norm": 0.65625, "learning_rate": 0.00019923773131811054, "loss": 0.7086, "step": 852 }, { "epoch": 0.39813302217036173, "grad_norm": 0.67578125, "learning_rate": 0.0001992359219391272, "loss": 0.6755, "step": 853 }, { "epoch": 0.3985997666277713, "grad_norm": 0.66015625, "learning_rate": 0.00019923411042348433, "loss": 0.7066, "step": 854 }, { "epoch": 0.3990665110851809, "grad_norm": 0.74609375, "learning_rate": 0.00019923229677122097, "loss": 0.8076, "step": 855 }, { "epoch": 0.39953325554259045, "grad_norm": 0.75, "learning_rate": 0.0001992304809823762, "loss": 0.7037, "step": 856 }, { "epoch": 0.4, "grad_norm": 0.59375, "learning_rate": 0.000199228663056989, "loss": 0.6435, "step": 857 }, { "epoch": 0.4004667444574096, "grad_norm": 0.66015625, "learning_rate": 0.00019922684299509863, "loss": 0.7999, "step": 858 }, { "epoch": 0.4009334889148191, "grad_norm": 0.6015625, "learning_rate": 0.00019922502079674424, "loss": 0.6508, "step": 859 }, { "epoch": 0.4014002333722287, "grad_norm": 0.73046875, "learning_rate": 0.00019922319646196505, "loss": 0.768, "step": 860 }, { "epoch": 0.40186697782963826, "grad_norm": 0.75, "learning_rate": 0.00019922136999080036, "loss": 0.788, "step": 861 }, { "epoch": 0.40233372228704783, "grad_norm": 0.66015625, "learning_rate": 0.00019921954138328948, "loss": 0.7233, "step": 862 }, { "epoch": 0.4028004667444574, "grad_norm": 0.67578125, "learning_rate": 0.0001992177106394718, "loss": 0.7035, "step": 863 }, { "epoch": 0.403267211201867, "grad_norm": 0.76171875, "learning_rate": 0.0001992158777593867, "loss": 0.7538, "step": 864 }, { "epoch": 0.40373395565927656, "grad_norm": 0.59375, "learning_rate": 0.00019921404274307372, "loss": 0.5887, "step": 865 }, { "epoch": 0.40420070011668613, "grad_norm": 0.9375, "learning_rate": 0.00019921220559057225, "loss": 0.9041, "step": 866 }, { "epoch": 0.4046674445740957, "grad_norm": 0.72265625, "learning_rate": 0.000199210366301922, "loss": 0.7397, "step": 867 }, { "epoch": 0.4051341890315053, "grad_norm": 0.67578125, "learning_rate": 0.0001992085248771624, "loss": 0.7871, "step": 868 }, { "epoch": 0.4056009334889148, "grad_norm": 0.59375, "learning_rate": 0.00019920668131633322, "loss": 0.6565, "step": 869 }, { "epoch": 0.40606767794632437, "grad_norm": 0.65625, "learning_rate": 0.00019920483561947414, "loss": 0.7352, "step": 870 }, { "epoch": 0.40653442240373394, "grad_norm": 0.625, "learning_rate": 0.00019920298778662487, "loss": 0.6845, "step": 871 }, { "epoch": 0.4070011668611435, "grad_norm": 0.6875, "learning_rate": 0.0001992011378178252, "loss": 0.691, "step": 872 }, { "epoch": 0.4074679113185531, "grad_norm": 0.58203125, "learning_rate": 0.00019919928571311495, "loss": 0.6989, "step": 873 }, { "epoch": 0.40793465577596266, "grad_norm": 0.609375, "learning_rate": 0.00019919743147253405, "loss": 0.683, "step": 874 }, { "epoch": 0.40840140023337224, "grad_norm": 0.60546875, "learning_rate": 0.00019919557509612236, "loss": 0.5746, "step": 875 }, { "epoch": 0.4088681446907818, "grad_norm": 0.6328125, "learning_rate": 0.0001991937165839199, "loss": 0.7188, "step": 876 }, { "epoch": 0.4093348891481914, "grad_norm": 0.59765625, "learning_rate": 0.00019919185593596662, "loss": 0.5876, "step": 877 }, { "epoch": 0.40980163360560096, "grad_norm": 0.7109375, "learning_rate": 0.0001991899931523027, "loss": 0.7175, "step": 878 }, { "epoch": 0.4102683780630105, "grad_norm": 0.75390625, "learning_rate": 0.00019918812823296813, "loss": 0.6975, "step": 879 }, { "epoch": 0.41073512252042005, "grad_norm": 0.6875, "learning_rate": 0.0001991862611780031, "loss": 0.674, "step": 880 }, { "epoch": 0.4112018669778296, "grad_norm": 0.71484375, "learning_rate": 0.00019918439198744783, "loss": 0.6851, "step": 881 }, { "epoch": 0.4116686114352392, "grad_norm": 0.77734375, "learning_rate": 0.00019918252066134255, "loss": 0.7126, "step": 882 }, { "epoch": 0.41213535589264877, "grad_norm": 0.68359375, "learning_rate": 0.00019918064719972757, "loss": 0.6733, "step": 883 }, { "epoch": 0.41260210035005834, "grad_norm": 0.73046875, "learning_rate": 0.0001991787716026432, "loss": 0.5945, "step": 884 }, { "epoch": 0.4130688448074679, "grad_norm": 0.78515625, "learning_rate": 0.00019917689387012984, "loss": 0.7451, "step": 885 }, { "epoch": 0.4135355892648775, "grad_norm": 0.75390625, "learning_rate": 0.0001991750140022279, "loss": 0.8363, "step": 886 }, { "epoch": 0.41400233372228706, "grad_norm": 0.65234375, "learning_rate": 0.00019917313199897788, "loss": 0.6196, "step": 887 }, { "epoch": 0.41446907817969664, "grad_norm": 0.67578125, "learning_rate": 0.0001991712478604203, "loss": 0.6753, "step": 888 }, { "epoch": 0.4149358226371062, "grad_norm": 0.69140625, "learning_rate": 0.00019916936158659572, "loss": 0.7073, "step": 889 }, { "epoch": 0.41540256709451573, "grad_norm": 0.60546875, "learning_rate": 0.0001991674731775448, "loss": 0.6567, "step": 890 }, { "epoch": 0.4158693115519253, "grad_norm": 0.76171875, "learning_rate": 0.0001991655826333081, "loss": 0.73, "step": 891 }, { "epoch": 0.4163360560093349, "grad_norm": 0.74609375, "learning_rate": 0.00019916368995392638, "loss": 0.7652, "step": 892 }, { "epoch": 0.41680280046674445, "grad_norm": 0.6328125, "learning_rate": 0.0001991617951394404, "loss": 0.654, "step": 893 }, { "epoch": 0.417269544924154, "grad_norm": 0.71875, "learning_rate": 0.00019915989818989095, "loss": 0.6455, "step": 894 }, { "epoch": 0.4177362893815636, "grad_norm": 0.60546875, "learning_rate": 0.00019915799910531887, "loss": 0.6531, "step": 895 }, { "epoch": 0.41820303383897317, "grad_norm": 0.66796875, "learning_rate": 0.00019915609788576506, "loss": 0.6891, "step": 896 }, { "epoch": 0.41866977829638274, "grad_norm": 0.6640625, "learning_rate": 0.00019915419453127046, "loss": 0.5585, "step": 897 }, { "epoch": 0.4191365227537923, "grad_norm": 0.81640625, "learning_rate": 0.00019915228904187598, "loss": 0.6863, "step": 898 }, { "epoch": 0.4196032672112019, "grad_norm": 0.66796875, "learning_rate": 0.00019915038141762276, "loss": 0.7216, "step": 899 }, { "epoch": 0.4200700116686114, "grad_norm": 0.63671875, "learning_rate": 0.00019914847165855183, "loss": 0.6804, "step": 900 }, { "epoch": 0.420536756126021, "grad_norm": 0.5703125, "learning_rate": 0.00019914655976470424, "loss": 0.5742, "step": 901 }, { "epoch": 0.42100350058343056, "grad_norm": 0.58203125, "learning_rate": 0.00019914464573612126, "loss": 0.584, "step": 902 }, { "epoch": 0.42147024504084013, "grad_norm": 0.55859375, "learning_rate": 0.00019914272957284403, "loss": 0.6384, "step": 903 }, { "epoch": 0.4219369894982497, "grad_norm": 0.69921875, "learning_rate": 0.00019914081127491383, "loss": 0.6122, "step": 904 }, { "epoch": 0.4224037339556593, "grad_norm": 0.6953125, "learning_rate": 0.000199138890842372, "loss": 0.642, "step": 905 }, { "epoch": 0.42287047841306885, "grad_norm": 0.76171875, "learning_rate": 0.00019913696827525982, "loss": 0.637, "step": 906 }, { "epoch": 0.4233372228704784, "grad_norm": 0.62109375, "learning_rate": 0.00019913504357361875, "loss": 0.6106, "step": 907 }, { "epoch": 0.423803967327888, "grad_norm": 0.76953125, "learning_rate": 0.0001991331167374902, "loss": 0.7521, "step": 908 }, { "epoch": 0.42427071178529757, "grad_norm": 0.7890625, "learning_rate": 0.00019913118776691558, "loss": 0.7924, "step": 909 }, { "epoch": 0.42473745624270715, "grad_norm": 0.6015625, "learning_rate": 0.0001991292566619366, "loss": 0.6847, "step": 910 }, { "epoch": 0.42520420070011666, "grad_norm": 0.56640625, "learning_rate": 0.00019912732342259467, "loss": 0.5853, "step": 911 }, { "epoch": 0.42567094515752624, "grad_norm": 0.69921875, "learning_rate": 0.0001991253880489315, "loss": 0.7638, "step": 912 }, { "epoch": 0.4261376896149358, "grad_norm": 0.84375, "learning_rate": 0.00019912345054098876, "loss": 0.7482, "step": 913 }, { "epoch": 0.4266044340723454, "grad_norm": 0.63671875, "learning_rate": 0.00019912151089880812, "loss": 0.6919, "step": 914 }, { "epoch": 0.42707117852975496, "grad_norm": 0.609375, "learning_rate": 0.0001991195691224314, "loss": 0.6083, "step": 915 }, { "epoch": 0.42753792298716453, "grad_norm": 0.625, "learning_rate": 0.00019911762521190038, "loss": 0.6483, "step": 916 }, { "epoch": 0.4280046674445741, "grad_norm": 0.66015625, "learning_rate": 0.00019911567916725692, "loss": 0.6598, "step": 917 }, { "epoch": 0.4284714119019837, "grad_norm": 0.71875, "learning_rate": 0.0001991137309885429, "loss": 0.6888, "step": 918 }, { "epoch": 0.42893815635939325, "grad_norm": 0.6328125, "learning_rate": 0.00019911178067580026, "loss": 0.7007, "step": 919 }, { "epoch": 0.4294049008168028, "grad_norm": 0.6796875, "learning_rate": 0.00019910982822907107, "loss": 0.6844, "step": 920 }, { "epoch": 0.42987164527421234, "grad_norm": 0.74609375, "learning_rate": 0.0001991078736483973, "loss": 0.8368, "step": 921 }, { "epoch": 0.4303383897316219, "grad_norm": 0.6953125, "learning_rate": 0.00019910591693382104, "loss": 0.7621, "step": 922 }, { "epoch": 0.4308051341890315, "grad_norm": 0.8203125, "learning_rate": 0.00019910395808538442, "loss": 0.6159, "step": 923 }, { "epoch": 0.43127187864644106, "grad_norm": 0.63671875, "learning_rate": 0.00019910199710312964, "loss": 0.6782, "step": 924 }, { "epoch": 0.43173862310385064, "grad_norm": 0.56640625, "learning_rate": 0.00019910003398709888, "loss": 0.6109, "step": 925 }, { "epoch": 0.4322053675612602, "grad_norm": 0.6328125, "learning_rate": 0.00019909806873733447, "loss": 0.6611, "step": 926 }, { "epoch": 0.4326721120186698, "grad_norm": 0.625, "learning_rate": 0.00019909610135387867, "loss": 0.6061, "step": 927 }, { "epoch": 0.43313885647607936, "grad_norm": 0.6640625, "learning_rate": 0.00019909413183677387, "loss": 0.6799, "step": 928 }, { "epoch": 0.43360560093348893, "grad_norm": 0.74609375, "learning_rate": 0.00019909216018606244, "loss": 0.7896, "step": 929 }, { "epoch": 0.4340723453908985, "grad_norm": 0.609375, "learning_rate": 0.00019909018640178688, "loss": 0.6194, "step": 930 }, { "epoch": 0.434539089848308, "grad_norm": 0.640625, "learning_rate": 0.00019908821048398966, "loss": 0.6702, "step": 931 }, { "epoch": 0.4350058343057176, "grad_norm": 0.765625, "learning_rate": 0.0001990862324327133, "loss": 0.7371, "step": 932 }, { "epoch": 0.43547257876312717, "grad_norm": 0.62109375, "learning_rate": 0.00019908425224800043, "loss": 0.6706, "step": 933 }, { "epoch": 0.43593932322053675, "grad_norm": 0.65234375, "learning_rate": 0.0001990822699298937, "loss": 0.5697, "step": 934 }, { "epoch": 0.4364060676779463, "grad_norm": 0.703125, "learning_rate": 0.00019908028547843576, "loss": 0.6944, "step": 935 }, { "epoch": 0.4368728121353559, "grad_norm": 0.68359375, "learning_rate": 0.00019907829889366933, "loss": 0.6388, "step": 936 }, { "epoch": 0.43733955659276547, "grad_norm": 0.76171875, "learning_rate": 0.00019907631017563723, "loss": 0.7356, "step": 937 }, { "epoch": 0.43780630105017504, "grad_norm": 0.62109375, "learning_rate": 0.0001990743193243822, "loss": 0.6149, "step": 938 }, { "epoch": 0.4382730455075846, "grad_norm": 0.78515625, "learning_rate": 0.00019907232633994715, "loss": 0.7016, "step": 939 }, { "epoch": 0.4387397899649942, "grad_norm": 0.6328125, "learning_rate": 0.000199070331222375, "loss": 0.6267, "step": 940 }, { "epoch": 0.43920653442240376, "grad_norm": 0.6953125, "learning_rate": 0.0001990683339717087, "loss": 0.6472, "step": 941 }, { "epoch": 0.4396732788798133, "grad_norm": 0.66796875, "learning_rate": 0.00019906633458799126, "loss": 0.7162, "step": 942 }, { "epoch": 0.44014002333722285, "grad_norm": 0.55859375, "learning_rate": 0.00019906433307126573, "loss": 0.5808, "step": 943 }, { "epoch": 0.4406067677946324, "grad_norm": 0.75390625, "learning_rate": 0.00019906232942157515, "loss": 0.7619, "step": 944 }, { "epoch": 0.441073512252042, "grad_norm": 0.640625, "learning_rate": 0.00019906032363896275, "loss": 0.6483, "step": 945 }, { "epoch": 0.4415402567094516, "grad_norm": 0.703125, "learning_rate": 0.00019905831572347165, "loss": 0.6812, "step": 946 }, { "epoch": 0.44200700116686115, "grad_norm": 0.6875, "learning_rate": 0.00019905630567514513, "loss": 0.6345, "step": 947 }, { "epoch": 0.4424737456242707, "grad_norm": 0.66796875, "learning_rate": 0.0001990542934940264, "loss": 0.7125, "step": 948 }, { "epoch": 0.4429404900816803, "grad_norm": 0.6640625, "learning_rate": 0.00019905227918015888, "loss": 0.6523, "step": 949 }, { "epoch": 0.44340723453908987, "grad_norm": 0.671875, "learning_rate": 0.00019905026273358588, "loss": 0.6574, "step": 950 }, { "epoch": 0.44387397899649944, "grad_norm": 0.64453125, "learning_rate": 0.0001990482441543508, "loss": 0.611, "step": 951 }, { "epoch": 0.44434072345390896, "grad_norm": 0.6640625, "learning_rate": 0.00019904622344249712, "loss": 0.6512, "step": 952 }, { "epoch": 0.44480746791131853, "grad_norm": 0.90234375, "learning_rate": 0.00019904420059806838, "loss": 0.7811, "step": 953 }, { "epoch": 0.4452742123687281, "grad_norm": 0.7265625, "learning_rate": 0.0001990421756211081, "loss": 0.6213, "step": 954 }, { "epoch": 0.4457409568261377, "grad_norm": 0.7578125, "learning_rate": 0.00019904014851165988, "loss": 0.7291, "step": 955 }, { "epoch": 0.44620770128354725, "grad_norm": 0.72265625, "learning_rate": 0.00019903811926976742, "loss": 0.7575, "step": 956 }, { "epoch": 0.4466744457409568, "grad_norm": 0.59375, "learning_rate": 0.00019903608789547433, "loss": 0.6126, "step": 957 }, { "epoch": 0.4471411901983664, "grad_norm": 0.72265625, "learning_rate": 0.0001990340543888244, "loss": 0.8007, "step": 958 }, { "epoch": 0.447607934655776, "grad_norm": 0.62890625, "learning_rate": 0.00019903201874986137, "loss": 0.636, "step": 959 }, { "epoch": 0.44807467911318555, "grad_norm": 0.7421875, "learning_rate": 0.00019902998097862912, "loss": 0.7971, "step": 960 }, { "epoch": 0.4485414235705951, "grad_norm": 0.61328125, "learning_rate": 0.00019902794107517152, "loss": 0.5808, "step": 961 }, { "epoch": 0.4490081680280047, "grad_norm": 0.61328125, "learning_rate": 0.00019902589903953244, "loss": 0.6067, "step": 962 }, { "epoch": 0.4494749124854142, "grad_norm": 0.7734375, "learning_rate": 0.0001990238548717559, "loss": 0.7232, "step": 963 }, { "epoch": 0.4499416569428238, "grad_norm": 0.78125, "learning_rate": 0.00019902180857188587, "loss": 0.6778, "step": 964 }, { "epoch": 0.45040840140023336, "grad_norm": 0.6953125, "learning_rate": 0.00019901976013996647, "loss": 0.6538, "step": 965 }, { "epoch": 0.45087514585764293, "grad_norm": 0.7890625, "learning_rate": 0.00019901770957604179, "loss": 0.63, "step": 966 }, { "epoch": 0.4513418903150525, "grad_norm": 0.60546875, "learning_rate": 0.0001990156568801559, "loss": 0.5631, "step": 967 }, { "epoch": 0.4518086347724621, "grad_norm": 0.765625, "learning_rate": 0.00019901360205235312, "loss": 0.6583, "step": 968 }, { "epoch": 0.45227537922987165, "grad_norm": 0.78125, "learning_rate": 0.0001990115450926776, "loss": 0.6951, "step": 969 }, { "epoch": 0.45274212368728123, "grad_norm": 0.8515625, "learning_rate": 0.00019900948600117365, "loss": 0.8222, "step": 970 }, { "epoch": 0.4532088681446908, "grad_norm": 0.6484375, "learning_rate": 0.0001990074247778856, "loss": 0.6175, "step": 971 }, { "epoch": 0.4536756126021004, "grad_norm": 0.76171875, "learning_rate": 0.00019900536142285788, "loss": 0.7423, "step": 972 }, { "epoch": 0.4541423570595099, "grad_norm": 0.67578125, "learning_rate": 0.00019900329593613484, "loss": 0.6104, "step": 973 }, { "epoch": 0.45460910151691947, "grad_norm": 0.6015625, "learning_rate": 0.00019900122831776105, "loss": 0.6204, "step": 974 }, { "epoch": 0.45507584597432904, "grad_norm": 0.69921875, "learning_rate": 0.00019899915856778092, "loss": 0.65, "step": 975 }, { "epoch": 0.4555425904317386, "grad_norm": 0.7734375, "learning_rate": 0.0001989970866862391, "loss": 0.7242, "step": 976 }, { "epoch": 0.4560093348891482, "grad_norm": 0.578125, "learning_rate": 0.00019899501267318015, "loss": 0.613, "step": 977 }, { "epoch": 0.45647607934655776, "grad_norm": 0.62109375, "learning_rate": 0.00019899293652864873, "loss": 0.6448, "step": 978 }, { "epoch": 0.45694282380396734, "grad_norm": 0.68359375, "learning_rate": 0.00019899085825268956, "loss": 0.6587, "step": 979 }, { "epoch": 0.4574095682613769, "grad_norm": 0.66796875, "learning_rate": 0.00019898877784534736, "loss": 0.6648, "step": 980 }, { "epoch": 0.4578763127187865, "grad_norm": 0.65234375, "learning_rate": 0.000198986695306667, "loss": 0.624, "step": 981 }, { "epoch": 0.45834305717619606, "grad_norm": 0.76171875, "learning_rate": 0.0001989846106366932, "loss": 0.6782, "step": 982 }, { "epoch": 0.4588098016336056, "grad_norm": 0.6015625, "learning_rate": 0.00019898252383547093, "loss": 0.6257, "step": 983 }, { "epoch": 0.45927654609101515, "grad_norm": 0.6640625, "learning_rate": 0.0001989804349030451, "loss": 0.6897, "step": 984 }, { "epoch": 0.4597432905484247, "grad_norm": 0.65625, "learning_rate": 0.0001989783438394607, "loss": 0.6195, "step": 985 }, { "epoch": 0.4602100350058343, "grad_norm": 0.69140625, "learning_rate": 0.00019897625064476272, "loss": 0.6427, "step": 986 }, { "epoch": 0.46067677946324387, "grad_norm": 0.64453125, "learning_rate": 0.00019897415531899623, "loss": 0.6818, "step": 987 }, { "epoch": 0.46114352392065344, "grad_norm": 0.62109375, "learning_rate": 0.0001989720578622064, "loss": 0.6442, "step": 988 }, { "epoch": 0.461610268378063, "grad_norm": 0.64453125, "learning_rate": 0.00019896995827443833, "loss": 0.5443, "step": 989 }, { "epoch": 0.4620770128354726, "grad_norm": 0.6640625, "learning_rate": 0.00019896785655573726, "loss": 0.7202, "step": 990 }, { "epoch": 0.46254375729288216, "grad_norm": 0.765625, "learning_rate": 0.00019896575270614844, "loss": 0.7193, "step": 991 }, { "epoch": 0.46301050175029174, "grad_norm": 0.69921875, "learning_rate": 0.00019896364672571714, "loss": 0.692, "step": 992 }, { "epoch": 0.4634772462077013, "grad_norm": 0.75, "learning_rate": 0.00019896153861448875, "loss": 0.7826, "step": 993 }, { "epoch": 0.46394399066511083, "grad_norm": 0.5859375, "learning_rate": 0.0001989594283725086, "loss": 0.5714, "step": 994 }, { "epoch": 0.4644107351225204, "grad_norm": 0.6953125, "learning_rate": 0.0001989573159998222, "loss": 0.7018, "step": 995 }, { "epoch": 0.46487747957993, "grad_norm": 0.69921875, "learning_rate": 0.00019895520149647495, "loss": 0.6572, "step": 996 }, { "epoch": 0.46534422403733955, "grad_norm": 0.7109375, "learning_rate": 0.00019895308486251242, "loss": 0.7123, "step": 997 }, { "epoch": 0.4658109684947491, "grad_norm": 0.79296875, "learning_rate": 0.00019895096609798022, "loss": 0.5941, "step": 998 }, { "epoch": 0.4662777129521587, "grad_norm": 0.61328125, "learning_rate": 0.0001989488452029239, "loss": 0.5684, "step": 999 }, { "epoch": 0.46674445740956827, "grad_norm": 0.66015625, "learning_rate": 0.00019894672217738915, "loss": 0.6617, "step": 1000 }, { "epoch": 0.46721120186697784, "grad_norm": 0.6953125, "learning_rate": 0.00019894459702142175, "loss": 0.7052, "step": 1001 }, { "epoch": 0.4676779463243874, "grad_norm": 0.7109375, "learning_rate": 0.00019894246973506735, "loss": 0.6782, "step": 1002 }, { "epoch": 0.468144690781797, "grad_norm": 0.65234375, "learning_rate": 0.00019894034031837178, "loss": 0.6076, "step": 1003 }, { "epoch": 0.4686114352392065, "grad_norm": 0.70703125, "learning_rate": 0.00019893820877138095, "loss": 0.6817, "step": 1004 }, { "epoch": 0.4690781796966161, "grad_norm": 0.69921875, "learning_rate": 0.00019893607509414068, "loss": 0.6389, "step": 1005 }, { "epoch": 0.46954492415402566, "grad_norm": 0.7265625, "learning_rate": 0.00019893393928669694, "loss": 0.6889, "step": 1006 }, { "epoch": 0.47001166861143523, "grad_norm": 0.64453125, "learning_rate": 0.00019893180134909573, "loss": 0.4746, "step": 1007 }, { "epoch": 0.4704784130688448, "grad_norm": 0.6640625, "learning_rate": 0.0001989296612813831, "loss": 0.6132, "step": 1008 }, { "epoch": 0.4709451575262544, "grad_norm": 0.74609375, "learning_rate": 0.00019892751908360504, "loss": 0.6687, "step": 1009 }, { "epoch": 0.47141190198366395, "grad_norm": 0.68359375, "learning_rate": 0.00019892537475580774, "loss": 0.562, "step": 1010 }, { "epoch": 0.4718786464410735, "grad_norm": 0.6640625, "learning_rate": 0.0001989232282980374, "loss": 0.6405, "step": 1011 }, { "epoch": 0.4723453908984831, "grad_norm": 0.625, "learning_rate": 0.00019892107971034018, "loss": 0.5299, "step": 1012 }, { "epoch": 0.47281213535589267, "grad_norm": 0.76171875, "learning_rate": 0.00019891892899276235, "loss": 0.6425, "step": 1013 }, { "epoch": 0.47327887981330224, "grad_norm": 0.80859375, "learning_rate": 0.00019891677614535024, "loss": 0.7187, "step": 1014 }, { "epoch": 0.47374562427071176, "grad_norm": 0.79296875, "learning_rate": 0.0001989146211681502, "loss": 0.6603, "step": 1015 }, { "epoch": 0.47421236872812134, "grad_norm": 0.734375, "learning_rate": 0.00019891246406120858, "loss": 0.6571, "step": 1016 }, { "epoch": 0.4746791131855309, "grad_norm": 0.6640625, "learning_rate": 0.0001989103048245719, "loss": 0.6117, "step": 1017 }, { "epoch": 0.4751458576429405, "grad_norm": 0.6875, "learning_rate": 0.0001989081434582866, "loss": 0.6603, "step": 1018 }, { "epoch": 0.47561260210035006, "grad_norm": 0.7421875, "learning_rate": 0.0001989059799623992, "loss": 0.6893, "step": 1019 }, { "epoch": 0.47607934655775963, "grad_norm": 0.734375, "learning_rate": 0.00019890381433695638, "loss": 0.68, "step": 1020 }, { "epoch": 0.4765460910151692, "grad_norm": 0.66796875, "learning_rate": 0.00019890164658200465, "loss": 0.66, "step": 1021 }, { "epoch": 0.4770128354725788, "grad_norm": 0.640625, "learning_rate": 0.00019889947669759074, "loss": 0.6105, "step": 1022 }, { "epoch": 0.47747957992998835, "grad_norm": 0.68359375, "learning_rate": 0.0001988973046837614, "loss": 0.681, "step": 1023 }, { "epoch": 0.4779463243873979, "grad_norm": 0.6875, "learning_rate": 0.00019889513054056332, "loss": 0.6251, "step": 1024 }, { "epoch": 0.47841306884480744, "grad_norm": 0.71875, "learning_rate": 0.00019889295426804338, "loss": 0.6463, "step": 1025 }, { "epoch": 0.478879813302217, "grad_norm": 0.80078125, "learning_rate": 0.0001988907758662484, "loss": 0.7035, "step": 1026 }, { "epoch": 0.4793465577596266, "grad_norm": 0.6953125, "learning_rate": 0.0001988885953352253, "loss": 0.6776, "step": 1027 }, { "epoch": 0.47981330221703616, "grad_norm": 0.75, "learning_rate": 0.000198886412675021, "loss": 0.6199, "step": 1028 }, { "epoch": 0.48028004667444574, "grad_norm": 0.7734375, "learning_rate": 0.00019888422788568258, "loss": 0.6544, "step": 1029 }, { "epoch": 0.4807467911318553, "grad_norm": 0.765625, "learning_rate": 0.00019888204096725696, "loss": 0.7016, "step": 1030 }, { "epoch": 0.4812135355892649, "grad_norm": 0.75, "learning_rate": 0.0001988798519197913, "loss": 0.587, "step": 1031 }, { "epoch": 0.48168028004667446, "grad_norm": 0.6953125, "learning_rate": 0.00019887766074333274, "loss": 0.5552, "step": 1032 }, { "epoch": 0.48214702450408403, "grad_norm": 0.875, "learning_rate": 0.00019887546743792842, "loss": 0.7311, "step": 1033 }, { "epoch": 0.4826137689614936, "grad_norm": 0.75390625, "learning_rate": 0.0001988732720036256, "loss": 0.6107, "step": 1034 }, { "epoch": 0.4830805134189031, "grad_norm": 0.71484375, "learning_rate": 0.00019887107444047151, "loss": 0.5597, "step": 1035 }, { "epoch": 0.4835472578763127, "grad_norm": 0.71484375, "learning_rate": 0.00019886887474851347, "loss": 0.6131, "step": 1036 }, { "epoch": 0.48401400233372227, "grad_norm": 0.76953125, "learning_rate": 0.0001988666729277989, "loss": 0.6671, "step": 1037 }, { "epoch": 0.48448074679113184, "grad_norm": 0.76953125, "learning_rate": 0.00019886446897837514, "loss": 0.7221, "step": 1038 }, { "epoch": 0.4849474912485414, "grad_norm": 0.87109375, "learning_rate": 0.00019886226290028968, "loss": 0.7206, "step": 1039 }, { "epoch": 0.485414235705951, "grad_norm": 0.6875, "learning_rate": 0.00019886005469359002, "loss": 0.6285, "step": 1040 }, { "epoch": 0.48588098016336057, "grad_norm": 0.71875, "learning_rate": 0.0001988578443583237, "loss": 0.617, "step": 1041 }, { "epoch": 0.48634772462077014, "grad_norm": 0.73828125, "learning_rate": 0.00019885563189453827, "loss": 0.5879, "step": 1042 }, { "epoch": 0.4868144690781797, "grad_norm": 0.828125, "learning_rate": 0.00019885341730228144, "loss": 0.6629, "step": 1043 }, { "epoch": 0.4872812135355893, "grad_norm": 0.71484375, "learning_rate": 0.00019885120058160083, "loss": 0.5798, "step": 1044 }, { "epoch": 0.48774795799299886, "grad_norm": 0.796875, "learning_rate": 0.0001988489817325442, "loss": 0.7046, "step": 1045 }, { "epoch": 0.4882147024504084, "grad_norm": 0.80859375, "learning_rate": 0.00019884676075515936, "loss": 0.7055, "step": 1046 }, { "epoch": 0.48868144690781795, "grad_norm": 0.7109375, "learning_rate": 0.000198844537649494, "loss": 0.6404, "step": 1047 }, { "epoch": 0.4891481913652275, "grad_norm": 0.7578125, "learning_rate": 0.00019884231241559618, "loss": 0.6823, "step": 1048 }, { "epoch": 0.4896149358226371, "grad_norm": 0.64453125, "learning_rate": 0.00019884008505351367, "loss": 0.4446, "step": 1049 }, { "epoch": 0.49008168028004667, "grad_norm": 0.65234375, "learning_rate": 0.0001988378555632944, "loss": 0.5704, "step": 1050 }, { "epoch": 0.49054842473745625, "grad_norm": 0.765625, "learning_rate": 0.0001988356239449865, "loss": 0.6071, "step": 1051 }, { "epoch": 0.4910151691948658, "grad_norm": 0.84765625, "learning_rate": 0.00019883339019863793, "loss": 0.673, "step": 1052 }, { "epoch": 0.4914819136522754, "grad_norm": 0.78125, "learning_rate": 0.00019883115432429684, "loss": 0.6836, "step": 1053 }, { "epoch": 0.49194865810968497, "grad_norm": 0.68359375, "learning_rate": 0.00019882891632201134, "loss": 0.487, "step": 1054 }, { "epoch": 0.49241540256709454, "grad_norm": 0.765625, "learning_rate": 0.0001988266761918296, "loss": 0.6445, "step": 1055 }, { "epoch": 0.49288214702450406, "grad_norm": 0.64453125, "learning_rate": 0.00019882443393379987, "loss": 0.4923, "step": 1056 }, { "epoch": 0.49334889148191363, "grad_norm": 0.703125, "learning_rate": 0.00019882218954797045, "loss": 0.5783, "step": 1057 }, { "epoch": 0.4938156359393232, "grad_norm": 0.8359375, "learning_rate": 0.00019881994303438963, "loss": 0.687, "step": 1058 }, { "epoch": 0.4942823803967328, "grad_norm": 0.73828125, "learning_rate": 0.0001988176943931058, "loss": 0.6454, "step": 1059 }, { "epoch": 0.49474912485414235, "grad_norm": 0.8359375, "learning_rate": 0.00019881544362416735, "loss": 0.677, "step": 1060 }, { "epoch": 0.4952158693115519, "grad_norm": 0.6875, "learning_rate": 0.00019881319072762276, "loss": 0.6048, "step": 1061 }, { "epoch": 0.4956826137689615, "grad_norm": 0.7734375, "learning_rate": 0.00019881093570352055, "loss": 0.6567, "step": 1062 }, { "epoch": 0.4961493582263711, "grad_norm": 0.7890625, "learning_rate": 0.00019880867855190927, "loss": 0.7238, "step": 1063 }, { "epoch": 0.49661610268378065, "grad_norm": 0.7734375, "learning_rate": 0.00019880641927283752, "loss": 0.6651, "step": 1064 }, { "epoch": 0.4970828471411902, "grad_norm": 0.69921875, "learning_rate": 0.00019880415786635393, "loss": 0.6228, "step": 1065 }, { "epoch": 0.4975495915985998, "grad_norm": 0.76953125, "learning_rate": 0.0001988018943325072, "loss": 0.6091, "step": 1066 }, { "epoch": 0.4980163360560093, "grad_norm": 0.765625, "learning_rate": 0.00019879962867134604, "loss": 0.6545, "step": 1067 }, { "epoch": 0.4984830805134189, "grad_norm": 0.82421875, "learning_rate": 0.00019879736088291924, "loss": 0.8203, "step": 1068 }, { "epoch": 0.49894982497082846, "grad_norm": 0.72265625, "learning_rate": 0.0001987950909672757, "loss": 0.6723, "step": 1069 }, { "epoch": 0.49941656942823803, "grad_norm": 0.7578125, "learning_rate": 0.0001987928189244642, "loss": 0.6822, "step": 1070 }, { "epoch": 0.4998833138856476, "grad_norm": 0.6640625, "learning_rate": 0.0001987905447545337, "loss": 0.5764, "step": 1071 }, { "epoch": 0.5003500583430571, "grad_norm": 0.66015625, "learning_rate": 0.00019878826845753315, "loss": 0.6364, "step": 1072 }, { "epoch": 0.5003500583430571, "eval_loss": 1.0250318050384521, "eval_runtime": 94.4783, "eval_samples_per_second": 19.094, "eval_steps_per_second": 2.392, "step": 1072 }, { "epoch": 0.5008168028004667, "grad_norm": 0.65625, "learning_rate": 0.00019878599003351157, "loss": 0.5955, "step": 1073 }, { "epoch": 0.5012835472578763, "grad_norm": 0.69921875, "learning_rate": 0.00019878370948251805, "loss": 0.4872, "step": 1074 }, { "epoch": 0.5017502917152858, "grad_norm": 0.73046875, "learning_rate": 0.00019878142680460162, "loss": 0.5654, "step": 1075 }, { "epoch": 0.5022170361726954, "grad_norm": 0.64453125, "learning_rate": 0.00019877914199981152, "loss": 0.5411, "step": 1076 }, { "epoch": 0.502683780630105, "grad_norm": 0.7421875, "learning_rate": 0.00019877685506819687, "loss": 0.5922, "step": 1077 }, { "epoch": 0.5031505250875146, "grad_norm": 0.87109375, "learning_rate": 0.00019877456600980693, "loss": 0.6406, "step": 1078 }, { "epoch": 0.5036172695449241, "grad_norm": 0.765625, "learning_rate": 0.000198772274824691, "loss": 0.611, "step": 1079 }, { "epoch": 0.5040840140023337, "grad_norm": 0.7421875, "learning_rate": 0.00019876998151289842, "loss": 0.6001, "step": 1080 }, { "epoch": 0.5045507584597433, "grad_norm": 0.72265625, "learning_rate": 0.00019876768607447854, "loss": 0.5799, "step": 1081 }, { "epoch": 0.5050175029171529, "grad_norm": 0.875, "learning_rate": 0.00019876538850948077, "loss": 0.7036, "step": 1082 }, { "epoch": 0.5054842473745624, "grad_norm": 0.73828125, "learning_rate": 0.0001987630888179546, "loss": 0.6494, "step": 1083 }, { "epoch": 0.505950991831972, "grad_norm": 0.66796875, "learning_rate": 0.00019876078699994959, "loss": 0.5809, "step": 1084 }, { "epoch": 0.5064177362893816, "grad_norm": 0.7421875, "learning_rate": 0.00019875848305551524, "loss": 0.5221, "step": 1085 }, { "epoch": 0.5068844807467912, "grad_norm": 0.93359375, "learning_rate": 0.00019875617698470114, "loss": 0.6165, "step": 1086 }, { "epoch": 0.5073512252042007, "grad_norm": 0.81640625, "learning_rate": 0.00019875386878755703, "loss": 0.6447, "step": 1087 }, { "epoch": 0.5078179696616103, "grad_norm": 0.6796875, "learning_rate": 0.00019875155846413255, "loss": 0.6486, "step": 1088 }, { "epoch": 0.5082847141190199, "grad_norm": 0.75, "learning_rate": 0.00019874924601447744, "loss": 0.6323, "step": 1089 }, { "epoch": 0.5087514585764294, "grad_norm": 0.76171875, "learning_rate": 0.00019874693143864146, "loss": 0.5859, "step": 1090 }, { "epoch": 0.509218203033839, "grad_norm": 0.84375, "learning_rate": 0.00019874461473667454, "loss": 0.6746, "step": 1091 }, { "epoch": 0.5096849474912486, "grad_norm": 0.7890625, "learning_rate": 0.00019874229590862648, "loss": 0.6366, "step": 1092 }, { "epoch": 0.5101516919486581, "grad_norm": 0.79296875, "learning_rate": 0.00019873997495454724, "loss": 0.5875, "step": 1093 }, { "epoch": 0.5106184364060676, "grad_norm": 0.84765625, "learning_rate": 0.00019873765187448678, "loss": 0.6922, "step": 1094 }, { "epoch": 0.5110851808634772, "grad_norm": 0.77734375, "learning_rate": 0.00019873532666849514, "loss": 0.6177, "step": 1095 }, { "epoch": 0.5115519253208868, "grad_norm": 0.81640625, "learning_rate": 0.00019873299933662235, "loss": 0.6782, "step": 1096 }, { "epoch": 0.5120186697782964, "grad_norm": 0.75, "learning_rate": 0.00019873066987891855, "loss": 0.5991, "step": 1097 }, { "epoch": 0.5124854142357059, "grad_norm": 0.734375, "learning_rate": 0.00019872833829543386, "loss": 0.5489, "step": 1098 }, { "epoch": 0.5129521586931155, "grad_norm": 0.77734375, "learning_rate": 0.00019872600458621854, "loss": 0.5954, "step": 1099 }, { "epoch": 0.5134189031505251, "grad_norm": 0.71875, "learning_rate": 0.00019872366875132278, "loss": 0.6374, "step": 1100 }, { "epoch": 0.5138856476079346, "grad_norm": 0.66796875, "learning_rate": 0.00019872133079079692, "loss": 0.5936, "step": 1101 }, { "epoch": 0.5143523920653442, "grad_norm": 0.8515625, "learning_rate": 0.00019871899070469123, "loss": 0.6048, "step": 1102 }, { "epoch": 0.5148191365227538, "grad_norm": 0.8359375, "learning_rate": 0.00019871664849305617, "loss": 0.6304, "step": 1103 }, { "epoch": 0.5152858809801634, "grad_norm": 0.7421875, "learning_rate": 0.00019871430415594214, "loss": 0.6339, "step": 1104 }, { "epoch": 0.5157526254375729, "grad_norm": 0.81640625, "learning_rate": 0.00019871195769339962, "loss": 0.6001, "step": 1105 }, { "epoch": 0.5162193698949825, "grad_norm": 0.76953125, "learning_rate": 0.00019870960910547912, "loss": 0.5366, "step": 1106 }, { "epoch": 0.5166861143523921, "grad_norm": 0.8828125, "learning_rate": 0.0001987072583922312, "loss": 0.7068, "step": 1107 }, { "epoch": 0.5171528588098017, "grad_norm": 0.62890625, "learning_rate": 0.0001987049055537065, "loss": 0.5369, "step": 1108 }, { "epoch": 0.5176196032672112, "grad_norm": 0.7109375, "learning_rate": 0.00019870255058995572, "loss": 0.5677, "step": 1109 }, { "epoch": 0.5180863477246208, "grad_norm": 0.73828125, "learning_rate": 0.00019870019350102944, "loss": 0.5989, "step": 1110 }, { "epoch": 0.5185530921820304, "grad_norm": 0.765625, "learning_rate": 0.00019869783428697852, "loss": 0.5877, "step": 1111 }, { "epoch": 0.51901983663944, "grad_norm": 0.7578125, "learning_rate": 0.0001986954729478537, "loss": 0.5889, "step": 1112 }, { "epoch": 0.5194865810968494, "grad_norm": 0.7421875, "learning_rate": 0.00019869310948370585, "loss": 0.6184, "step": 1113 }, { "epoch": 0.519953325554259, "grad_norm": 0.68359375, "learning_rate": 0.00019869074389458586, "loss": 0.5686, "step": 1114 }, { "epoch": 0.5204200700116686, "grad_norm": 0.6796875, "learning_rate": 0.00019868837618054465, "loss": 0.5511, "step": 1115 }, { "epoch": 0.5208868144690781, "grad_norm": 0.8828125, "learning_rate": 0.0001986860063416332, "loss": 0.694, "step": 1116 }, { "epoch": 0.5213535589264877, "grad_norm": 0.828125, "learning_rate": 0.00019868363437790257, "loss": 0.5902, "step": 1117 }, { "epoch": 0.5218203033838973, "grad_norm": 0.9140625, "learning_rate": 0.00019868126028940375, "loss": 0.6503, "step": 1118 }, { "epoch": 0.5222870478413069, "grad_norm": 0.71875, "learning_rate": 0.00019867888407618792, "loss": 0.6597, "step": 1119 }, { "epoch": 0.5227537922987164, "grad_norm": 0.77734375, "learning_rate": 0.00019867650573830625, "loss": 0.6554, "step": 1120 }, { "epoch": 0.523220536756126, "grad_norm": 0.8828125, "learning_rate": 0.00019867412527580994, "loss": 0.65, "step": 1121 }, { "epoch": 0.5236872812135356, "grad_norm": 0.62109375, "learning_rate": 0.0001986717426887502, "loss": 0.5183, "step": 1122 }, { "epoch": 0.5241540256709452, "grad_norm": 0.67578125, "learning_rate": 0.0001986693579771784, "loss": 0.5503, "step": 1123 }, { "epoch": 0.5246207701283547, "grad_norm": 0.890625, "learning_rate": 0.00019866697114114578, "loss": 0.6149, "step": 1124 }, { "epoch": 0.5250875145857643, "grad_norm": 0.70703125, "learning_rate": 0.00019866458218070385, "loss": 0.6891, "step": 1125 }, { "epoch": 0.5255542590431739, "grad_norm": 0.73046875, "learning_rate": 0.00019866219109590397, "loss": 0.5487, "step": 1126 }, { "epoch": 0.5260210035005835, "grad_norm": 0.6875, "learning_rate": 0.00019865979788679768, "loss": 0.5028, "step": 1127 }, { "epoch": 0.526487747957993, "grad_norm": 0.82421875, "learning_rate": 0.00019865740255343645, "loss": 0.6483, "step": 1128 }, { "epoch": 0.5269544924154026, "grad_norm": 0.89453125, "learning_rate": 0.0001986550050958719, "loss": 0.5406, "step": 1129 }, { "epoch": 0.5274212368728122, "grad_norm": 0.84375, "learning_rate": 0.00019865260551415564, "loss": 0.6311, "step": 1130 }, { "epoch": 0.5278879813302217, "grad_norm": 0.7421875, "learning_rate": 0.00019865020380833927, "loss": 0.5721, "step": 1131 }, { "epoch": 0.5283547257876313, "grad_norm": 0.7734375, "learning_rate": 0.00019864779997847462, "loss": 0.5387, "step": 1132 }, { "epoch": 0.5288214702450409, "grad_norm": 0.8125, "learning_rate": 0.00019864539402461335, "loss": 0.6383, "step": 1133 }, { "epoch": 0.5292882147024504, "grad_norm": 0.890625, "learning_rate": 0.0001986429859468073, "loss": 0.6705, "step": 1134 }, { "epoch": 0.5297549591598599, "grad_norm": 0.796875, "learning_rate": 0.00019864057574510834, "loss": 0.5996, "step": 1135 }, { "epoch": 0.5302217036172695, "grad_norm": 0.83984375, "learning_rate": 0.0001986381634195683, "loss": 0.7158, "step": 1136 }, { "epoch": 0.5306884480746791, "grad_norm": 0.7109375, "learning_rate": 0.0001986357489702392, "loss": 0.584, "step": 1137 }, { "epoch": 0.5311551925320886, "grad_norm": 0.6640625, "learning_rate": 0.00019863333239717297, "loss": 0.6267, "step": 1138 }, { "epoch": 0.5316219369894982, "grad_norm": 0.734375, "learning_rate": 0.00019863091370042165, "loss": 0.631, "step": 1139 }, { "epoch": 0.5320886814469078, "grad_norm": 0.6875, "learning_rate": 0.00019862849288003736, "loss": 0.5929, "step": 1140 }, { "epoch": 0.5325554259043174, "grad_norm": 0.703125, "learning_rate": 0.00019862606993607213, "loss": 0.5792, "step": 1141 }, { "epoch": 0.5330221703617269, "grad_norm": 0.65625, "learning_rate": 0.00019862364486857823, "loss": 0.4699, "step": 1142 }, { "epoch": 0.5334889148191365, "grad_norm": 0.75, "learning_rate": 0.00019862121767760783, "loss": 0.6793, "step": 1143 }, { "epoch": 0.5339556592765461, "grad_norm": 0.8046875, "learning_rate": 0.00019861878836321316, "loss": 0.6824, "step": 1144 }, { "epoch": 0.5344224037339557, "grad_norm": 0.71484375, "learning_rate": 0.00019861635692544657, "loss": 0.5772, "step": 1145 }, { "epoch": 0.5348891481913652, "grad_norm": 0.7421875, "learning_rate": 0.0001986139233643604, "loss": 0.6505, "step": 1146 }, { "epoch": 0.5353558926487748, "grad_norm": 0.69921875, "learning_rate": 0.00019861148768000707, "loss": 0.5209, "step": 1147 }, { "epoch": 0.5358226371061844, "grad_norm": 0.75390625, "learning_rate": 0.00019860904987243898, "loss": 0.6217, "step": 1148 }, { "epoch": 0.536289381563594, "grad_norm": 0.70703125, "learning_rate": 0.00019860660994170862, "loss": 0.6072, "step": 1149 }, { "epoch": 0.5367561260210035, "grad_norm": 0.69921875, "learning_rate": 0.0001986041678878686, "loss": 0.6351, "step": 1150 }, { "epoch": 0.5372228704784131, "grad_norm": 0.609375, "learning_rate": 0.00019860172371097138, "loss": 0.4966, "step": 1151 }, { "epoch": 0.5376896149358227, "grad_norm": 0.76171875, "learning_rate": 0.0001985992774110697, "loss": 0.6, "step": 1152 }, { "epoch": 0.5381563593932323, "grad_norm": 0.69921875, "learning_rate": 0.00019859682898821612, "loss": 0.5285, "step": 1153 }, { "epoch": 0.5386231038506418, "grad_norm": 0.61328125, "learning_rate": 0.00019859437844246347, "loss": 0.565, "step": 1154 }, { "epoch": 0.5390898483080513, "grad_norm": 0.83984375, "learning_rate": 0.00019859192577386444, "loss": 0.6159, "step": 1155 }, { "epoch": 0.5395565927654609, "grad_norm": 0.7890625, "learning_rate": 0.00019858947098247185, "loss": 0.6855, "step": 1156 }, { "epoch": 0.5400233372228704, "grad_norm": 0.90234375, "learning_rate": 0.00019858701406833858, "loss": 0.6716, "step": 1157 }, { "epoch": 0.54049008168028, "grad_norm": 0.89453125, "learning_rate": 0.00019858455503151749, "loss": 0.6272, "step": 1158 }, { "epoch": 0.5409568261376896, "grad_norm": 0.8203125, "learning_rate": 0.00019858209387206154, "loss": 0.5411, "step": 1159 }, { "epoch": 0.5414235705950992, "grad_norm": 0.86328125, "learning_rate": 0.00019857963059002377, "loss": 0.5304, "step": 1160 }, { "epoch": 0.5418903150525087, "grad_norm": 0.94921875, "learning_rate": 0.00019857716518545716, "loss": 0.6087, "step": 1161 }, { "epoch": 0.5423570595099183, "grad_norm": 0.78515625, "learning_rate": 0.00019857469765841482, "loss": 0.6002, "step": 1162 }, { "epoch": 0.5428238039673279, "grad_norm": 0.7890625, "learning_rate": 0.00019857222800894982, "loss": 0.6597, "step": 1163 }, { "epoch": 0.5432905484247375, "grad_norm": 0.66015625, "learning_rate": 0.00019856975623711544, "loss": 0.5125, "step": 1164 }, { "epoch": 0.543757292882147, "grad_norm": 0.84765625, "learning_rate": 0.00019856728234296482, "loss": 0.6435, "step": 1165 }, { "epoch": 0.5442240373395566, "grad_norm": 0.6484375, "learning_rate": 0.00019856480632655127, "loss": 0.5924, "step": 1166 }, { "epoch": 0.5446907817969662, "grad_norm": 0.64453125, "learning_rate": 0.00019856232818792806, "loss": 0.6101, "step": 1167 }, { "epoch": 0.5451575262543757, "grad_norm": 0.72265625, "learning_rate": 0.00019855984792714856, "loss": 0.5965, "step": 1168 }, { "epoch": 0.5456242707117853, "grad_norm": 0.69140625, "learning_rate": 0.0001985573655442662, "loss": 0.5914, "step": 1169 }, { "epoch": 0.5460910151691949, "grad_norm": 0.74609375, "learning_rate": 0.0001985548810393344, "loss": 0.5135, "step": 1170 }, { "epoch": 0.5465577596266045, "grad_norm": 0.703125, "learning_rate": 0.00019855239441240666, "loss": 0.3963, "step": 1171 }, { "epoch": 0.547024504084014, "grad_norm": 0.85546875, "learning_rate": 0.00019854990566353653, "loss": 0.6302, "step": 1172 }, { "epoch": 0.5474912485414236, "grad_norm": 0.87109375, "learning_rate": 0.00019854741479277758, "loss": 0.6625, "step": 1173 }, { "epoch": 0.5479579929988332, "grad_norm": 0.7265625, "learning_rate": 0.00019854492180018345, "loss": 0.4819, "step": 1174 }, { "epoch": 0.5484247374562428, "grad_norm": 0.76953125, "learning_rate": 0.0001985424266858078, "loss": 0.6229, "step": 1175 }, { "epoch": 0.5488914819136522, "grad_norm": 0.78515625, "learning_rate": 0.0001985399294497044, "loss": 0.5842, "step": 1176 }, { "epoch": 0.5493582263710618, "grad_norm": 0.83984375, "learning_rate": 0.00019853743009192698, "loss": 0.6479, "step": 1177 }, { "epoch": 0.5498249708284714, "grad_norm": 0.703125, "learning_rate": 0.00019853492861252937, "loss": 0.4992, "step": 1178 }, { "epoch": 0.550291715285881, "grad_norm": 0.80859375, "learning_rate": 0.0001985324250115654, "loss": 0.586, "step": 1179 }, { "epoch": 0.5507584597432905, "grad_norm": 0.84765625, "learning_rate": 0.000198529919289089, "loss": 0.6077, "step": 1180 }, { "epoch": 0.5512252042007001, "grad_norm": 0.65234375, "learning_rate": 0.0001985274114451541, "loss": 0.5771, "step": 1181 }, { "epoch": 0.5516919486581097, "grad_norm": 0.76171875, "learning_rate": 0.00019852490147981473, "loss": 0.6446, "step": 1182 }, { "epoch": 0.5521586931155192, "grad_norm": 0.71875, "learning_rate": 0.00019852238939312492, "loss": 0.6201, "step": 1183 }, { "epoch": 0.5526254375729288, "grad_norm": 0.80078125, "learning_rate": 0.00019851987518513874, "loss": 0.5849, "step": 1184 }, { "epoch": 0.5530921820303384, "grad_norm": 0.71484375, "learning_rate": 0.00019851735885591034, "loss": 0.5069, "step": 1185 }, { "epoch": 0.553558926487748, "grad_norm": 0.74609375, "learning_rate": 0.0001985148404054939, "loss": 0.5743, "step": 1186 }, { "epoch": 0.5540256709451575, "grad_norm": 0.828125, "learning_rate": 0.00019851231983394367, "loss": 0.6359, "step": 1187 }, { "epoch": 0.5544924154025671, "grad_norm": 0.83984375, "learning_rate": 0.00019850979714131385, "loss": 0.6098, "step": 1188 }, { "epoch": 0.5549591598599767, "grad_norm": 0.7734375, "learning_rate": 0.00019850727232765883, "loss": 0.5488, "step": 1189 }, { "epoch": 0.5554259043173863, "grad_norm": 0.78125, "learning_rate": 0.00019850474539303295, "loss": 0.5868, "step": 1190 }, { "epoch": 0.5558926487747958, "grad_norm": 0.8046875, "learning_rate": 0.00019850221633749057, "loss": 0.5783, "step": 1191 }, { "epoch": 0.5563593932322054, "grad_norm": 0.80078125, "learning_rate": 0.00019849968516108622, "loss": 0.5875, "step": 1192 }, { "epoch": 0.556826137689615, "grad_norm": 0.72265625, "learning_rate": 0.00019849715186387435, "loss": 0.6392, "step": 1193 }, { "epoch": 0.5572928821470245, "grad_norm": 0.84765625, "learning_rate": 0.00019849461644590947, "loss": 0.6698, "step": 1194 }, { "epoch": 0.5577596266044341, "grad_norm": 0.69140625, "learning_rate": 0.00019849207890724628, "loss": 0.5326, "step": 1195 }, { "epoch": 0.5582263710618437, "grad_norm": 0.71484375, "learning_rate": 0.00019848953924793931, "loss": 0.5651, "step": 1196 }, { "epoch": 0.5586931155192532, "grad_norm": 0.8828125, "learning_rate": 0.0001984869974680433, "loss": 0.7025, "step": 1197 }, { "epoch": 0.5591598599766627, "grad_norm": 0.703125, "learning_rate": 0.00019848445356761296, "loss": 0.5442, "step": 1198 }, { "epoch": 0.5596266044340723, "grad_norm": 0.76171875, "learning_rate": 0.00019848190754670307, "loss": 0.617, "step": 1199 }, { "epoch": 0.5600933488914819, "grad_norm": 0.83203125, "learning_rate": 0.00019847935940536844, "loss": 0.6329, "step": 1200 }, { "epoch": 0.5605600933488915, "grad_norm": 0.6875, "learning_rate": 0.00019847680914366397, "loss": 0.5358, "step": 1201 }, { "epoch": 0.561026837806301, "grad_norm": 0.69921875, "learning_rate": 0.0001984742567616445, "loss": 0.6069, "step": 1202 }, { "epoch": 0.5614935822637106, "grad_norm": 0.671875, "learning_rate": 0.000198471702259365, "loss": 0.6207, "step": 1203 }, { "epoch": 0.5619603267211202, "grad_norm": 0.69140625, "learning_rate": 0.00019846914563688054, "loss": 0.5826, "step": 1204 }, { "epoch": 0.5624270711785297, "grad_norm": 0.69140625, "learning_rate": 0.0001984665868942461, "loss": 0.5606, "step": 1205 }, { "epoch": 0.5628938156359393, "grad_norm": 0.6953125, "learning_rate": 0.0001984640260315168, "loss": 0.537, "step": 1206 }, { "epoch": 0.5633605600933489, "grad_norm": 0.88671875, "learning_rate": 0.00019846146304874777, "loss": 0.6786, "step": 1207 }, { "epoch": 0.5638273045507585, "grad_norm": 0.73828125, "learning_rate": 0.00019845889794599416, "loss": 0.4961, "step": 1208 }, { "epoch": 0.564294049008168, "grad_norm": 0.8125, "learning_rate": 0.0001984563307233113, "loss": 0.5691, "step": 1209 }, { "epoch": 0.5647607934655776, "grad_norm": 0.7421875, "learning_rate": 0.00019845376138075436, "loss": 0.5507, "step": 1210 }, { "epoch": 0.5652275379229872, "grad_norm": 0.80078125, "learning_rate": 0.0001984511899183787, "loss": 0.5836, "step": 1211 }, { "epoch": 0.5656942823803968, "grad_norm": 0.828125, "learning_rate": 0.0001984486163362397, "loss": 0.64, "step": 1212 }, { "epoch": 0.5661610268378063, "grad_norm": 0.76171875, "learning_rate": 0.00019844604063439273, "loss": 0.6291, "step": 1213 }, { "epoch": 0.5666277712952159, "grad_norm": 0.78515625, "learning_rate": 0.0001984434628128933, "loss": 0.6231, "step": 1214 }, { "epoch": 0.5670945157526255, "grad_norm": 0.7890625, "learning_rate": 0.00019844088287179691, "loss": 0.4757, "step": 1215 }, { "epoch": 0.5675612602100351, "grad_norm": 0.78515625, "learning_rate": 0.00019843830081115908, "loss": 0.6567, "step": 1216 }, { "epoch": 0.5680280046674445, "grad_norm": 0.78125, "learning_rate": 0.00019843571663103542, "loss": 0.6016, "step": 1217 }, { "epoch": 0.5684947491248541, "grad_norm": 0.75, "learning_rate": 0.00019843313033148156, "loss": 0.4611, "step": 1218 }, { "epoch": 0.5689614935822637, "grad_norm": 0.8046875, "learning_rate": 0.00019843054191255317, "loss": 0.6219, "step": 1219 }, { "epoch": 0.5694282380396732, "grad_norm": 0.78125, "learning_rate": 0.000198427951374306, "loss": 0.5978, "step": 1220 }, { "epoch": 0.5698949824970828, "grad_norm": 0.70703125, "learning_rate": 0.00019842535871679586, "loss": 0.5723, "step": 1221 }, { "epoch": 0.5703617269544924, "grad_norm": 0.76171875, "learning_rate": 0.00019842276394007856, "loss": 0.5891, "step": 1222 }, { "epoch": 0.570828471411902, "grad_norm": 0.76171875, "learning_rate": 0.00019842016704420989, "loss": 0.5347, "step": 1223 }, { "epoch": 0.5712952158693115, "grad_norm": 0.72265625, "learning_rate": 0.00019841756802924584, "loss": 0.5659, "step": 1224 }, { "epoch": 0.5717619603267211, "grad_norm": 0.76953125, "learning_rate": 0.00019841496689524237, "loss": 0.552, "step": 1225 }, { "epoch": 0.5722287047841307, "grad_norm": 0.7578125, "learning_rate": 0.00019841236364225546, "loss": 0.5454, "step": 1226 }, { "epoch": 0.5726954492415403, "grad_norm": 0.87890625, "learning_rate": 0.00019840975827034121, "loss": 0.5268, "step": 1227 }, { "epoch": 0.5731621936989498, "grad_norm": 0.88671875, "learning_rate": 0.00019840715077955562, "loss": 0.6266, "step": 1228 }, { "epoch": 0.5736289381563594, "grad_norm": 0.8046875, "learning_rate": 0.00019840454116995494, "loss": 0.5017, "step": 1229 }, { "epoch": 0.574095682613769, "grad_norm": 0.76171875, "learning_rate": 0.00019840192944159526, "loss": 0.5586, "step": 1230 }, { "epoch": 0.5745624270711785, "grad_norm": 0.8125, "learning_rate": 0.00019839931559453286, "loss": 0.508, "step": 1231 }, { "epoch": 0.5750291715285881, "grad_norm": 0.75, "learning_rate": 0.00019839669962882405, "loss": 0.4471, "step": 1232 }, { "epoch": 0.5754959159859977, "grad_norm": 0.97265625, "learning_rate": 0.00019839408154452513, "loss": 0.6899, "step": 1233 }, { "epoch": 0.5759626604434073, "grad_norm": 0.90625, "learning_rate": 0.00019839146134169243, "loss": 0.553, "step": 1234 }, { "epoch": 0.5764294049008168, "grad_norm": 0.6953125, "learning_rate": 0.00019838883902038244, "loss": 0.4862, "step": 1235 }, { "epoch": 0.5768961493582264, "grad_norm": 0.70703125, "learning_rate": 0.00019838621458065155, "loss": 0.4894, "step": 1236 }, { "epoch": 0.577362893815636, "grad_norm": 0.78125, "learning_rate": 0.0001983835880225563, "loss": 0.4977, "step": 1237 }, { "epoch": 0.5778296382730455, "grad_norm": 0.7890625, "learning_rate": 0.00019838095934615323, "loss": 0.6053, "step": 1238 }, { "epoch": 0.578296382730455, "grad_norm": 0.859375, "learning_rate": 0.00019837832855149897, "loss": 0.6234, "step": 1239 }, { "epoch": 0.5787631271878646, "grad_norm": 0.87109375, "learning_rate": 0.00019837569563865018, "loss": 0.5609, "step": 1240 }, { "epoch": 0.5792298716452742, "grad_norm": 0.84375, "learning_rate": 0.00019837306060766345, "loss": 0.5618, "step": 1241 }, { "epoch": 0.5796966161026837, "grad_norm": 0.87890625, "learning_rate": 0.0001983704234585956, "loss": 0.6144, "step": 1242 }, { "epoch": 0.5801633605600933, "grad_norm": 0.6796875, "learning_rate": 0.0001983677841915034, "loss": 0.5141, "step": 1243 }, { "epoch": 0.5806301050175029, "grad_norm": 0.96484375, "learning_rate": 0.00019836514280644366, "loss": 0.7074, "step": 1244 }, { "epoch": 0.5810968494749125, "grad_norm": 0.6875, "learning_rate": 0.00019836249930347326, "loss": 0.568, "step": 1245 }, { "epoch": 0.581563593932322, "grad_norm": 0.68359375, "learning_rate": 0.00019835985368264912, "loss": 0.5369, "step": 1246 }, { "epoch": 0.5820303383897316, "grad_norm": 0.69921875, "learning_rate": 0.0001983572059440282, "loss": 0.5687, "step": 1247 }, { "epoch": 0.5824970828471412, "grad_norm": 0.78125, "learning_rate": 0.00019835455608766746, "loss": 0.5049, "step": 1248 }, { "epoch": 0.5829638273045508, "grad_norm": 0.71875, "learning_rate": 0.00019835190411362407, "loss": 0.504, "step": 1249 }, { "epoch": 0.5834305717619603, "grad_norm": 0.828125, "learning_rate": 0.000198349250021955, "loss": 0.5745, "step": 1250 }, { "epoch": 0.5838973162193699, "grad_norm": 0.8671875, "learning_rate": 0.0001983465938127175, "loss": 0.5861, "step": 1251 }, { "epoch": 0.5843640606767795, "grad_norm": 0.66796875, "learning_rate": 0.00019834393548596869, "loss": 0.4657, "step": 1252 }, { "epoch": 0.5848308051341891, "grad_norm": 0.95703125, "learning_rate": 0.00019834127504176583, "loss": 0.6865, "step": 1253 }, { "epoch": 0.5852975495915986, "grad_norm": 0.7890625, "learning_rate": 0.00019833861248016622, "loss": 0.6339, "step": 1254 }, { "epoch": 0.5857642940490082, "grad_norm": 0.7734375, "learning_rate": 0.00019833594780122718, "loss": 0.6107, "step": 1255 }, { "epoch": 0.5862310385064178, "grad_norm": 0.80859375, "learning_rate": 0.00019833328100500608, "loss": 0.5271, "step": 1256 }, { "epoch": 0.5866977829638274, "grad_norm": 0.8359375, "learning_rate": 0.00019833061209156033, "loss": 0.5909, "step": 1257 }, { "epoch": 0.5871645274212369, "grad_norm": 0.765625, "learning_rate": 0.00019832794106094742, "loss": 0.5531, "step": 1258 }, { "epoch": 0.5876312718786464, "grad_norm": 0.7109375, "learning_rate": 0.00019832526791322484, "loss": 0.5723, "step": 1259 }, { "epoch": 0.588098016336056, "grad_norm": 0.73828125, "learning_rate": 0.00019832259264845013, "loss": 0.6176, "step": 1260 }, { "epoch": 0.5885647607934655, "grad_norm": 0.83984375, "learning_rate": 0.0001983199152666809, "loss": 0.5575, "step": 1261 }, { "epoch": 0.5890315052508751, "grad_norm": 0.765625, "learning_rate": 0.00019831723576797482, "loss": 0.5004, "step": 1262 }, { "epoch": 0.5894982497082847, "grad_norm": 0.69140625, "learning_rate": 0.0001983145541523896, "loss": 0.4624, "step": 1263 }, { "epoch": 0.5899649941656943, "grad_norm": 0.94140625, "learning_rate": 0.0001983118704199829, "loss": 0.6366, "step": 1264 }, { "epoch": 0.5904317386231038, "grad_norm": 0.85546875, "learning_rate": 0.00019830918457081257, "loss": 0.5971, "step": 1265 }, { "epoch": 0.5908984830805134, "grad_norm": 0.84765625, "learning_rate": 0.00019830649660493643, "loss": 0.5568, "step": 1266 }, { "epoch": 0.591365227537923, "grad_norm": 0.8828125, "learning_rate": 0.00019830380652241232, "loss": 0.6277, "step": 1267 }, { "epoch": 0.5918319719953326, "grad_norm": 0.94140625, "learning_rate": 0.00019830111432329818, "loss": 0.5998, "step": 1268 }, { "epoch": 0.5922987164527421, "grad_norm": 0.71875, "learning_rate": 0.00019829842000765205, "loss": 0.4945, "step": 1269 }, { "epoch": 0.5927654609101517, "grad_norm": 0.84765625, "learning_rate": 0.0001982957235755318, "loss": 0.5901, "step": 1270 }, { "epoch": 0.5932322053675613, "grad_norm": 0.7734375, "learning_rate": 0.00019829302502699558, "loss": 0.5444, "step": 1271 }, { "epoch": 0.5936989498249708, "grad_norm": 0.7578125, "learning_rate": 0.0001982903243621015, "loss": 0.4735, "step": 1272 }, { "epoch": 0.5941656942823804, "grad_norm": 0.85546875, "learning_rate": 0.00019828762158090766, "loss": 0.6055, "step": 1273 }, { "epoch": 0.59463243873979, "grad_norm": 0.79296875, "learning_rate": 0.00019828491668347225, "loss": 0.5696, "step": 1274 }, { "epoch": 0.5950991831971996, "grad_norm": 0.7265625, "learning_rate": 0.00019828220966985357, "loss": 0.5917, "step": 1275 }, { "epoch": 0.5955659276546091, "grad_norm": 0.8046875, "learning_rate": 0.00019827950054010987, "loss": 0.5807, "step": 1276 }, { "epoch": 0.5960326721120187, "grad_norm": 0.69921875, "learning_rate": 0.00019827678929429946, "loss": 0.5153, "step": 1277 }, { "epoch": 0.5964994165694283, "grad_norm": 0.9296875, "learning_rate": 0.00019827407593248075, "loss": 0.5854, "step": 1278 }, { "epoch": 0.5969661610268379, "grad_norm": 0.93359375, "learning_rate": 0.00019827136045471214, "loss": 0.5567, "step": 1279 }, { "epoch": 0.5974329054842473, "grad_norm": 0.7265625, "learning_rate": 0.0001982686428610521, "loss": 0.424, "step": 1280 }, { "epoch": 0.5978996499416569, "grad_norm": 0.8515625, "learning_rate": 0.00019826592315155917, "loss": 0.5982, "step": 1281 }, { "epoch": 0.5983663943990665, "grad_norm": 0.84375, "learning_rate": 0.00019826320132629188, "loss": 0.4398, "step": 1282 }, { "epoch": 0.598833138856476, "grad_norm": 0.953125, "learning_rate": 0.00019826047738530882, "loss": 0.5948, "step": 1283 }, { "epoch": 0.5992998833138856, "grad_norm": 0.8203125, "learning_rate": 0.00019825775132866868, "loss": 0.5216, "step": 1284 }, { "epoch": 0.5997666277712952, "grad_norm": 0.93359375, "learning_rate": 0.00019825502315643013, "loss": 0.5817, "step": 1285 }, { "epoch": 0.6002333722287048, "grad_norm": 0.75390625, "learning_rate": 0.00019825229286865194, "loss": 0.5673, "step": 1286 }, { "epoch": 0.6007001166861143, "grad_norm": 0.921875, "learning_rate": 0.00019824956046539285, "loss": 0.5603, "step": 1287 }, { "epoch": 0.6011668611435239, "grad_norm": 0.7734375, "learning_rate": 0.0001982468259467117, "loss": 0.6177, "step": 1288 }, { "epoch": 0.6016336056009335, "grad_norm": 0.8359375, "learning_rate": 0.0001982440893126674, "loss": 0.5323, "step": 1289 }, { "epoch": 0.6021003500583431, "grad_norm": 0.859375, "learning_rate": 0.00019824135056331884, "loss": 0.6421, "step": 1290 }, { "epoch": 0.6025670945157526, "grad_norm": 0.78125, "learning_rate": 0.00019823860969872501, "loss": 0.5715, "step": 1291 }, { "epoch": 0.6030338389731622, "grad_norm": 0.59765625, "learning_rate": 0.0001982358667189449, "loss": 0.4738, "step": 1292 }, { "epoch": 0.6035005834305718, "grad_norm": 0.734375, "learning_rate": 0.00019823312162403763, "loss": 0.5069, "step": 1293 }, { "epoch": 0.6039673278879814, "grad_norm": 0.796875, "learning_rate": 0.0001982303744140622, "loss": 0.5627, "step": 1294 }, { "epoch": 0.6044340723453909, "grad_norm": 0.79296875, "learning_rate": 0.00019822762508907787, "loss": 0.6167, "step": 1295 }, { "epoch": 0.6049008168028005, "grad_norm": 0.7265625, "learning_rate": 0.00019822487364914374, "loss": 0.4521, "step": 1296 }, { "epoch": 0.6053675612602101, "grad_norm": 0.890625, "learning_rate": 0.00019822212009431913, "loss": 0.6713, "step": 1297 }, { "epoch": 0.6058343057176196, "grad_norm": 0.90625, "learning_rate": 0.0001982193644246633, "loss": 0.5946, "step": 1298 }, { "epoch": 0.6063010501750292, "grad_norm": 0.80859375, "learning_rate": 0.00019821660664023555, "loss": 0.5451, "step": 1299 }, { "epoch": 0.6067677946324388, "grad_norm": 0.98828125, "learning_rate": 0.0001982138467410953, "loss": 0.5154, "step": 1300 }, { "epoch": 0.6072345390898483, "grad_norm": 0.796875, "learning_rate": 0.00019821108472730197, "loss": 0.5641, "step": 1301 }, { "epoch": 0.6077012835472578, "grad_norm": 0.875, "learning_rate": 0.00019820832059891502, "loss": 0.5768, "step": 1302 }, { "epoch": 0.6081680280046674, "grad_norm": 0.8046875, "learning_rate": 0.00019820555435599393, "loss": 0.5741, "step": 1303 }, { "epoch": 0.608634772462077, "grad_norm": 0.6484375, "learning_rate": 0.00019820278599859832, "loss": 0.5361, "step": 1304 }, { "epoch": 0.6091015169194866, "grad_norm": 0.79296875, "learning_rate": 0.00019820001552678777, "loss": 0.6414, "step": 1305 }, { "epoch": 0.6095682613768961, "grad_norm": 0.68359375, "learning_rate": 0.00019819724294062194, "loss": 0.5521, "step": 1306 }, { "epoch": 0.6100350058343057, "grad_norm": 0.875, "learning_rate": 0.0001981944682401605, "loss": 0.5643, "step": 1307 }, { "epoch": 0.6105017502917153, "grad_norm": 0.6328125, "learning_rate": 0.0001981916914254632, "loss": 0.4732, "step": 1308 }, { "epoch": 0.6109684947491248, "grad_norm": 0.8359375, "learning_rate": 0.00019818891249658987, "loss": 0.5662, "step": 1309 }, { "epoch": 0.6114352392065344, "grad_norm": 0.8046875, "learning_rate": 0.0001981861314536003, "loss": 0.524, "step": 1310 }, { "epoch": 0.611901983663944, "grad_norm": 0.8515625, "learning_rate": 0.00019818334829655435, "loss": 0.5412, "step": 1311 }, { "epoch": 0.6123687281213536, "grad_norm": 0.90234375, "learning_rate": 0.000198180563025512, "loss": 0.6389, "step": 1312 }, { "epoch": 0.6128354725787631, "grad_norm": 0.8046875, "learning_rate": 0.0001981777756405332, "loss": 0.4883, "step": 1313 }, { "epoch": 0.6133022170361727, "grad_norm": 0.86328125, "learning_rate": 0.00019817498614167796, "loss": 0.4852, "step": 1314 }, { "epoch": 0.6137689614935823, "grad_norm": 0.85546875, "learning_rate": 0.00019817219452900636, "loss": 0.5479, "step": 1315 }, { "epoch": 0.6142357059509919, "grad_norm": 0.9140625, "learning_rate": 0.00019816940080257843, "loss": 0.5989, "step": 1316 }, { "epoch": 0.6147024504084014, "grad_norm": 0.71875, "learning_rate": 0.00019816660496245443, "loss": 0.5139, "step": 1317 }, { "epoch": 0.615169194865811, "grad_norm": 0.8515625, "learning_rate": 0.0001981638070086945, "loss": 0.4746, "step": 1318 }, { "epoch": 0.6156359393232206, "grad_norm": 0.91015625, "learning_rate": 0.00019816100694135887, "loss": 0.5103, "step": 1319 }, { "epoch": 0.6161026837806302, "grad_norm": 0.8203125, "learning_rate": 0.00019815820476050785, "loss": 0.4566, "step": 1320 }, { "epoch": 0.6165694282380396, "grad_norm": 1.0, "learning_rate": 0.0001981554004662018, "loss": 0.5635, "step": 1321 }, { "epoch": 0.6170361726954492, "grad_norm": 1.0390625, "learning_rate": 0.00019815259405850104, "loss": 0.5734, "step": 1322 }, { "epoch": 0.6175029171528588, "grad_norm": 0.921875, "learning_rate": 0.00019814978553746605, "loss": 0.6637, "step": 1323 }, { "epoch": 0.6179696616102683, "grad_norm": 0.81640625, "learning_rate": 0.0001981469749031573, "loss": 0.5198, "step": 1324 }, { "epoch": 0.6184364060676779, "grad_norm": 0.7265625, "learning_rate": 0.00019814416215563525, "loss": 0.5453, "step": 1325 }, { "epoch": 0.6189031505250875, "grad_norm": 0.640625, "learning_rate": 0.0001981413472949605, "loss": 0.4322, "step": 1326 }, { "epoch": 0.6193698949824971, "grad_norm": 0.8046875, "learning_rate": 0.00019813853032119366, "loss": 0.5662, "step": 1327 }, { "epoch": 0.6198366394399066, "grad_norm": 0.875, "learning_rate": 0.00019813571123439535, "loss": 0.5567, "step": 1328 }, { "epoch": 0.6203033838973162, "grad_norm": 0.78125, "learning_rate": 0.00019813289003462633, "loss": 0.527, "step": 1329 }, { "epoch": 0.6207701283547258, "grad_norm": 0.77734375, "learning_rate": 0.00019813006672194728, "loss": 0.5942, "step": 1330 }, { "epoch": 0.6212368728121354, "grad_norm": 0.859375, "learning_rate": 0.00019812724129641903, "loss": 0.6001, "step": 1331 }, { "epoch": 0.6217036172695449, "grad_norm": 0.6796875, "learning_rate": 0.0001981244137581024, "loss": 0.5326, "step": 1332 }, { "epoch": 0.6221703617269545, "grad_norm": 0.859375, "learning_rate": 0.0001981215841070583, "loss": 0.6015, "step": 1333 }, { "epoch": 0.6226371061843641, "grad_norm": 0.796875, "learning_rate": 0.00019811875234334756, "loss": 0.4905, "step": 1334 }, { "epoch": 0.6231038506417736, "grad_norm": 0.79296875, "learning_rate": 0.00019811591846703128, "loss": 0.5234, "step": 1335 }, { "epoch": 0.6235705950991832, "grad_norm": 0.8515625, "learning_rate": 0.00019811308247817038, "loss": 0.5209, "step": 1336 }, { "epoch": 0.6240373395565928, "grad_norm": 0.75, "learning_rate": 0.00019811024437682594, "loss": 0.5024, "step": 1337 }, { "epoch": 0.6245040840140024, "grad_norm": 0.921875, "learning_rate": 0.00019810740416305913, "loss": 0.549, "step": 1338 }, { "epoch": 0.6249708284714119, "grad_norm": 0.9921875, "learning_rate": 0.00019810456183693104, "loss": 0.6548, "step": 1339 }, { "epoch": 0.6254375729288215, "grad_norm": 0.875, "learning_rate": 0.00019810171739850286, "loss": 0.4852, "step": 1340 }, { "epoch": 0.6259043173862311, "grad_norm": 0.953125, "learning_rate": 0.0001980988708478359, "loss": 0.4537, "step": 1341 }, { "epoch": 0.6263710618436406, "grad_norm": 0.85546875, "learning_rate": 0.00019809602218499137, "loss": 0.6128, "step": 1342 }, { "epoch": 0.6268378063010501, "grad_norm": 0.9296875, "learning_rate": 0.00019809317141003066, "loss": 0.5842, "step": 1343 }, { "epoch": 0.6273045507584597, "grad_norm": 0.80859375, "learning_rate": 0.00019809031852301514, "loss": 0.5787, "step": 1344 }, { "epoch": 0.6277712952158693, "grad_norm": 0.79296875, "learning_rate": 0.00019808746352400624, "loss": 0.5334, "step": 1345 }, { "epoch": 0.6282380396732788, "grad_norm": 0.765625, "learning_rate": 0.0001980846064130654, "loss": 0.4776, "step": 1346 }, { "epoch": 0.6287047841306884, "grad_norm": 0.71875, "learning_rate": 0.00019808174719025413, "loss": 0.3992, "step": 1347 }, { "epoch": 0.629171528588098, "grad_norm": 0.78125, "learning_rate": 0.00019807888585563404, "loss": 0.4611, "step": 1348 }, { "epoch": 0.6296382730455076, "grad_norm": 0.7578125, "learning_rate": 0.00019807602240926674, "loss": 0.4907, "step": 1349 }, { "epoch": 0.6301050175029171, "grad_norm": 0.85546875, "learning_rate": 0.00019807315685121385, "loss": 0.5409, "step": 1350 }, { "epoch": 0.6305717619603267, "grad_norm": 0.79296875, "learning_rate": 0.00019807028918153707, "loss": 0.4067, "step": 1351 }, { "epoch": 0.6310385064177363, "grad_norm": 0.8203125, "learning_rate": 0.00019806741940029817, "loss": 0.4746, "step": 1352 }, { "epoch": 0.6315052508751459, "grad_norm": 0.94140625, "learning_rate": 0.0001980645475075589, "loss": 0.5216, "step": 1353 }, { "epoch": 0.6319719953325554, "grad_norm": 0.9140625, "learning_rate": 0.00019806167350338113, "loss": 0.5447, "step": 1354 }, { "epoch": 0.632438739789965, "grad_norm": 0.7421875, "learning_rate": 0.00019805879738782673, "loss": 0.4504, "step": 1355 }, { "epoch": 0.6329054842473746, "grad_norm": 1.0, "learning_rate": 0.00019805591916095762, "loss": 0.6486, "step": 1356 }, { "epoch": 0.6333722287047842, "grad_norm": 0.8359375, "learning_rate": 0.00019805303882283577, "loss": 0.5157, "step": 1357 }, { "epoch": 0.6338389731621937, "grad_norm": 0.92578125, "learning_rate": 0.00019805015637352322, "loss": 0.5648, "step": 1358 }, { "epoch": 0.6343057176196033, "grad_norm": 0.9140625, "learning_rate": 0.000198047271813082, "loss": 0.5111, "step": 1359 }, { "epoch": 0.6347724620770129, "grad_norm": 0.9765625, "learning_rate": 0.0001980443851415742, "loss": 0.5553, "step": 1360 }, { "epoch": 0.6352392065344225, "grad_norm": 0.84375, "learning_rate": 0.00019804149635906207, "loss": 0.5729, "step": 1361 }, { "epoch": 0.635705950991832, "grad_norm": 0.73046875, "learning_rate": 0.0001980386054656077, "loss": 0.5166, "step": 1362 }, { "epoch": 0.6361726954492415, "grad_norm": 0.7890625, "learning_rate": 0.0001980357124612734, "loss": 0.4623, "step": 1363 }, { "epoch": 0.6366394399066511, "grad_norm": 0.890625, "learning_rate": 0.00019803281734612145, "loss": 0.5095, "step": 1364 }, { "epoch": 0.6371061843640606, "grad_norm": 0.78515625, "learning_rate": 0.00019802992012021414, "loss": 0.4926, "step": 1365 }, { "epoch": 0.6375729288214702, "grad_norm": 0.83984375, "learning_rate": 0.0001980270207836139, "loss": 0.5714, "step": 1366 }, { "epoch": 0.6380396732788798, "grad_norm": 0.81640625, "learning_rate": 0.00019802411933638313, "loss": 0.4658, "step": 1367 }, { "epoch": 0.6385064177362894, "grad_norm": 0.859375, "learning_rate": 0.00019802121577858434, "loss": 0.5335, "step": 1368 }, { "epoch": 0.6389731621936989, "grad_norm": 0.796875, "learning_rate": 0.00019801831011027998, "loss": 0.5037, "step": 1369 }, { "epoch": 0.6394399066511085, "grad_norm": 0.85546875, "learning_rate": 0.00019801540233153268, "loss": 0.5258, "step": 1370 }, { "epoch": 0.6399066511085181, "grad_norm": 0.875, "learning_rate": 0.000198012492442405, "loss": 0.5598, "step": 1371 }, { "epoch": 0.6403733955659276, "grad_norm": 0.8046875, "learning_rate": 0.00019800958044295964, "loss": 0.551, "step": 1372 }, { "epoch": 0.6408401400233372, "grad_norm": 1.0390625, "learning_rate": 0.00019800666633325924, "loss": 0.6314, "step": 1373 }, { "epoch": 0.6413068844807468, "grad_norm": 0.82421875, "learning_rate": 0.00019800375011336656, "loss": 0.4747, "step": 1374 }, { "epoch": 0.6417736289381564, "grad_norm": 0.74609375, "learning_rate": 0.00019800083178334442, "loss": 0.4619, "step": 1375 }, { "epoch": 0.6422403733955659, "grad_norm": 0.76953125, "learning_rate": 0.00019799791134325566, "loss": 0.495, "step": 1376 }, { "epoch": 0.6427071178529755, "grad_norm": 0.8984375, "learning_rate": 0.0001979949887931631, "loss": 0.5978, "step": 1377 }, { "epoch": 0.6431738623103851, "grad_norm": 1.0078125, "learning_rate": 0.00019799206413312974, "loss": 0.6492, "step": 1378 }, { "epoch": 0.6436406067677947, "grad_norm": 0.7578125, "learning_rate": 0.0001979891373632185, "loss": 0.4621, "step": 1379 }, { "epoch": 0.6441073512252042, "grad_norm": 0.828125, "learning_rate": 0.0001979862084834924, "loss": 0.5321, "step": 1380 }, { "epoch": 0.6445740956826138, "grad_norm": 0.92578125, "learning_rate": 0.00019798327749401452, "loss": 0.5326, "step": 1381 }, { "epoch": 0.6450408401400234, "grad_norm": 0.98046875, "learning_rate": 0.00019798034439484798, "loss": 0.5767, "step": 1382 }, { "epoch": 0.645507584597433, "grad_norm": 0.6640625, "learning_rate": 0.00019797740918605587, "loss": 0.4531, "step": 1383 }, { "epoch": 0.6459743290548424, "grad_norm": 0.890625, "learning_rate": 0.00019797447186770147, "loss": 0.4831, "step": 1384 }, { "epoch": 0.646441073512252, "grad_norm": 0.78515625, "learning_rate": 0.00019797153243984798, "loss": 0.5255, "step": 1385 }, { "epoch": 0.6469078179696616, "grad_norm": 0.7734375, "learning_rate": 0.0001979685909025587, "loss": 0.4433, "step": 1386 }, { "epoch": 0.6473745624270711, "grad_norm": 0.8984375, "learning_rate": 0.00019796564725589697, "loss": 0.5535, "step": 1387 }, { "epoch": 0.6478413068844807, "grad_norm": 0.765625, "learning_rate": 0.00019796270149992614, "loss": 0.4754, "step": 1388 }, { "epoch": 0.6483080513418903, "grad_norm": 0.8125, "learning_rate": 0.00019795975363470964, "loss": 0.4592, "step": 1389 }, { "epoch": 0.6487747957992999, "grad_norm": 0.92578125, "learning_rate": 0.00019795680366031096, "loss": 0.5216, "step": 1390 }, { "epoch": 0.6492415402567094, "grad_norm": 0.8671875, "learning_rate": 0.00019795385157679365, "loss": 0.5227, "step": 1391 }, { "epoch": 0.649708284714119, "grad_norm": 0.79296875, "learning_rate": 0.0001979508973842212, "loss": 0.4431, "step": 1392 }, { "epoch": 0.6501750291715286, "grad_norm": 0.72265625, "learning_rate": 0.00019794794108265727, "loss": 0.5014, "step": 1393 }, { "epoch": 0.6506417736289382, "grad_norm": 1.09375, "learning_rate": 0.00019794498267216548, "loss": 0.6202, "step": 1394 }, { "epoch": 0.6511085180863477, "grad_norm": 0.83984375, "learning_rate": 0.00019794202215280956, "loss": 0.5228, "step": 1395 }, { "epoch": 0.6515752625437573, "grad_norm": 0.76953125, "learning_rate": 0.00019793905952465322, "loss": 0.5027, "step": 1396 }, { "epoch": 0.6520420070011669, "grad_norm": 0.91015625, "learning_rate": 0.00019793609478776025, "loss": 0.5725, "step": 1397 }, { "epoch": 0.6525087514585765, "grad_norm": 0.9609375, "learning_rate": 0.0001979331279421945, "loss": 0.4994, "step": 1398 }, { "epoch": 0.652975495915986, "grad_norm": 0.828125, "learning_rate": 0.00019793015898801985, "loss": 0.425, "step": 1399 }, { "epoch": 0.6534422403733956, "grad_norm": 0.828125, "learning_rate": 0.00019792718792530024, "loss": 0.5431, "step": 1400 }, { "epoch": 0.6539089848308052, "grad_norm": 0.84765625, "learning_rate": 0.0001979242147540996, "loss": 0.5034, "step": 1401 }, { "epoch": 0.6543757292882147, "grad_norm": 0.97265625, "learning_rate": 0.000197921239474482, "loss": 0.5028, "step": 1402 }, { "epoch": 0.6548424737456243, "grad_norm": 0.953125, "learning_rate": 0.00019791826208651143, "loss": 0.5547, "step": 1403 }, { "epoch": 0.6553092182030339, "grad_norm": 0.9296875, "learning_rate": 0.00019791528259025207, "loss": 0.6541, "step": 1404 }, { "epoch": 0.6557759626604434, "grad_norm": 0.859375, "learning_rate": 0.000197912300985768, "loss": 0.5839, "step": 1405 }, { "epoch": 0.6562427071178529, "grad_norm": 0.8046875, "learning_rate": 0.00019790931727312348, "loss": 0.5855, "step": 1406 }, { "epoch": 0.6567094515752625, "grad_norm": 0.75, "learning_rate": 0.0001979063314523827, "loss": 0.5087, "step": 1407 }, { "epoch": 0.6571761960326721, "grad_norm": 0.87109375, "learning_rate": 0.00019790334352361, "loss": 0.4872, "step": 1408 }, { "epoch": 0.6576429404900817, "grad_norm": 0.76953125, "learning_rate": 0.0001979003534868697, "loss": 0.478, "step": 1409 }, { "epoch": 0.6581096849474912, "grad_norm": 0.8671875, "learning_rate": 0.00019789736134222615, "loss": 0.5371, "step": 1410 }, { "epoch": 0.6585764294049008, "grad_norm": 0.89453125, "learning_rate": 0.00019789436708974377, "loss": 0.523, "step": 1411 }, { "epoch": 0.6590431738623104, "grad_norm": 0.99609375, "learning_rate": 0.0001978913707294871, "loss": 0.6405, "step": 1412 }, { "epoch": 0.65950991831972, "grad_norm": 0.859375, "learning_rate": 0.00019788837226152056, "loss": 0.5186, "step": 1413 }, { "epoch": 0.6599766627771295, "grad_norm": 0.8046875, "learning_rate": 0.0001978853716859088, "loss": 0.5361, "step": 1414 }, { "epoch": 0.6604434072345391, "grad_norm": 0.73046875, "learning_rate": 0.00019788236900271635, "loss": 0.4894, "step": 1415 }, { "epoch": 0.6609101516919487, "grad_norm": 0.76953125, "learning_rate": 0.0001978793642120079, "loss": 0.5222, "step": 1416 }, { "epoch": 0.6613768961493582, "grad_norm": 0.7109375, "learning_rate": 0.00019787635731384815, "loss": 0.4927, "step": 1417 }, { "epoch": 0.6618436406067678, "grad_norm": 0.69921875, "learning_rate": 0.0001978733483083018, "loss": 0.4338, "step": 1418 }, { "epoch": 0.6623103850641774, "grad_norm": 0.7421875, "learning_rate": 0.0001978703371954337, "loss": 0.4584, "step": 1419 }, { "epoch": 0.662777129521587, "grad_norm": 0.96875, "learning_rate": 0.00019786732397530864, "loss": 0.5202, "step": 1420 }, { "epoch": 0.6632438739789965, "grad_norm": 0.76953125, "learning_rate": 0.0001978643086479915, "loss": 0.5412, "step": 1421 }, { "epoch": 0.6637106184364061, "grad_norm": 0.7890625, "learning_rate": 0.00019786129121354722, "loss": 0.5155, "step": 1422 }, { "epoch": 0.6641773628938157, "grad_norm": 0.9296875, "learning_rate": 0.00019785827167204077, "loss": 0.551, "step": 1423 }, { "epoch": 0.6646441073512253, "grad_norm": 0.953125, "learning_rate": 0.00019785525002353715, "loss": 0.5832, "step": 1424 }, { "epoch": 0.6651108518086347, "grad_norm": 1.0390625, "learning_rate": 0.00019785222626810144, "loss": 0.4761, "step": 1425 }, { "epoch": 0.6655775962660443, "grad_norm": 0.95703125, "learning_rate": 0.0001978492004057987, "loss": 0.4961, "step": 1426 }, { "epoch": 0.6660443407234539, "grad_norm": 0.89453125, "learning_rate": 0.0001978461724366941, "loss": 0.4185, "step": 1427 }, { "epoch": 0.6665110851808634, "grad_norm": 0.8671875, "learning_rate": 0.00019784314236085288, "loss": 0.4806, "step": 1428 }, { "epoch": 0.666977829638273, "grad_norm": 0.84375, "learning_rate": 0.00019784011017834024, "loss": 0.4095, "step": 1429 }, { "epoch": 0.6674445740956826, "grad_norm": 1.046875, "learning_rate": 0.00019783707588922144, "loss": 0.4009, "step": 1430 }, { "epoch": 0.6679113185530922, "grad_norm": 1.140625, "learning_rate": 0.00019783403949356188, "loss": 0.5247, "step": 1431 }, { "epoch": 0.6683780630105017, "grad_norm": 0.91796875, "learning_rate": 0.0001978310009914269, "loss": 0.5265, "step": 1432 }, { "epoch": 0.6688448074679113, "grad_norm": 0.80859375, "learning_rate": 0.00019782796038288192, "loss": 0.4718, "step": 1433 }, { "epoch": 0.6693115519253209, "grad_norm": 0.796875, "learning_rate": 0.0001978249176679924, "loss": 0.5344, "step": 1434 }, { "epoch": 0.6697782963827305, "grad_norm": 0.89453125, "learning_rate": 0.0001978218728468239, "loss": 0.4821, "step": 1435 }, { "epoch": 0.67024504084014, "grad_norm": 0.7734375, "learning_rate": 0.00019781882591944188, "loss": 0.3894, "step": 1436 }, { "epoch": 0.6707117852975496, "grad_norm": 0.75390625, "learning_rate": 0.00019781577688591205, "loss": 0.4959, "step": 1437 }, { "epoch": 0.6711785297549592, "grad_norm": 0.91796875, "learning_rate": 0.00019781272574629998, "loss": 0.6066, "step": 1438 }, { "epoch": 0.6716452742123687, "grad_norm": 0.91015625, "learning_rate": 0.0001978096725006714, "loss": 0.4791, "step": 1439 }, { "epoch": 0.6721120186697783, "grad_norm": 0.79296875, "learning_rate": 0.0001978066171490921, "loss": 0.4337, "step": 1440 }, { "epoch": 0.6725787631271879, "grad_norm": 0.82421875, "learning_rate": 0.0001978035596916278, "loss": 0.4816, "step": 1441 }, { "epoch": 0.6730455075845975, "grad_norm": 0.99609375, "learning_rate": 0.00019780050012834432, "loss": 0.444, "step": 1442 }, { "epoch": 0.673512252042007, "grad_norm": 1.0390625, "learning_rate": 0.0001977974384593076, "loss": 0.5353, "step": 1443 }, { "epoch": 0.6739789964994166, "grad_norm": 0.73828125, "learning_rate": 0.00019779437468458346, "loss": 0.4419, "step": 1444 }, { "epoch": 0.6744457409568262, "grad_norm": 0.79296875, "learning_rate": 0.00019779130880423796, "loss": 0.5248, "step": 1445 }, { "epoch": 0.6749124854142357, "grad_norm": 0.8671875, "learning_rate": 0.0001977882408183371, "loss": 0.4598, "step": 1446 }, { "epoch": 0.6753792298716452, "grad_norm": 0.89453125, "learning_rate": 0.00019778517072694687, "loss": 0.5538, "step": 1447 }, { "epoch": 0.6758459743290548, "grad_norm": 0.8125, "learning_rate": 0.00019778209853013349, "loss": 0.5071, "step": 1448 }, { "epoch": 0.6763127187864644, "grad_norm": 0.91015625, "learning_rate": 0.00019777902422796296, "loss": 0.5546, "step": 1449 }, { "epoch": 0.676779463243874, "grad_norm": 0.84375, "learning_rate": 0.0001977759478205016, "loss": 0.4922, "step": 1450 }, { "epoch": 0.6772462077012835, "grad_norm": 0.7890625, "learning_rate": 0.00019777286930781557, "loss": 0.5289, "step": 1451 }, { "epoch": 0.6777129521586931, "grad_norm": 0.859375, "learning_rate": 0.0001977697886899712, "loss": 0.4832, "step": 1452 }, { "epoch": 0.6781796966161027, "grad_norm": 0.77734375, "learning_rate": 0.00019776670596703482, "loss": 0.4692, "step": 1453 }, { "epoch": 0.6786464410735122, "grad_norm": 0.7421875, "learning_rate": 0.00019776362113907276, "loss": 0.5184, "step": 1454 }, { "epoch": 0.6791131855309218, "grad_norm": 0.83984375, "learning_rate": 0.0001977605342061515, "loss": 0.5701, "step": 1455 }, { "epoch": 0.6795799299883314, "grad_norm": 0.8828125, "learning_rate": 0.0001977574451683374, "loss": 0.553, "step": 1456 }, { "epoch": 0.680046674445741, "grad_norm": 0.74609375, "learning_rate": 0.0001977543540256971, "loss": 0.4372, "step": 1457 }, { "epoch": 0.6805134189031505, "grad_norm": 1.015625, "learning_rate": 0.0001977512607782971, "loss": 0.451, "step": 1458 }, { "epoch": 0.6809801633605601, "grad_norm": 0.77734375, "learning_rate": 0.000197748165426204, "loss": 0.5201, "step": 1459 }, { "epoch": 0.6814469078179697, "grad_norm": 0.84375, "learning_rate": 0.0001977450679694844, "loss": 0.541, "step": 1460 }, { "epoch": 0.6819136522753793, "grad_norm": 0.7265625, "learning_rate": 0.00019774196840820506, "loss": 0.5176, "step": 1461 }, { "epoch": 0.6823803967327888, "grad_norm": 0.91796875, "learning_rate": 0.0001977388667424327, "loss": 0.591, "step": 1462 }, { "epoch": 0.6828471411901984, "grad_norm": 0.875, "learning_rate": 0.0001977357629722341, "loss": 0.5705, "step": 1463 }, { "epoch": 0.683313885647608, "grad_norm": 0.94921875, "learning_rate": 0.00019773265709767607, "loss": 0.405, "step": 1464 }, { "epoch": 0.6837806301050176, "grad_norm": 0.78125, "learning_rate": 0.0001977295491188255, "loss": 0.4849, "step": 1465 }, { "epoch": 0.6842473745624271, "grad_norm": 0.76953125, "learning_rate": 0.0001977264390357493, "loss": 0.5683, "step": 1466 }, { "epoch": 0.6847141190198366, "grad_norm": 0.734375, "learning_rate": 0.00019772332684851444, "loss": 0.4424, "step": 1467 }, { "epoch": 0.6851808634772462, "grad_norm": 0.88671875, "learning_rate": 0.00019772021255718797, "loss": 0.5637, "step": 1468 }, { "epoch": 0.6856476079346557, "grad_norm": 1.0546875, "learning_rate": 0.00019771709616183685, "loss": 0.6238, "step": 1469 }, { "epoch": 0.6861143523920653, "grad_norm": 0.91796875, "learning_rate": 0.00019771397766252827, "loss": 0.5114, "step": 1470 }, { "epoch": 0.6865810968494749, "grad_norm": 0.90625, "learning_rate": 0.0001977108570593293, "loss": 0.5068, "step": 1471 }, { "epoch": 0.6870478413068845, "grad_norm": 0.90625, "learning_rate": 0.00019770773435230716, "loss": 0.5077, "step": 1472 }, { "epoch": 0.687514585764294, "grad_norm": 0.90234375, "learning_rate": 0.00019770460954152908, "loss": 0.596, "step": 1473 }, { "epoch": 0.6879813302217036, "grad_norm": 0.7421875, "learning_rate": 0.00019770148262706242, "loss": 0.4074, "step": 1474 }, { "epoch": 0.6884480746791132, "grad_norm": 0.9453125, "learning_rate": 0.0001976983536089744, "loss": 0.4277, "step": 1475 }, { "epoch": 0.6889148191365227, "grad_norm": 0.890625, "learning_rate": 0.00019769522248733241, "loss": 0.4967, "step": 1476 }, { "epoch": 0.6893815635939323, "grad_norm": 0.79296875, "learning_rate": 0.0001976920892622039, "loss": 0.4541, "step": 1477 }, { "epoch": 0.6898483080513419, "grad_norm": 0.9375, "learning_rate": 0.0001976889539336563, "loss": 0.4746, "step": 1478 }, { "epoch": 0.6903150525087515, "grad_norm": 0.94140625, "learning_rate": 0.00019768581650175717, "loss": 0.5254, "step": 1479 }, { "epoch": 0.690781796966161, "grad_norm": 0.9375, "learning_rate": 0.00019768267696657402, "loss": 0.4079, "step": 1480 }, { "epoch": 0.6912485414235706, "grad_norm": 0.94140625, "learning_rate": 0.00019767953532817442, "loss": 0.4263, "step": 1481 }, { "epoch": 0.6917152858809802, "grad_norm": 0.9140625, "learning_rate": 0.00019767639158662606, "loss": 0.4549, "step": 1482 }, { "epoch": 0.6921820303383898, "grad_norm": 0.8828125, "learning_rate": 0.00019767324574199665, "loss": 0.4927, "step": 1483 }, { "epoch": 0.6926487747957993, "grad_norm": 1.0625, "learning_rate": 0.00019767009779435383, "loss": 0.5501, "step": 1484 }, { "epoch": 0.6931155192532089, "grad_norm": 1.0390625, "learning_rate": 0.0001976669477437655, "loss": 0.5278, "step": 1485 }, { "epoch": 0.6935822637106185, "grad_norm": 0.796875, "learning_rate": 0.0001976637955902994, "loss": 0.5078, "step": 1486 }, { "epoch": 0.6940490081680281, "grad_norm": 0.88671875, "learning_rate": 0.0001976606413340234, "loss": 0.5094, "step": 1487 }, { "epoch": 0.6945157526254375, "grad_norm": 0.90625, "learning_rate": 0.00019765748497500545, "loss": 0.5271, "step": 1488 }, { "epoch": 0.6949824970828471, "grad_norm": 0.75390625, "learning_rate": 0.0001976543265133135, "loss": 0.4738, "step": 1489 }, { "epoch": 0.6954492415402567, "grad_norm": 0.9453125, "learning_rate": 0.00019765116594901554, "loss": 0.5253, "step": 1490 }, { "epoch": 0.6959159859976662, "grad_norm": 0.83984375, "learning_rate": 0.00019764800328217962, "loss": 0.5159, "step": 1491 }, { "epoch": 0.6963827304550758, "grad_norm": 0.82421875, "learning_rate": 0.00019764483851287387, "loss": 0.5285, "step": 1492 }, { "epoch": 0.6968494749124854, "grad_norm": 0.7421875, "learning_rate": 0.0001976416716411664, "loss": 0.4709, "step": 1493 }, { "epoch": 0.697316219369895, "grad_norm": 0.7109375, "learning_rate": 0.00019763850266712543, "loss": 0.3935, "step": 1494 }, { "epoch": 0.6977829638273045, "grad_norm": 0.73828125, "learning_rate": 0.00019763533159081913, "loss": 0.5108, "step": 1495 }, { "epoch": 0.6982497082847141, "grad_norm": 0.87890625, "learning_rate": 0.0001976321584123158, "loss": 0.4132, "step": 1496 }, { "epoch": 0.6987164527421237, "grad_norm": 0.83984375, "learning_rate": 0.0001976289831316838, "loss": 0.5321, "step": 1497 }, { "epoch": 0.6991831971995333, "grad_norm": 0.90234375, "learning_rate": 0.00019762580574899147, "loss": 0.5543, "step": 1498 }, { "epoch": 0.6996499416569428, "grad_norm": 0.89453125, "learning_rate": 0.00019762262626430723, "loss": 0.4831, "step": 1499 }, { "epoch": 0.7001166861143524, "grad_norm": 1.0234375, "learning_rate": 0.00019761944467769956, "loss": 0.4984, "step": 1500 }, { "epoch": 0.700583430571762, "grad_norm": 0.765625, "learning_rate": 0.00019761626098923685, "loss": 0.4496, "step": 1501 }, { "epoch": 0.7010501750291716, "grad_norm": 1.0078125, "learning_rate": 0.00019761307519898782, "loss": 0.541, "step": 1502 }, { "epoch": 0.7015169194865811, "grad_norm": 0.73046875, "learning_rate": 0.00019760988730702095, "loss": 0.4288, "step": 1503 }, { "epoch": 0.7019836639439907, "grad_norm": 0.69921875, "learning_rate": 0.0001976066973134049, "loss": 0.4098, "step": 1504 }, { "epoch": 0.7024504084014003, "grad_norm": 1.0234375, "learning_rate": 0.00019760350521820836, "loss": 0.4283, "step": 1505 }, { "epoch": 0.7029171528588098, "grad_norm": 0.87109375, "learning_rate": 0.00019760031102150006, "loss": 0.4578, "step": 1506 }, { "epoch": 0.7033838973162194, "grad_norm": 0.9609375, "learning_rate": 0.00019759711472334875, "loss": 0.602, "step": 1507 }, { "epoch": 0.703850641773629, "grad_norm": 0.7890625, "learning_rate": 0.0001975939163238233, "loss": 0.4655, "step": 1508 }, { "epoch": 0.7043173862310385, "grad_norm": 0.9140625, "learning_rate": 0.00019759071582299255, "loss": 0.5103, "step": 1509 }, { "epoch": 0.704784130688448, "grad_norm": 0.92578125, "learning_rate": 0.0001975875132209254, "loss": 0.572, "step": 1510 }, { "epoch": 0.7052508751458576, "grad_norm": 0.92578125, "learning_rate": 0.00019758430851769086, "loss": 0.5164, "step": 1511 }, { "epoch": 0.7057176196032672, "grad_norm": 0.890625, "learning_rate": 0.00019758110171335785, "loss": 0.435, "step": 1512 }, { "epoch": 0.7061843640606768, "grad_norm": 0.8203125, "learning_rate": 0.00019757789280799544, "loss": 0.3969, "step": 1513 }, { "epoch": 0.7066511085180863, "grad_norm": 0.6328125, "learning_rate": 0.00019757468180167274, "loss": 0.3589, "step": 1514 }, { "epoch": 0.7071178529754959, "grad_norm": 0.8671875, "learning_rate": 0.00019757146869445888, "loss": 0.5392, "step": 1515 }, { "epoch": 0.7075845974329055, "grad_norm": 0.73828125, "learning_rate": 0.00019756825348642306, "loss": 0.3913, "step": 1516 }, { "epoch": 0.708051341890315, "grad_norm": 1.0, "learning_rate": 0.00019756503617763448, "loss": 0.4911, "step": 1517 }, { "epoch": 0.7085180863477246, "grad_norm": 0.9296875, "learning_rate": 0.00019756181676816242, "loss": 0.5328, "step": 1518 }, { "epoch": 0.7089848308051342, "grad_norm": 0.83984375, "learning_rate": 0.00019755859525807617, "loss": 0.5472, "step": 1519 }, { "epoch": 0.7094515752625438, "grad_norm": 0.98046875, "learning_rate": 0.00019755537164744516, "loss": 0.5794, "step": 1520 }, { "epoch": 0.7099183197199533, "grad_norm": 0.91015625, "learning_rate": 0.00019755214593633873, "loss": 0.547, "step": 1521 }, { "epoch": 0.7103850641773629, "grad_norm": 0.8984375, "learning_rate": 0.00019754891812482636, "loss": 0.4875, "step": 1522 }, { "epoch": 0.7108518086347725, "grad_norm": 0.953125, "learning_rate": 0.00019754568821297758, "loss": 0.5612, "step": 1523 }, { "epoch": 0.7113185530921821, "grad_norm": 0.84765625, "learning_rate": 0.0001975424562008619, "loss": 0.4908, "step": 1524 }, { "epoch": 0.7117852975495916, "grad_norm": 0.75390625, "learning_rate": 0.00019753922208854888, "loss": 0.4195, "step": 1525 }, { "epoch": 0.7122520420070012, "grad_norm": 0.8203125, "learning_rate": 0.00019753598587610822, "loss": 0.493, "step": 1526 }, { "epoch": 0.7127187864644108, "grad_norm": 0.85546875, "learning_rate": 0.00019753274756360956, "loss": 0.4796, "step": 1527 }, { "epoch": 0.7131855309218204, "grad_norm": 1.1171875, "learning_rate": 0.0001975295071511226, "loss": 0.5781, "step": 1528 }, { "epoch": 0.7136522753792298, "grad_norm": 0.765625, "learning_rate": 0.00019752626463871717, "loss": 0.3916, "step": 1529 }, { "epoch": 0.7141190198366394, "grad_norm": 0.84375, "learning_rate": 0.00019752302002646306, "loss": 0.557, "step": 1530 }, { "epoch": 0.714585764294049, "grad_norm": 0.87890625, "learning_rate": 0.00019751977331443013, "loss": 0.4787, "step": 1531 }, { "epoch": 0.7150525087514585, "grad_norm": 0.8671875, "learning_rate": 0.00019751652450268825, "loss": 0.4198, "step": 1532 }, { "epoch": 0.7155192532088681, "grad_norm": 0.97265625, "learning_rate": 0.00019751327359130742, "loss": 0.478, "step": 1533 }, { "epoch": 0.7159859976662777, "grad_norm": 0.8828125, "learning_rate": 0.00019751002058035765, "loss": 0.5439, "step": 1534 }, { "epoch": 0.7164527421236873, "grad_norm": 0.74609375, "learning_rate": 0.00019750676546990888, "loss": 0.4628, "step": 1535 }, { "epoch": 0.7169194865810968, "grad_norm": 1.0546875, "learning_rate": 0.0001975035082600313, "loss": 0.5295, "step": 1536 }, { "epoch": 0.7173862310385064, "grad_norm": 0.953125, "learning_rate": 0.00019750024895079499, "loss": 0.4579, "step": 1537 }, { "epoch": 0.717852975495916, "grad_norm": 1.03125, "learning_rate": 0.00019749698754227018, "loss": 0.4844, "step": 1538 }, { "epoch": 0.7183197199533256, "grad_norm": 0.77734375, "learning_rate": 0.00019749372403452702, "loss": 0.4228, "step": 1539 }, { "epoch": 0.7187864644107351, "grad_norm": 1.046875, "learning_rate": 0.0001974904584276358, "loss": 0.5105, "step": 1540 }, { "epoch": 0.7192532088681447, "grad_norm": 0.80859375, "learning_rate": 0.00019748719072166685, "loss": 0.5078, "step": 1541 }, { "epoch": 0.7197199533255543, "grad_norm": 0.8125, "learning_rate": 0.00019748392091669054, "loss": 0.4045, "step": 1542 }, { "epoch": 0.7201866977829638, "grad_norm": 0.8203125, "learning_rate": 0.00019748064901277724, "loss": 0.3985, "step": 1543 }, { "epoch": 0.7206534422403734, "grad_norm": 0.828125, "learning_rate": 0.00019747737500999741, "loss": 0.4223, "step": 1544 }, { "epoch": 0.721120186697783, "grad_norm": 0.75, "learning_rate": 0.00019747409890842155, "loss": 0.3918, "step": 1545 }, { "epoch": 0.7215869311551926, "grad_norm": 0.70703125, "learning_rate": 0.00019747082070812016, "loss": 0.4111, "step": 1546 }, { "epoch": 0.7220536756126021, "grad_norm": 0.95703125, "learning_rate": 0.0001974675404091639, "loss": 0.4737, "step": 1547 }, { "epoch": 0.7225204200700117, "grad_norm": 1.2734375, "learning_rate": 0.00019746425801162333, "loss": 0.474, "step": 1548 }, { "epoch": 0.7229871645274213, "grad_norm": 1.015625, "learning_rate": 0.00019746097351556915, "loss": 0.5133, "step": 1549 }, { "epoch": 0.7234539089848308, "grad_norm": 0.90234375, "learning_rate": 0.00019745768692107208, "loss": 0.4541, "step": 1550 }, { "epoch": 0.7239206534422403, "grad_norm": 0.88671875, "learning_rate": 0.0001974543982282029, "loss": 0.477, "step": 1551 }, { "epoch": 0.7243873978996499, "grad_norm": 1.0546875, "learning_rate": 0.00019745110743703235, "loss": 0.5925, "step": 1552 }, { "epoch": 0.7248541423570595, "grad_norm": 0.81640625, "learning_rate": 0.00019744781454763134, "loss": 0.4749, "step": 1553 }, { "epoch": 0.725320886814469, "grad_norm": 0.71875, "learning_rate": 0.0001974445195600708, "loss": 0.4109, "step": 1554 }, { "epoch": 0.7257876312718786, "grad_norm": 0.8125, "learning_rate": 0.0001974412224744216, "loss": 0.4807, "step": 1555 }, { "epoch": 0.7262543757292882, "grad_norm": 0.87890625, "learning_rate": 0.0001974379232907548, "loss": 0.525, "step": 1556 }, { "epoch": 0.7267211201866978, "grad_norm": 0.89453125, "learning_rate": 0.00019743462200914137, "loss": 0.463, "step": 1557 }, { "epoch": 0.7271878646441073, "grad_norm": 0.7265625, "learning_rate": 0.00019743131862965245, "loss": 0.3741, "step": 1558 }, { "epoch": 0.7276546091015169, "grad_norm": 0.92578125, "learning_rate": 0.0001974280131523591, "loss": 0.492, "step": 1559 }, { "epoch": 0.7281213535589265, "grad_norm": 0.89453125, "learning_rate": 0.00019742470557733256, "loss": 0.5732, "step": 1560 }, { "epoch": 0.7285880980163361, "grad_norm": 0.8984375, "learning_rate": 0.00019742139590464404, "loss": 0.3877, "step": 1561 }, { "epoch": 0.7290548424737456, "grad_norm": 0.77734375, "learning_rate": 0.00019741808413436474, "loss": 0.3923, "step": 1562 }, { "epoch": 0.7295215869311552, "grad_norm": 0.921875, "learning_rate": 0.00019741477026656598, "loss": 0.4943, "step": 1563 }, { "epoch": 0.7299883313885648, "grad_norm": 0.79296875, "learning_rate": 0.00019741145430131915, "loss": 0.4176, "step": 1564 }, { "epoch": 0.7304550758459744, "grad_norm": 0.89453125, "learning_rate": 0.00019740813623869566, "loss": 0.4785, "step": 1565 }, { "epoch": 0.7309218203033839, "grad_norm": 0.9453125, "learning_rate": 0.00019740481607876688, "loss": 0.4781, "step": 1566 }, { "epoch": 0.7313885647607935, "grad_norm": 0.98828125, "learning_rate": 0.00019740149382160437, "loss": 0.4586, "step": 1567 }, { "epoch": 0.7318553092182031, "grad_norm": 0.94921875, "learning_rate": 0.00019739816946727962, "loss": 0.4585, "step": 1568 }, { "epoch": 0.7323220536756126, "grad_norm": 0.89453125, "learning_rate": 0.0001973948430158642, "loss": 0.4897, "step": 1569 }, { "epoch": 0.7327887981330222, "grad_norm": 0.72265625, "learning_rate": 0.00019739151446742975, "loss": 0.3999, "step": 1570 }, { "epoch": 0.7332555425904317, "grad_norm": 0.86328125, "learning_rate": 0.00019738818382204794, "loss": 0.4348, "step": 1571 }, { "epoch": 0.7337222870478413, "grad_norm": 1.0, "learning_rate": 0.00019738485107979048, "loss": 0.4347, "step": 1572 }, { "epoch": 0.7341890315052508, "grad_norm": 0.9453125, "learning_rate": 0.00019738151624072915, "loss": 0.4766, "step": 1573 }, { "epoch": 0.7346557759626604, "grad_norm": 0.8125, "learning_rate": 0.0001973781793049357, "loss": 0.4679, "step": 1574 }, { "epoch": 0.73512252042007, "grad_norm": 0.9296875, "learning_rate": 0.00019737484027248202, "loss": 0.5101, "step": 1575 }, { "epoch": 0.7355892648774796, "grad_norm": 0.8828125, "learning_rate": 0.00019737149914343996, "loss": 0.4748, "step": 1576 }, { "epoch": 0.7360560093348891, "grad_norm": 0.86328125, "learning_rate": 0.00019736815591788154, "loss": 0.4756, "step": 1577 }, { "epoch": 0.7365227537922987, "grad_norm": 1.0546875, "learning_rate": 0.00019736481059587864, "loss": 0.5661, "step": 1578 }, { "epoch": 0.7369894982497083, "grad_norm": 0.99609375, "learning_rate": 0.00019736146317750336, "loss": 0.4576, "step": 1579 }, { "epoch": 0.7374562427071178, "grad_norm": 0.80859375, "learning_rate": 0.00019735811366282778, "loss": 0.4464, "step": 1580 }, { "epoch": 0.7379229871645274, "grad_norm": 0.92578125, "learning_rate": 0.00019735476205192396, "loss": 0.5366, "step": 1581 }, { "epoch": 0.738389731621937, "grad_norm": 0.92578125, "learning_rate": 0.0001973514083448641, "loss": 0.4377, "step": 1582 }, { "epoch": 0.7388564760793466, "grad_norm": 1.0625, "learning_rate": 0.0001973480525417204, "loss": 0.4715, "step": 1583 }, { "epoch": 0.7393232205367561, "grad_norm": 0.8515625, "learning_rate": 0.00019734469464256513, "loss": 0.5448, "step": 1584 }, { "epoch": 0.7397899649941657, "grad_norm": 1.1328125, "learning_rate": 0.0001973413346474706, "loss": 0.4515, "step": 1585 }, { "epoch": 0.7402567094515753, "grad_norm": 0.91015625, "learning_rate": 0.00019733797255650908, "loss": 0.3848, "step": 1586 }, { "epoch": 0.7407234539089849, "grad_norm": 0.96875, "learning_rate": 0.00019733460836975306, "loss": 0.4414, "step": 1587 }, { "epoch": 0.7411901983663944, "grad_norm": 0.95703125, "learning_rate": 0.00019733124208727492, "loss": 0.4362, "step": 1588 }, { "epoch": 0.741656942823804, "grad_norm": 0.83984375, "learning_rate": 0.00019732787370914712, "loss": 0.4298, "step": 1589 }, { "epoch": 0.7421236872812136, "grad_norm": 0.875, "learning_rate": 0.00019732450323544222, "loss": 0.3198, "step": 1590 }, { "epoch": 0.7425904317386232, "grad_norm": 0.95703125, "learning_rate": 0.0001973211306662328, "loss": 0.4627, "step": 1591 }, { "epoch": 0.7430571761960326, "grad_norm": 0.8203125, "learning_rate": 0.00019731775600159143, "loss": 0.4042, "step": 1592 }, { "epoch": 0.7435239206534422, "grad_norm": 1.015625, "learning_rate": 0.00019731437924159085, "loss": 0.4888, "step": 1593 }, { "epoch": 0.7439906651108518, "grad_norm": 0.953125, "learning_rate": 0.00019731100038630366, "loss": 0.4372, "step": 1594 }, { "epoch": 0.7444574095682613, "grad_norm": 1.09375, "learning_rate": 0.0001973076194358027, "loss": 0.5551, "step": 1595 }, { "epoch": 0.7449241540256709, "grad_norm": 0.8203125, "learning_rate": 0.00019730423639016068, "loss": 0.5257, "step": 1596 }, { "epoch": 0.7453908984830805, "grad_norm": 0.91796875, "learning_rate": 0.0001973008512494505, "loss": 0.5248, "step": 1597 }, { "epoch": 0.7458576429404901, "grad_norm": 0.81640625, "learning_rate": 0.0001972974640137451, "loss": 0.456, "step": 1598 }, { "epoch": 0.7463243873978996, "grad_norm": 0.87109375, "learning_rate": 0.0001972940746831173, "loss": 0.4842, "step": 1599 }, { "epoch": 0.7467911318553092, "grad_norm": 0.75390625, "learning_rate": 0.0001972906832576401, "loss": 0.4286, "step": 1600 }, { "epoch": 0.7472578763127188, "grad_norm": 0.87109375, "learning_rate": 0.00019728728973738657, "loss": 0.4016, "step": 1601 }, { "epoch": 0.7477246207701284, "grad_norm": 0.82421875, "learning_rate": 0.00019728389412242977, "loss": 0.5307, "step": 1602 }, { "epoch": 0.7481913652275379, "grad_norm": 0.91796875, "learning_rate": 0.00019728049641284275, "loss": 0.5228, "step": 1603 }, { "epoch": 0.7486581096849475, "grad_norm": 0.7109375, "learning_rate": 0.0001972770966086987, "loss": 0.3824, "step": 1604 }, { "epoch": 0.7491248541423571, "grad_norm": 0.85546875, "learning_rate": 0.0001972736947100709, "loss": 0.452, "step": 1605 }, { "epoch": 0.7495915985997667, "grad_norm": 0.80859375, "learning_rate": 0.00019727029071703247, "loss": 0.4048, "step": 1606 }, { "epoch": 0.7500583430571762, "grad_norm": 0.97265625, "learning_rate": 0.00019726688462965678, "loss": 0.5633, "step": 1607 }, { "epoch": 0.7505250875145858, "grad_norm": 0.796875, "learning_rate": 0.0001972634764480171, "loss": 0.4046, "step": 1608 }, { "epoch": 0.7505250875145858, "eval_loss": 1.0971609354019165, "eval_runtime": 94.2121, "eval_samples_per_second": 19.148, "eval_steps_per_second": 2.399, "step": 1608 }, { "epoch": 0.7509918319719954, "grad_norm": 0.8828125, "learning_rate": 0.0001972600661721869, "loss": 0.4298, "step": 1609 }, { "epoch": 0.751458576429405, "grad_norm": 0.87109375, "learning_rate": 0.00019725665380223957, "loss": 0.3477, "step": 1610 }, { "epoch": 0.7519253208868145, "grad_norm": 0.890625, "learning_rate": 0.00019725323933824856, "loss": 0.3404, "step": 1611 }, { "epoch": 0.7523920653442241, "grad_norm": 0.9375, "learning_rate": 0.00019724982278028738, "loss": 0.4663, "step": 1612 }, { "epoch": 0.7528588098016336, "grad_norm": 1.1328125, "learning_rate": 0.00019724640412842966, "loss": 0.53, "step": 1613 }, { "epoch": 0.7533255542590431, "grad_norm": 0.93359375, "learning_rate": 0.0001972429833827489, "loss": 0.3965, "step": 1614 }, { "epoch": 0.7537922987164527, "grad_norm": 1.0390625, "learning_rate": 0.00019723956054331886, "loss": 0.4167, "step": 1615 }, { "epoch": 0.7542590431738623, "grad_norm": 1.0546875, "learning_rate": 0.00019723613561021318, "loss": 0.5089, "step": 1616 }, { "epoch": 0.7547257876312718, "grad_norm": 0.88671875, "learning_rate": 0.00019723270858350563, "loss": 0.3663, "step": 1617 }, { "epoch": 0.7551925320886814, "grad_norm": 0.8359375, "learning_rate": 0.00019722927946326995, "loss": 0.4208, "step": 1618 }, { "epoch": 0.755659276546091, "grad_norm": 0.93359375, "learning_rate": 0.00019722584824958007, "loss": 0.473, "step": 1619 }, { "epoch": 0.7561260210035006, "grad_norm": 0.98828125, "learning_rate": 0.00019722241494250976, "loss": 0.3862, "step": 1620 }, { "epoch": 0.7565927654609101, "grad_norm": 0.96875, "learning_rate": 0.000197218979542133, "loss": 0.4701, "step": 1621 }, { "epoch": 0.7570595099183197, "grad_norm": 0.8203125, "learning_rate": 0.0001972155420485237, "loss": 0.4606, "step": 1622 }, { "epoch": 0.7575262543757293, "grad_norm": 0.8828125, "learning_rate": 0.00019721210246175595, "loss": 0.4542, "step": 1623 }, { "epoch": 0.7579929988331389, "grad_norm": 0.91796875, "learning_rate": 0.0001972086607819038, "loss": 0.5826, "step": 1624 }, { "epoch": 0.7584597432905484, "grad_norm": 0.81640625, "learning_rate": 0.0001972052170090413, "loss": 0.4587, "step": 1625 }, { "epoch": 0.758926487747958, "grad_norm": 0.77734375, "learning_rate": 0.00019720177114324262, "loss": 0.3716, "step": 1626 }, { "epoch": 0.7593932322053676, "grad_norm": 0.8984375, "learning_rate": 0.00019719832318458198, "loss": 0.4412, "step": 1627 }, { "epoch": 0.7598599766627772, "grad_norm": 0.94140625, "learning_rate": 0.0001971948731331336, "loss": 0.526, "step": 1628 }, { "epoch": 0.7603267211201867, "grad_norm": 0.8828125, "learning_rate": 0.00019719142098897173, "loss": 0.4982, "step": 1629 }, { "epoch": 0.7607934655775963, "grad_norm": 0.9453125, "learning_rate": 0.00019718796675217073, "loss": 0.489, "step": 1630 }, { "epoch": 0.7612602100350059, "grad_norm": 0.80859375, "learning_rate": 0.000197184510422805, "loss": 0.4201, "step": 1631 }, { "epoch": 0.7617269544924155, "grad_norm": 0.84765625, "learning_rate": 0.00019718105200094895, "loss": 0.3797, "step": 1632 }, { "epoch": 0.7621936989498249, "grad_norm": 0.98046875, "learning_rate": 0.00019717759148667698, "loss": 0.4649, "step": 1633 }, { "epoch": 0.7626604434072345, "grad_norm": 1.0078125, "learning_rate": 0.00019717412888006365, "loss": 0.5103, "step": 1634 }, { "epoch": 0.7631271878646441, "grad_norm": 0.9375, "learning_rate": 0.00019717066418118356, "loss": 0.4454, "step": 1635 }, { "epoch": 0.7635939323220536, "grad_norm": 0.9765625, "learning_rate": 0.0001971671973901112, "loss": 0.3624, "step": 1636 }, { "epoch": 0.7640606767794632, "grad_norm": 0.8671875, "learning_rate": 0.0001971637285069213, "loss": 0.379, "step": 1637 }, { "epoch": 0.7645274212368728, "grad_norm": 0.859375, "learning_rate": 0.0001971602575316885, "loss": 0.3831, "step": 1638 }, { "epoch": 0.7649941656942824, "grad_norm": 0.90234375, "learning_rate": 0.0001971567844644876, "loss": 0.4298, "step": 1639 }, { "epoch": 0.7654609101516919, "grad_norm": 0.96875, "learning_rate": 0.0001971533093053933, "loss": 0.4778, "step": 1640 }, { "epoch": 0.7659276546091015, "grad_norm": 0.9140625, "learning_rate": 0.00019714983205448044, "loss": 0.4568, "step": 1641 }, { "epoch": 0.7663943990665111, "grad_norm": 0.9296875, "learning_rate": 0.00019714635271182393, "loss": 0.5514, "step": 1642 }, { "epoch": 0.7668611435239207, "grad_norm": 0.85546875, "learning_rate": 0.00019714287127749867, "loss": 0.4077, "step": 1643 }, { "epoch": 0.7673278879813302, "grad_norm": 0.7734375, "learning_rate": 0.0001971393877515796, "loss": 0.4071, "step": 1644 }, { "epoch": 0.7677946324387398, "grad_norm": 0.81640625, "learning_rate": 0.00019713590213414172, "loss": 0.4482, "step": 1645 }, { "epoch": 0.7682613768961494, "grad_norm": 0.79296875, "learning_rate": 0.00019713241442526012, "loss": 0.4094, "step": 1646 }, { "epoch": 0.768728121353559, "grad_norm": 0.921875, "learning_rate": 0.00019712892462500983, "loss": 0.4571, "step": 1647 }, { "epoch": 0.7691948658109685, "grad_norm": 0.94140625, "learning_rate": 0.00019712543273346604, "loss": 0.419, "step": 1648 }, { "epoch": 0.7696616102683781, "grad_norm": 0.828125, "learning_rate": 0.00019712193875070396, "loss": 0.4194, "step": 1649 }, { "epoch": 0.7701283547257877, "grad_norm": 0.953125, "learning_rate": 0.00019711844267679873, "loss": 0.4515, "step": 1650 }, { "epoch": 0.7705950991831972, "grad_norm": 0.796875, "learning_rate": 0.0001971149445118257, "loss": 0.4023, "step": 1651 }, { "epoch": 0.7710618436406068, "grad_norm": 1.03125, "learning_rate": 0.00019711144425586016, "loss": 0.5049, "step": 1652 }, { "epoch": 0.7715285880980164, "grad_norm": 0.7734375, "learning_rate": 0.00019710794190897746, "loss": 0.3869, "step": 1653 }, { "epoch": 0.7719953325554259, "grad_norm": 0.90234375, "learning_rate": 0.00019710443747125305, "loss": 0.4698, "step": 1654 }, { "epoch": 0.7724620770128354, "grad_norm": 0.88671875, "learning_rate": 0.00019710093094276235, "loss": 0.3737, "step": 1655 }, { "epoch": 0.772928821470245, "grad_norm": 0.8671875, "learning_rate": 0.00019709742232358086, "loss": 0.4001, "step": 1656 }, { "epoch": 0.7733955659276546, "grad_norm": 0.89453125, "learning_rate": 0.00019709391161378415, "loss": 0.4949, "step": 1657 }, { "epoch": 0.7738623103850641, "grad_norm": 0.8046875, "learning_rate": 0.00019709039881344781, "loss": 0.3657, "step": 1658 }, { "epoch": 0.7743290548424737, "grad_norm": 1.0625, "learning_rate": 0.00019708688392264744, "loss": 0.4652, "step": 1659 }, { "epoch": 0.7747957992998833, "grad_norm": 0.8984375, "learning_rate": 0.00019708336694145874, "loss": 0.4408, "step": 1660 }, { "epoch": 0.7752625437572929, "grad_norm": 0.88671875, "learning_rate": 0.00019707984786995745, "loss": 0.3676, "step": 1661 }, { "epoch": 0.7757292882147024, "grad_norm": 0.84765625, "learning_rate": 0.0001970763267082193, "loss": 0.3739, "step": 1662 }, { "epoch": 0.776196032672112, "grad_norm": 0.99609375, "learning_rate": 0.00019707280345632015, "loss": 0.4358, "step": 1663 }, { "epoch": 0.7766627771295216, "grad_norm": 0.87890625, "learning_rate": 0.0001970692781143358, "loss": 0.3696, "step": 1664 }, { "epoch": 0.7771295215869312, "grad_norm": 0.8359375, "learning_rate": 0.00019706575068234225, "loss": 0.3856, "step": 1665 }, { "epoch": 0.7775962660443407, "grad_norm": 0.96484375, "learning_rate": 0.00019706222116041536, "loss": 0.4888, "step": 1666 }, { "epoch": 0.7780630105017503, "grad_norm": 0.9453125, "learning_rate": 0.00019705868954863115, "loss": 0.4437, "step": 1667 }, { "epoch": 0.7785297549591599, "grad_norm": 0.875, "learning_rate": 0.00019705515584706568, "loss": 0.4813, "step": 1668 }, { "epoch": 0.7789964994165695, "grad_norm": 1.015625, "learning_rate": 0.000197051620055795, "loss": 0.4835, "step": 1669 }, { "epoch": 0.779463243873979, "grad_norm": 0.98046875, "learning_rate": 0.00019704808217489527, "loss": 0.4474, "step": 1670 }, { "epoch": 0.7799299883313886, "grad_norm": 0.90234375, "learning_rate": 0.00019704454220444267, "loss": 0.4925, "step": 1671 }, { "epoch": 0.7803967327887982, "grad_norm": 0.80078125, "learning_rate": 0.00019704100014451337, "loss": 0.3988, "step": 1672 }, { "epoch": 0.7808634772462077, "grad_norm": 0.72265625, "learning_rate": 0.0001970374559951837, "loss": 0.3584, "step": 1673 }, { "epoch": 0.7813302217036173, "grad_norm": 0.91015625, "learning_rate": 0.00019703390975652993, "loss": 0.4052, "step": 1674 }, { "epoch": 0.7817969661610268, "grad_norm": 0.90234375, "learning_rate": 0.0001970303614286284, "loss": 0.4827, "step": 1675 }, { "epoch": 0.7822637106184364, "grad_norm": 0.890625, "learning_rate": 0.00019702681101155557, "loss": 0.4047, "step": 1676 }, { "epoch": 0.7827304550758459, "grad_norm": 0.9609375, "learning_rate": 0.0001970232585053878, "loss": 0.4237, "step": 1677 }, { "epoch": 0.7831971995332555, "grad_norm": 1.1328125, "learning_rate": 0.00019701970391020166, "loss": 0.4886, "step": 1678 }, { "epoch": 0.7836639439906651, "grad_norm": 0.96484375, "learning_rate": 0.00019701614722607364, "loss": 0.4002, "step": 1679 }, { "epoch": 0.7841306884480747, "grad_norm": 0.9140625, "learning_rate": 0.00019701258845308036, "loss": 0.3882, "step": 1680 }, { "epoch": 0.7845974329054842, "grad_norm": 0.859375, "learning_rate": 0.00019700902759129836, "loss": 0.4207, "step": 1681 }, { "epoch": 0.7850641773628938, "grad_norm": 0.8984375, "learning_rate": 0.0001970054646408044, "loss": 0.3959, "step": 1682 }, { "epoch": 0.7855309218203034, "grad_norm": 0.875, "learning_rate": 0.00019700189960167514, "loss": 0.4273, "step": 1683 }, { "epoch": 0.785997666277713, "grad_norm": 0.8828125, "learning_rate": 0.00019699833247398735, "loss": 0.3282, "step": 1684 }, { "epoch": 0.7864644107351225, "grad_norm": 1.015625, "learning_rate": 0.00019699476325781785, "loss": 0.4213, "step": 1685 }, { "epoch": 0.7869311551925321, "grad_norm": 0.79296875, "learning_rate": 0.00019699119195324346, "loss": 0.4118, "step": 1686 }, { "epoch": 0.7873978996499417, "grad_norm": 0.8046875, "learning_rate": 0.0001969876185603411, "loss": 0.3371, "step": 1687 }, { "epoch": 0.7878646441073512, "grad_norm": 0.9375, "learning_rate": 0.00019698404307918774, "loss": 0.4357, "step": 1688 }, { "epoch": 0.7883313885647608, "grad_norm": 1.1328125, "learning_rate": 0.0001969804655098603, "loss": 0.4939, "step": 1689 }, { "epoch": 0.7887981330221704, "grad_norm": 0.90234375, "learning_rate": 0.00019697688585243583, "loss": 0.3687, "step": 1690 }, { "epoch": 0.78926487747958, "grad_norm": 0.87109375, "learning_rate": 0.0001969733041069914, "loss": 0.4065, "step": 1691 }, { "epoch": 0.7897316219369895, "grad_norm": 0.91015625, "learning_rate": 0.00019696972027360418, "loss": 0.3539, "step": 1692 }, { "epoch": 0.7901983663943991, "grad_norm": 0.96484375, "learning_rate": 0.00019696613435235126, "loss": 0.4587, "step": 1693 }, { "epoch": 0.7906651108518087, "grad_norm": 1.0234375, "learning_rate": 0.00019696254634330988, "loss": 0.4311, "step": 1694 }, { "epoch": 0.7911318553092183, "grad_norm": 0.83984375, "learning_rate": 0.00019695895624655731, "loss": 0.3925, "step": 1695 }, { "epoch": 0.7915985997666277, "grad_norm": 0.75, "learning_rate": 0.00019695536406217083, "loss": 0.3238, "step": 1696 }, { "epoch": 0.7920653442240373, "grad_norm": 0.91015625, "learning_rate": 0.0001969517697902278, "loss": 0.3983, "step": 1697 }, { "epoch": 0.7925320886814469, "grad_norm": 0.8125, "learning_rate": 0.0001969481734308056, "loss": 0.3313, "step": 1698 }, { "epoch": 0.7929988331388564, "grad_norm": 1.0546875, "learning_rate": 0.00019694457498398163, "loss": 0.4683, "step": 1699 }, { "epoch": 0.793465577596266, "grad_norm": 1.03125, "learning_rate": 0.0001969409744498334, "loss": 0.3512, "step": 1700 }, { "epoch": 0.7939323220536756, "grad_norm": 0.87109375, "learning_rate": 0.00019693737182843846, "loss": 0.4548, "step": 1701 }, { "epoch": 0.7943990665110852, "grad_norm": 1.015625, "learning_rate": 0.00019693376711987432, "loss": 0.4215, "step": 1702 }, { "epoch": 0.7948658109684947, "grad_norm": 1.0390625, "learning_rate": 0.00019693016032421864, "loss": 0.5173, "step": 1703 }, { "epoch": 0.7953325554259043, "grad_norm": 1.09375, "learning_rate": 0.00019692655144154907, "loss": 0.4404, "step": 1704 }, { "epoch": 0.7957992998833139, "grad_norm": 1.0234375, "learning_rate": 0.0001969229404719433, "loss": 0.4476, "step": 1705 }, { "epoch": 0.7962660443407235, "grad_norm": 0.81640625, "learning_rate": 0.00019691932741547908, "loss": 0.397, "step": 1706 }, { "epoch": 0.796732788798133, "grad_norm": 0.890625, "learning_rate": 0.0001969157122722342, "loss": 0.4198, "step": 1707 }, { "epoch": 0.7971995332555426, "grad_norm": 0.95703125, "learning_rate": 0.00019691209504228653, "loss": 0.5072, "step": 1708 }, { "epoch": 0.7976662777129522, "grad_norm": 0.8671875, "learning_rate": 0.0001969084757257139, "loss": 0.4204, "step": 1709 }, { "epoch": 0.7981330221703618, "grad_norm": 0.86328125, "learning_rate": 0.0001969048543225943, "loss": 0.3841, "step": 1710 }, { "epoch": 0.7985997666277713, "grad_norm": 1.0234375, "learning_rate": 0.00019690123083300563, "loss": 0.4777, "step": 1711 }, { "epoch": 0.7990665110851809, "grad_norm": 0.96875, "learning_rate": 0.00019689760525702594, "loss": 0.4967, "step": 1712 }, { "epoch": 0.7995332555425905, "grad_norm": 0.73046875, "learning_rate": 0.00019689397759473334, "loss": 0.365, "step": 1713 }, { "epoch": 0.8, "grad_norm": 1.03125, "learning_rate": 0.00019689034784620585, "loss": 0.5012, "step": 1714 }, { "epoch": 0.8004667444574096, "grad_norm": 0.9609375, "learning_rate": 0.0001968867160115217, "loss": 0.4253, "step": 1715 }, { "epoch": 0.8009334889148192, "grad_norm": 0.8984375, "learning_rate": 0.00019688308209075907, "loss": 0.3764, "step": 1716 }, { "epoch": 0.8014002333722287, "grad_norm": 0.9921875, "learning_rate": 0.00019687944608399617, "loss": 0.4565, "step": 1717 }, { "epoch": 0.8018669778296382, "grad_norm": 0.83984375, "learning_rate": 0.0001968758079913113, "loss": 0.363, "step": 1718 }, { "epoch": 0.8023337222870478, "grad_norm": 1.0625, "learning_rate": 0.0001968721678127828, "loss": 0.5048, "step": 1719 }, { "epoch": 0.8028004667444574, "grad_norm": 0.89453125, "learning_rate": 0.00019686852554848906, "loss": 0.4447, "step": 1720 }, { "epoch": 0.803267211201867, "grad_norm": 1.0703125, "learning_rate": 0.0001968648811985085, "loss": 0.4441, "step": 1721 }, { "epoch": 0.8037339556592765, "grad_norm": 1.0078125, "learning_rate": 0.00019686123476291955, "loss": 0.3931, "step": 1722 }, { "epoch": 0.8042007001166861, "grad_norm": 0.95703125, "learning_rate": 0.00019685758624180073, "loss": 0.4077, "step": 1723 }, { "epoch": 0.8046674445740957, "grad_norm": 0.953125, "learning_rate": 0.00019685393563523065, "loss": 0.458, "step": 1724 }, { "epoch": 0.8051341890315052, "grad_norm": 0.9921875, "learning_rate": 0.00019685028294328785, "loss": 0.4118, "step": 1725 }, { "epoch": 0.8056009334889148, "grad_norm": 0.859375, "learning_rate": 0.00019684662816605103, "loss": 0.4255, "step": 1726 }, { "epoch": 0.8060676779463244, "grad_norm": 0.953125, "learning_rate": 0.0001968429713035988, "loss": 0.4846, "step": 1727 }, { "epoch": 0.806534422403734, "grad_norm": 0.9296875, "learning_rate": 0.00019683931235600998, "loss": 0.4882, "step": 1728 }, { "epoch": 0.8070011668611435, "grad_norm": 1.1171875, "learning_rate": 0.00019683565132336335, "loss": 0.4408, "step": 1729 }, { "epoch": 0.8074679113185531, "grad_norm": 0.80859375, "learning_rate": 0.00019683198820573768, "loss": 0.4586, "step": 1730 }, { "epoch": 0.8079346557759627, "grad_norm": 0.8671875, "learning_rate": 0.00019682832300321184, "loss": 0.3996, "step": 1731 }, { "epoch": 0.8084014002333723, "grad_norm": 0.9375, "learning_rate": 0.00019682465571586478, "loss": 0.3814, "step": 1732 }, { "epoch": 0.8088681446907818, "grad_norm": 0.78515625, "learning_rate": 0.00019682098634377548, "loss": 0.2916, "step": 1733 }, { "epoch": 0.8093348891481914, "grad_norm": 0.96875, "learning_rate": 0.00019681731488702293, "loss": 0.4154, "step": 1734 }, { "epoch": 0.809801633605601, "grad_norm": 0.8984375, "learning_rate": 0.00019681364134568611, "loss": 0.3865, "step": 1735 }, { "epoch": 0.8102683780630106, "grad_norm": 0.96484375, "learning_rate": 0.00019680996571984425, "loss": 0.3899, "step": 1736 }, { "epoch": 0.81073512252042, "grad_norm": 0.85546875, "learning_rate": 0.00019680628800957637, "loss": 0.3522, "step": 1737 }, { "epoch": 0.8112018669778296, "grad_norm": 1.0546875, "learning_rate": 0.0001968026082149617, "loss": 0.3967, "step": 1738 }, { "epoch": 0.8116686114352392, "grad_norm": 1.0078125, "learning_rate": 0.0001967989263360795, "loss": 0.4102, "step": 1739 }, { "epoch": 0.8121353558926487, "grad_norm": 1.03125, "learning_rate": 0.00019679524237300897, "loss": 0.4287, "step": 1740 }, { "epoch": 0.8126021003500583, "grad_norm": 1.03125, "learning_rate": 0.00019679155632582954, "loss": 0.4413, "step": 1741 }, { "epoch": 0.8130688448074679, "grad_norm": 1.21875, "learning_rate": 0.00019678786819462041, "loss": 0.5034, "step": 1742 }, { "epoch": 0.8135355892648775, "grad_norm": 0.94921875, "learning_rate": 0.00019678417797946117, "loss": 0.4361, "step": 1743 }, { "epoch": 0.814002333722287, "grad_norm": 0.83203125, "learning_rate": 0.0001967804856804312, "loss": 0.3384, "step": 1744 }, { "epoch": 0.8144690781796966, "grad_norm": 0.8828125, "learning_rate": 0.00019677679129760994, "loss": 0.3776, "step": 1745 }, { "epoch": 0.8149358226371062, "grad_norm": 0.984375, "learning_rate": 0.00019677309483107704, "loss": 0.4965, "step": 1746 }, { "epoch": 0.8154025670945158, "grad_norm": 0.90625, "learning_rate": 0.00019676939628091202, "loss": 0.4591, "step": 1747 }, { "epoch": 0.8158693115519253, "grad_norm": 0.921875, "learning_rate": 0.0001967656956471945, "loss": 0.4155, "step": 1748 }, { "epoch": 0.8163360560093349, "grad_norm": 0.71875, "learning_rate": 0.00019676199293000424, "loss": 0.3471, "step": 1749 }, { "epoch": 0.8168028004667445, "grad_norm": 0.875, "learning_rate": 0.00019675828812942088, "loss": 0.4256, "step": 1750 }, { "epoch": 0.817269544924154, "grad_norm": 0.9375, "learning_rate": 0.00019675458124552423, "loss": 0.4129, "step": 1751 }, { "epoch": 0.8177362893815636, "grad_norm": 0.77734375, "learning_rate": 0.00019675087227839409, "loss": 0.3291, "step": 1752 }, { "epoch": 0.8182030338389732, "grad_norm": 0.87890625, "learning_rate": 0.0001967471612281103, "loss": 0.3793, "step": 1753 }, { "epoch": 0.8186697782963828, "grad_norm": 0.9140625, "learning_rate": 0.00019674344809475285, "loss": 0.4082, "step": 1754 }, { "epoch": 0.8191365227537923, "grad_norm": 0.77734375, "learning_rate": 0.00019673973287840158, "loss": 0.3314, "step": 1755 }, { "epoch": 0.8196032672112019, "grad_norm": 1.046875, "learning_rate": 0.0001967360155791365, "loss": 0.4393, "step": 1756 }, { "epoch": 0.8200700116686115, "grad_norm": 1.0703125, "learning_rate": 0.00019673229619703774, "loss": 0.533, "step": 1757 }, { "epoch": 0.820536756126021, "grad_norm": 0.93359375, "learning_rate": 0.00019672857473218526, "loss": 0.4376, "step": 1758 }, { "epoch": 0.8210035005834305, "grad_norm": 1.0, "learning_rate": 0.00019672485118465928, "loss": 0.387, "step": 1759 }, { "epoch": 0.8214702450408401, "grad_norm": 0.875, "learning_rate": 0.00019672112555453993, "loss": 0.4112, "step": 1760 }, { "epoch": 0.8219369894982497, "grad_norm": 0.98046875, "learning_rate": 0.0001967173978419074, "loss": 0.4057, "step": 1761 }, { "epoch": 0.8224037339556592, "grad_norm": 0.9453125, "learning_rate": 0.00019671366804684205, "loss": 0.4317, "step": 1762 }, { "epoch": 0.8228704784130688, "grad_norm": 0.828125, "learning_rate": 0.00019670993616942406, "loss": 0.4131, "step": 1763 }, { "epoch": 0.8233372228704784, "grad_norm": 0.8515625, "learning_rate": 0.00019670620220973387, "loss": 0.4334, "step": 1764 }, { "epoch": 0.823803967327888, "grad_norm": 1.0078125, "learning_rate": 0.0001967024661678518, "loss": 0.3953, "step": 1765 }, { "epoch": 0.8242707117852975, "grad_norm": 0.6953125, "learning_rate": 0.00019669872804385841, "loss": 0.3453, "step": 1766 }, { "epoch": 0.8247374562427071, "grad_norm": 1.046875, "learning_rate": 0.0001966949878378341, "loss": 0.4282, "step": 1767 }, { "epoch": 0.8252042007001167, "grad_norm": 0.8515625, "learning_rate": 0.0001966912455498594, "loss": 0.4296, "step": 1768 }, { "epoch": 0.8256709451575263, "grad_norm": 0.86328125, "learning_rate": 0.0001966875011800149, "loss": 0.3504, "step": 1769 }, { "epoch": 0.8261376896149358, "grad_norm": 0.9375, "learning_rate": 0.00019668375472838126, "loss": 0.4587, "step": 1770 }, { "epoch": 0.8266044340723454, "grad_norm": 1.1015625, "learning_rate": 0.00019668000619503905, "loss": 0.5271, "step": 1771 }, { "epoch": 0.827071178529755, "grad_norm": 0.98046875, "learning_rate": 0.00019667625558006908, "loss": 0.4177, "step": 1772 }, { "epoch": 0.8275379229871646, "grad_norm": 1.046875, "learning_rate": 0.00019667250288355205, "loss": 0.5163, "step": 1773 }, { "epoch": 0.8280046674445741, "grad_norm": 0.8671875, "learning_rate": 0.00019666874810556878, "loss": 0.3354, "step": 1774 }, { "epoch": 0.8284714119019837, "grad_norm": 0.84765625, "learning_rate": 0.0001966649912462001, "loss": 0.3692, "step": 1775 }, { "epoch": 0.8289381563593933, "grad_norm": 0.84375, "learning_rate": 0.00019666123230552689, "loss": 0.4289, "step": 1776 }, { "epoch": 0.8294049008168028, "grad_norm": 0.96484375, "learning_rate": 0.00019665747128363013, "loss": 0.446, "step": 1777 }, { "epoch": 0.8298716452742124, "grad_norm": 0.97265625, "learning_rate": 0.00019665370818059078, "loss": 0.3976, "step": 1778 }, { "epoch": 0.8303383897316219, "grad_norm": 0.96875, "learning_rate": 0.00019664994299648983, "loss": 0.417, "step": 1779 }, { "epoch": 0.8308051341890315, "grad_norm": 0.8671875, "learning_rate": 0.00019664617573140835, "loss": 0.3878, "step": 1780 }, { "epoch": 0.831271878646441, "grad_norm": 0.921875, "learning_rate": 0.00019664240638542753, "loss": 0.4081, "step": 1781 }, { "epoch": 0.8317386231038506, "grad_norm": 0.984375, "learning_rate": 0.0001966386349586284, "loss": 0.4148, "step": 1782 }, { "epoch": 0.8322053675612602, "grad_norm": 1.1875, "learning_rate": 0.00019663486145109232, "loss": 0.5447, "step": 1783 }, { "epoch": 0.8326721120186698, "grad_norm": 0.9453125, "learning_rate": 0.0001966310858629004, "loss": 0.3902, "step": 1784 }, { "epoch": 0.8331388564760793, "grad_norm": 0.94921875, "learning_rate": 0.00019662730819413403, "loss": 0.3607, "step": 1785 }, { "epoch": 0.8336056009334889, "grad_norm": 1.0859375, "learning_rate": 0.00019662352844487447, "loss": 0.3457, "step": 1786 }, { "epoch": 0.8340723453908985, "grad_norm": 1.015625, "learning_rate": 0.00019661974661520313, "loss": 0.356, "step": 1787 }, { "epoch": 0.834539089848308, "grad_norm": 0.94140625, "learning_rate": 0.00019661596270520147, "loss": 0.3302, "step": 1788 }, { "epoch": 0.8350058343057176, "grad_norm": 0.88671875, "learning_rate": 0.00019661217671495092, "loss": 0.3902, "step": 1789 }, { "epoch": 0.8354725787631272, "grad_norm": 1.3046875, "learning_rate": 0.000196608388644533, "loss": 0.4734, "step": 1790 }, { "epoch": 0.8359393232205368, "grad_norm": 1.0625, "learning_rate": 0.00019660459849402934, "loss": 0.388, "step": 1791 }, { "epoch": 0.8364060676779463, "grad_norm": 1.015625, "learning_rate": 0.00019660080626352145, "loss": 0.3567, "step": 1792 }, { "epoch": 0.8368728121353559, "grad_norm": 0.875, "learning_rate": 0.000196597011953091, "loss": 0.3428, "step": 1793 }, { "epoch": 0.8373395565927655, "grad_norm": 1.1484375, "learning_rate": 0.00019659321556281972, "loss": 0.4481, "step": 1794 }, { "epoch": 0.8378063010501751, "grad_norm": 1.0234375, "learning_rate": 0.00019658941709278934, "loss": 0.4069, "step": 1795 }, { "epoch": 0.8382730455075846, "grad_norm": 1.015625, "learning_rate": 0.0001965856165430817, "loss": 0.3826, "step": 1796 }, { "epoch": 0.8387397899649942, "grad_norm": 0.70703125, "learning_rate": 0.00019658181391377848, "loss": 0.2763, "step": 1797 }, { "epoch": 0.8392065344224038, "grad_norm": 0.9375, "learning_rate": 0.0001965780092049617, "loss": 0.3585, "step": 1798 }, { "epoch": 0.8396732788798134, "grad_norm": 1.0234375, "learning_rate": 0.0001965742024167132, "loss": 0.3959, "step": 1799 }, { "epoch": 0.8401400233372228, "grad_norm": 1.046875, "learning_rate": 0.000196570393549115, "loss": 0.4705, "step": 1800 }, { "epoch": 0.8406067677946324, "grad_norm": 1.0703125, "learning_rate": 0.00019656658260224904, "loss": 0.4451, "step": 1801 }, { "epoch": 0.841073512252042, "grad_norm": 1.0625, "learning_rate": 0.00019656276957619742, "loss": 0.4259, "step": 1802 }, { "epoch": 0.8415402567094515, "grad_norm": 0.99609375, "learning_rate": 0.00019655895447104227, "loss": 0.4195, "step": 1803 }, { "epoch": 0.8420070011668611, "grad_norm": 0.953125, "learning_rate": 0.0001965551372868657, "loss": 0.3651, "step": 1804 }, { "epoch": 0.8424737456242707, "grad_norm": 0.91796875, "learning_rate": 0.00019655131802374985, "loss": 0.3945, "step": 1805 }, { "epoch": 0.8429404900816803, "grad_norm": 0.84765625, "learning_rate": 0.00019654749668177702, "loss": 0.3596, "step": 1806 }, { "epoch": 0.8434072345390898, "grad_norm": 0.87890625, "learning_rate": 0.00019654367326102948, "loss": 0.3782, "step": 1807 }, { "epoch": 0.8438739789964994, "grad_norm": 0.890625, "learning_rate": 0.00019653984776158952, "loss": 0.3764, "step": 1808 }, { "epoch": 0.844340723453909, "grad_norm": 1.03125, "learning_rate": 0.00019653602018353952, "loss": 0.3736, "step": 1809 }, { "epoch": 0.8448074679113186, "grad_norm": 0.82421875, "learning_rate": 0.0001965321905269619, "loss": 0.325, "step": 1810 }, { "epoch": 0.8452742123687281, "grad_norm": 0.6875, "learning_rate": 0.00019652835879193912, "loss": 0.3127, "step": 1811 }, { "epoch": 0.8457409568261377, "grad_norm": 0.99609375, "learning_rate": 0.00019652452497855366, "loss": 0.4186, "step": 1812 }, { "epoch": 0.8462077012835473, "grad_norm": 1.0625, "learning_rate": 0.00019652068908688813, "loss": 0.4679, "step": 1813 }, { "epoch": 0.8466744457409568, "grad_norm": 0.8203125, "learning_rate": 0.000196516851117025, "loss": 0.3121, "step": 1814 }, { "epoch": 0.8471411901983664, "grad_norm": 1.2265625, "learning_rate": 0.000196513011069047, "loss": 0.3522, "step": 1815 }, { "epoch": 0.847607934655776, "grad_norm": 0.87890625, "learning_rate": 0.00019650916894303682, "loss": 0.3794, "step": 1816 }, { "epoch": 0.8480746791131856, "grad_norm": 1.1796875, "learning_rate": 0.00019650532473907716, "loss": 0.4495, "step": 1817 }, { "epoch": 0.8485414235705951, "grad_norm": 0.890625, "learning_rate": 0.00019650147845725076, "loss": 0.3639, "step": 1818 }, { "epoch": 0.8490081680280047, "grad_norm": 1.0390625, "learning_rate": 0.00019649763009764044, "loss": 0.3879, "step": 1819 }, { "epoch": 0.8494749124854143, "grad_norm": 0.78125, "learning_rate": 0.00019649377966032909, "loss": 0.3394, "step": 1820 }, { "epoch": 0.8499416569428238, "grad_norm": 1.03125, "learning_rate": 0.00019648992714539962, "loss": 0.3928, "step": 1821 }, { "epoch": 0.8504084014002333, "grad_norm": 1.0546875, "learning_rate": 0.00019648607255293493, "loss": 0.3423, "step": 1822 }, { "epoch": 0.8508751458576429, "grad_norm": 1.1875, "learning_rate": 0.0001964822158830181, "loss": 0.4007, "step": 1823 }, { "epoch": 0.8513418903150525, "grad_norm": 1.1171875, "learning_rate": 0.00019647835713573208, "loss": 0.3958, "step": 1824 }, { "epoch": 0.851808634772462, "grad_norm": 0.921875, "learning_rate": 0.00019647449631115998, "loss": 0.3731, "step": 1825 }, { "epoch": 0.8522753792298716, "grad_norm": 0.94921875, "learning_rate": 0.00019647063340938491, "loss": 0.324, "step": 1826 }, { "epoch": 0.8527421236872812, "grad_norm": 1.046875, "learning_rate": 0.00019646676843049012, "loss": 0.4611, "step": 1827 }, { "epoch": 0.8532088681446908, "grad_norm": 0.9921875, "learning_rate": 0.00019646290137455874, "loss": 0.4185, "step": 1828 }, { "epoch": 0.8536756126021003, "grad_norm": 1.078125, "learning_rate": 0.00019645903224167408, "loss": 0.3925, "step": 1829 }, { "epoch": 0.8541423570595099, "grad_norm": 0.91015625, "learning_rate": 0.00019645516103191944, "loss": 0.3905, "step": 1830 }, { "epoch": 0.8546091015169195, "grad_norm": 1.0234375, "learning_rate": 0.00019645128774537816, "loss": 0.3788, "step": 1831 }, { "epoch": 0.8550758459743291, "grad_norm": 0.96875, "learning_rate": 0.00019644741238213363, "loss": 0.4176, "step": 1832 }, { "epoch": 0.8555425904317386, "grad_norm": 0.859375, "learning_rate": 0.0001964435349422693, "loss": 0.3912, "step": 1833 }, { "epoch": 0.8560093348891482, "grad_norm": 1.015625, "learning_rate": 0.00019643965542586863, "loss": 0.4296, "step": 1834 }, { "epoch": 0.8564760793465578, "grad_norm": 0.921875, "learning_rate": 0.00019643577383301523, "loss": 0.4221, "step": 1835 }, { "epoch": 0.8569428238039674, "grad_norm": 0.87890625, "learning_rate": 0.00019643189016379262, "loss": 0.3477, "step": 1836 }, { "epoch": 0.8574095682613769, "grad_norm": 0.8984375, "learning_rate": 0.00019642800441828438, "loss": 0.3722, "step": 1837 }, { "epoch": 0.8578763127187865, "grad_norm": 0.92578125, "learning_rate": 0.00019642411659657425, "loss": 0.4089, "step": 1838 }, { "epoch": 0.8583430571761961, "grad_norm": 0.88671875, "learning_rate": 0.0001964202266987459, "loss": 0.3811, "step": 1839 }, { "epoch": 0.8588098016336057, "grad_norm": 0.92578125, "learning_rate": 0.00019641633472488308, "loss": 0.3723, "step": 1840 }, { "epoch": 0.8592765460910151, "grad_norm": 0.9921875, "learning_rate": 0.0001964124406750696, "loss": 0.3523, "step": 1841 }, { "epoch": 0.8597432905484247, "grad_norm": 0.9765625, "learning_rate": 0.00019640854454938927, "loss": 0.3542, "step": 1842 }, { "epoch": 0.8602100350058343, "grad_norm": 1.140625, "learning_rate": 0.00019640464634792607, "loss": 0.3185, "step": 1843 }, { "epoch": 0.8606767794632438, "grad_norm": 1.0703125, "learning_rate": 0.00019640074607076385, "loss": 0.3849, "step": 1844 }, { "epoch": 0.8611435239206534, "grad_norm": 0.98828125, "learning_rate": 0.00019639684371798656, "loss": 0.4132, "step": 1845 }, { "epoch": 0.861610268378063, "grad_norm": 0.98828125, "learning_rate": 0.00019639293928967836, "loss": 0.4004, "step": 1846 }, { "epoch": 0.8620770128354726, "grad_norm": 1.078125, "learning_rate": 0.00019638903278592317, "loss": 0.3093, "step": 1847 }, { "epoch": 0.8625437572928821, "grad_norm": 0.9140625, "learning_rate": 0.00019638512420680514, "loss": 0.4005, "step": 1848 }, { "epoch": 0.8630105017502917, "grad_norm": 0.91796875, "learning_rate": 0.00019638121355240848, "loss": 0.3725, "step": 1849 }, { "epoch": 0.8634772462077013, "grad_norm": 0.84765625, "learning_rate": 0.00019637730082281736, "loss": 0.3596, "step": 1850 }, { "epoch": 0.8639439906651109, "grad_norm": 0.859375, "learning_rate": 0.000196373386018116, "loss": 0.3901, "step": 1851 }, { "epoch": 0.8644107351225204, "grad_norm": 0.93359375, "learning_rate": 0.0001963694691383887, "loss": 0.3743, "step": 1852 }, { "epoch": 0.86487747957993, "grad_norm": 0.90625, "learning_rate": 0.00019636555018371984, "loss": 0.3376, "step": 1853 }, { "epoch": 0.8653442240373396, "grad_norm": 0.94921875, "learning_rate": 0.00019636162915419374, "loss": 0.4384, "step": 1854 }, { "epoch": 0.8658109684947491, "grad_norm": 1.0, "learning_rate": 0.00019635770604989485, "loss": 0.3543, "step": 1855 }, { "epoch": 0.8662777129521587, "grad_norm": 0.94921875, "learning_rate": 0.00019635378087090764, "loss": 0.4132, "step": 1856 }, { "epoch": 0.8667444574095683, "grad_norm": 1.1796875, "learning_rate": 0.00019634985361731663, "loss": 0.4052, "step": 1857 }, { "epoch": 0.8672112018669779, "grad_norm": 1.015625, "learning_rate": 0.00019634592428920637, "loss": 0.4537, "step": 1858 }, { "epoch": 0.8676779463243874, "grad_norm": 1.3046875, "learning_rate": 0.00019634199288666147, "loss": 0.4343, "step": 1859 }, { "epoch": 0.868144690781797, "grad_norm": 0.87890625, "learning_rate": 0.00019633805940976655, "loss": 0.3985, "step": 1860 }, { "epoch": 0.8686114352392066, "grad_norm": 0.78515625, "learning_rate": 0.00019633412385860634, "loss": 0.3521, "step": 1861 }, { "epoch": 0.869078179696616, "grad_norm": 1.140625, "learning_rate": 0.00019633018623326556, "loss": 0.3505, "step": 1862 }, { "epoch": 0.8695449241540256, "grad_norm": 0.87109375, "learning_rate": 0.00019632624653382897, "loss": 0.3098, "step": 1863 }, { "epoch": 0.8700116686114352, "grad_norm": 0.93359375, "learning_rate": 0.0001963223047603814, "loss": 0.5098, "step": 1864 }, { "epoch": 0.8704784130688448, "grad_norm": 0.98046875, "learning_rate": 0.0001963183609130078, "loss": 0.4233, "step": 1865 }, { "epoch": 0.8709451575262543, "grad_norm": 0.91015625, "learning_rate": 0.00019631441499179297, "loss": 0.3872, "step": 1866 }, { "epoch": 0.8714119019836639, "grad_norm": 0.796875, "learning_rate": 0.00019631046699682193, "loss": 0.3203, "step": 1867 }, { "epoch": 0.8718786464410735, "grad_norm": 0.94921875, "learning_rate": 0.0001963065169281797, "loss": 0.3691, "step": 1868 }, { "epoch": 0.8723453908984831, "grad_norm": 0.921875, "learning_rate": 0.00019630256478595128, "loss": 0.429, "step": 1869 }, { "epoch": 0.8728121353558926, "grad_norm": 0.88671875, "learning_rate": 0.00019629861057022182, "loss": 0.3601, "step": 1870 }, { "epoch": 0.8732788798133022, "grad_norm": 1.0859375, "learning_rate": 0.0001962946542810764, "loss": 0.4536, "step": 1871 }, { "epoch": 0.8737456242707118, "grad_norm": 0.97265625, "learning_rate": 0.00019629069591860026, "loss": 0.3148, "step": 1872 }, { "epoch": 0.8742123687281214, "grad_norm": 0.98046875, "learning_rate": 0.00019628673548287856, "loss": 0.3374, "step": 1873 }, { "epoch": 0.8746791131855309, "grad_norm": 1.0390625, "learning_rate": 0.00019628277297399667, "loss": 0.4128, "step": 1874 }, { "epoch": 0.8751458576429405, "grad_norm": 1.0546875, "learning_rate": 0.0001962788083920398, "loss": 0.4096, "step": 1875 }, { "epoch": 0.8756126021003501, "grad_norm": 1.21875, "learning_rate": 0.00019627484173709338, "loss": 0.3788, "step": 1876 }, { "epoch": 0.8760793465577597, "grad_norm": 1.0078125, "learning_rate": 0.0001962708730092428, "loss": 0.2991, "step": 1877 }, { "epoch": 0.8765460910151692, "grad_norm": 0.9296875, "learning_rate": 0.0001962669022085735, "loss": 0.4103, "step": 1878 }, { "epoch": 0.8770128354725788, "grad_norm": 0.8671875, "learning_rate": 0.000196262929335171, "loss": 0.338, "step": 1879 }, { "epoch": 0.8774795799299884, "grad_norm": 0.90234375, "learning_rate": 0.0001962589543891208, "loss": 0.2937, "step": 1880 }, { "epoch": 0.877946324387398, "grad_norm": 0.9375, "learning_rate": 0.00019625497737050856, "loss": 0.3802, "step": 1881 }, { "epoch": 0.8784130688448075, "grad_norm": 0.87109375, "learning_rate": 0.00019625099827941985, "loss": 0.3681, "step": 1882 }, { "epoch": 0.878879813302217, "grad_norm": 1.03125, "learning_rate": 0.00019624701711594032, "loss": 0.3591, "step": 1883 }, { "epoch": 0.8793465577596266, "grad_norm": 0.94921875, "learning_rate": 0.00019624303388015576, "loss": 0.3714, "step": 1884 }, { "epoch": 0.8798133022170361, "grad_norm": 0.9296875, "learning_rate": 0.00019623904857215185, "loss": 0.3315, "step": 1885 }, { "epoch": 0.8802800466744457, "grad_norm": 0.9453125, "learning_rate": 0.00019623506119201446, "loss": 0.4366, "step": 1886 }, { "epoch": 0.8807467911318553, "grad_norm": 0.90625, "learning_rate": 0.00019623107173982946, "loss": 0.3618, "step": 1887 }, { "epoch": 0.8812135355892649, "grad_norm": 1.109375, "learning_rate": 0.0001962270802156827, "loss": 0.431, "step": 1888 }, { "epoch": 0.8816802800466744, "grad_norm": 0.93359375, "learning_rate": 0.0001962230866196601, "loss": 0.3255, "step": 1889 }, { "epoch": 0.882147024504084, "grad_norm": 0.9140625, "learning_rate": 0.00019621909095184775, "loss": 0.3385, "step": 1890 }, { "epoch": 0.8826137689614936, "grad_norm": 1.15625, "learning_rate": 0.0001962150932123316, "loss": 0.3374, "step": 1891 }, { "epoch": 0.8830805134189031, "grad_norm": 1.0078125, "learning_rate": 0.00019621109340119767, "loss": 0.4671, "step": 1892 }, { "epoch": 0.8835472578763127, "grad_norm": 1.1796875, "learning_rate": 0.0001962070915185322, "loss": 0.4194, "step": 1893 }, { "epoch": 0.8840140023337223, "grad_norm": 0.97265625, "learning_rate": 0.0001962030875644213, "loss": 0.4178, "step": 1894 }, { "epoch": 0.8844807467911319, "grad_norm": 1.0546875, "learning_rate": 0.00019619908153895122, "loss": 0.366, "step": 1895 }, { "epoch": 0.8849474912485414, "grad_norm": 0.98828125, "learning_rate": 0.00019619507344220815, "loss": 0.3652, "step": 1896 }, { "epoch": 0.885414235705951, "grad_norm": 0.8671875, "learning_rate": 0.00019619106327427844, "loss": 0.3465, "step": 1897 }, { "epoch": 0.8858809801633606, "grad_norm": 0.90625, "learning_rate": 0.00019618705103524836, "loss": 0.3645, "step": 1898 }, { "epoch": 0.8863477246207702, "grad_norm": 0.92578125, "learning_rate": 0.0001961830367252044, "loss": 0.3429, "step": 1899 }, { "epoch": 0.8868144690781797, "grad_norm": 0.98046875, "learning_rate": 0.00019617902034423294, "loss": 0.3952, "step": 1900 }, { "epoch": 0.8872812135355893, "grad_norm": 0.99609375, "learning_rate": 0.00019617500189242045, "loss": 0.3707, "step": 1901 }, { "epoch": 0.8877479579929989, "grad_norm": 1.0546875, "learning_rate": 0.00019617098136985345, "loss": 0.4329, "step": 1902 }, { "epoch": 0.8882147024504085, "grad_norm": 0.83984375, "learning_rate": 0.00019616695877661851, "loss": 0.3569, "step": 1903 }, { "epoch": 0.8886814469078179, "grad_norm": 1.0546875, "learning_rate": 0.00019616293411280227, "loss": 0.3814, "step": 1904 }, { "epoch": 0.8891481913652275, "grad_norm": 0.95703125, "learning_rate": 0.00019615890737849137, "loss": 0.3459, "step": 1905 }, { "epoch": 0.8896149358226371, "grad_norm": 1.109375, "learning_rate": 0.00019615487857377247, "loss": 0.394, "step": 1906 }, { "epoch": 0.8900816802800466, "grad_norm": 0.8515625, "learning_rate": 0.0001961508476987324, "loss": 0.3676, "step": 1907 }, { "epoch": 0.8905484247374562, "grad_norm": 0.97265625, "learning_rate": 0.00019614681475345783, "loss": 0.3543, "step": 1908 }, { "epoch": 0.8910151691948658, "grad_norm": 1.0078125, "learning_rate": 0.00019614277973803573, "loss": 0.3679, "step": 1909 }, { "epoch": 0.8914819136522754, "grad_norm": 1.0, "learning_rate": 0.00019613874265255287, "loss": 0.369, "step": 1910 }, { "epoch": 0.8919486581096849, "grad_norm": 1.0, "learning_rate": 0.0001961347034970962, "loss": 0.3688, "step": 1911 }, { "epoch": 0.8924154025670945, "grad_norm": 0.875, "learning_rate": 0.00019613066227175275, "loss": 0.2986, "step": 1912 }, { "epoch": 0.8928821470245041, "grad_norm": 0.93359375, "learning_rate": 0.00019612661897660947, "loss": 0.3513, "step": 1913 }, { "epoch": 0.8933488914819137, "grad_norm": 1.0546875, "learning_rate": 0.00019612257361175338, "loss": 0.3795, "step": 1914 }, { "epoch": 0.8938156359393232, "grad_norm": 0.96875, "learning_rate": 0.00019611852617727168, "loss": 0.3989, "step": 1915 }, { "epoch": 0.8942823803967328, "grad_norm": 0.8046875, "learning_rate": 0.00019611447667325143, "loss": 0.325, "step": 1916 }, { "epoch": 0.8947491248541424, "grad_norm": 0.85546875, "learning_rate": 0.0001961104250997799, "loss": 0.2928, "step": 1917 }, { "epoch": 0.895215869311552, "grad_norm": 0.921875, "learning_rate": 0.00019610637145694426, "loss": 0.2861, "step": 1918 }, { "epoch": 0.8956826137689615, "grad_norm": 1.0, "learning_rate": 0.0001961023157448318, "loss": 0.3887, "step": 1919 }, { "epoch": 0.8961493582263711, "grad_norm": 0.96484375, "learning_rate": 0.00019609825796352987, "loss": 0.3222, "step": 1920 }, { "epoch": 0.8966161026837807, "grad_norm": 0.97265625, "learning_rate": 0.00019609419811312582, "loss": 0.3636, "step": 1921 }, { "epoch": 0.8970828471411902, "grad_norm": 0.80078125, "learning_rate": 0.00019609013619370706, "loss": 0.3066, "step": 1922 }, { "epoch": 0.8975495915985998, "grad_norm": 1.1171875, "learning_rate": 0.00019608607220536107, "loss": 0.4214, "step": 1923 }, { "epoch": 0.8980163360560094, "grad_norm": 1.046875, "learning_rate": 0.00019608200614817532, "loss": 0.4406, "step": 1924 }, { "epoch": 0.8984830805134189, "grad_norm": 0.89453125, "learning_rate": 0.00019607793802223737, "loss": 0.3549, "step": 1925 }, { "epoch": 0.8989498249708284, "grad_norm": 0.90625, "learning_rate": 0.00019607386782763482, "loss": 0.3276, "step": 1926 }, { "epoch": 0.899416569428238, "grad_norm": 0.96875, "learning_rate": 0.0001960697955644553, "loss": 0.2895, "step": 1927 }, { "epoch": 0.8998833138856476, "grad_norm": 0.9375, "learning_rate": 0.0001960657212327865, "loss": 0.3163, "step": 1928 }, { "epoch": 0.9003500583430571, "grad_norm": 0.953125, "learning_rate": 0.00019606164483271612, "loss": 0.4171, "step": 1929 }, { "epoch": 0.9008168028004667, "grad_norm": 0.7890625, "learning_rate": 0.00019605756636433195, "loss": 0.3123, "step": 1930 }, { "epoch": 0.9012835472578763, "grad_norm": 0.9453125, "learning_rate": 0.0001960534858277218, "loss": 0.3176, "step": 1931 }, { "epoch": 0.9017502917152859, "grad_norm": 0.890625, "learning_rate": 0.00019604940322297352, "loss": 0.2874, "step": 1932 }, { "epoch": 0.9022170361726954, "grad_norm": 0.9296875, "learning_rate": 0.00019604531855017503, "loss": 0.3031, "step": 1933 }, { "epoch": 0.902683780630105, "grad_norm": 0.94140625, "learning_rate": 0.00019604123180941424, "loss": 0.3409, "step": 1934 }, { "epoch": 0.9031505250875146, "grad_norm": 0.953125, "learning_rate": 0.0001960371430007792, "loss": 0.3479, "step": 1935 }, { "epoch": 0.9036172695449242, "grad_norm": 0.83203125, "learning_rate": 0.00019603305212435792, "loss": 0.3327, "step": 1936 }, { "epoch": 0.9040840140023337, "grad_norm": 0.953125, "learning_rate": 0.00019602895918023846, "loss": 0.3258, "step": 1937 }, { "epoch": 0.9045507584597433, "grad_norm": 1.265625, "learning_rate": 0.00019602486416850896, "loss": 0.4291, "step": 1938 }, { "epoch": 0.9050175029171529, "grad_norm": 0.76171875, "learning_rate": 0.00019602076708925763, "loss": 0.275, "step": 1939 }, { "epoch": 0.9054842473745625, "grad_norm": 0.94140625, "learning_rate": 0.00019601666794257262, "loss": 0.3257, "step": 1940 }, { "epoch": 0.905950991831972, "grad_norm": 1.1484375, "learning_rate": 0.0001960125667285422, "loss": 0.3307, "step": 1941 }, { "epoch": 0.9064177362893816, "grad_norm": 0.88671875, "learning_rate": 0.00019600846344725473, "loss": 0.3245, "step": 1942 }, { "epoch": 0.9068844807467912, "grad_norm": 0.9921875, "learning_rate": 0.00019600435809879847, "loss": 0.3343, "step": 1943 }, { "epoch": 0.9073512252042008, "grad_norm": 0.90234375, "learning_rate": 0.00019600025068326188, "loss": 0.3594, "step": 1944 }, { "epoch": 0.9078179696616102, "grad_norm": 0.98828125, "learning_rate": 0.0001959961412007334, "loss": 0.257, "step": 1945 }, { "epoch": 0.9082847141190198, "grad_norm": 0.9453125, "learning_rate": 0.00019599202965130146, "loss": 0.2809, "step": 1946 }, { "epoch": 0.9087514585764294, "grad_norm": 0.91015625, "learning_rate": 0.00019598791603505465, "loss": 0.346, "step": 1947 }, { "epoch": 0.9092182030338389, "grad_norm": 0.9765625, "learning_rate": 0.00019598380035208148, "loss": 0.3983, "step": 1948 }, { "epoch": 0.9096849474912485, "grad_norm": 1.0078125, "learning_rate": 0.0001959796826024706, "loss": 0.4167, "step": 1949 }, { "epoch": 0.9101516919486581, "grad_norm": 0.9921875, "learning_rate": 0.0001959755627863107, "loss": 0.3533, "step": 1950 }, { "epoch": 0.9106184364060677, "grad_norm": 0.859375, "learning_rate": 0.0001959714409036904, "loss": 0.2936, "step": 1951 }, { "epoch": 0.9110851808634772, "grad_norm": 1.0390625, "learning_rate": 0.0001959673169546985, "loss": 0.38, "step": 1952 }, { "epoch": 0.9115519253208868, "grad_norm": 0.91796875, "learning_rate": 0.00019596319093942376, "loss": 0.3085, "step": 1953 }, { "epoch": 0.9120186697782964, "grad_norm": 0.98828125, "learning_rate": 0.0001959590628579551, "loss": 0.3357, "step": 1954 }, { "epoch": 0.912485414235706, "grad_norm": 0.7421875, "learning_rate": 0.00019595493271038134, "loss": 0.2537, "step": 1955 }, { "epoch": 0.9129521586931155, "grad_norm": 1.03125, "learning_rate": 0.0001959508004967914, "loss": 0.3756, "step": 1956 }, { "epoch": 0.9134189031505251, "grad_norm": 0.8671875, "learning_rate": 0.00019594666621727426, "loss": 0.3032, "step": 1957 }, { "epoch": 0.9138856476079347, "grad_norm": 0.89453125, "learning_rate": 0.00019594252987191894, "loss": 0.2906, "step": 1958 }, { "epoch": 0.9143523920653442, "grad_norm": 0.85546875, "learning_rate": 0.00019593839146081452, "loss": 0.3394, "step": 1959 }, { "epoch": 0.9148191365227538, "grad_norm": 1.0546875, "learning_rate": 0.0001959342509840501, "loss": 0.3655, "step": 1960 }, { "epoch": 0.9152858809801634, "grad_norm": 1.0078125, "learning_rate": 0.0001959301084417148, "loss": 0.3079, "step": 1961 }, { "epoch": 0.915752625437573, "grad_norm": 0.98828125, "learning_rate": 0.00019592596383389782, "loss": 0.3948, "step": 1962 }, { "epoch": 0.9162193698949825, "grad_norm": 1.59375, "learning_rate": 0.0001959218171606884, "loss": 0.3659, "step": 1963 }, { "epoch": 0.9166861143523921, "grad_norm": 1.0625, "learning_rate": 0.00019591766842217583, "loss": 0.3954, "step": 1964 }, { "epoch": 0.9171528588098017, "grad_norm": 0.94921875, "learning_rate": 0.00019591351761844944, "loss": 0.2578, "step": 1965 }, { "epoch": 0.9176196032672111, "grad_norm": 0.91015625, "learning_rate": 0.00019590936474959863, "loss": 0.253, "step": 1966 }, { "epoch": 0.9180863477246207, "grad_norm": 0.9140625, "learning_rate": 0.00019590520981571275, "loss": 0.3245, "step": 1967 }, { "epoch": 0.9185530921820303, "grad_norm": 0.84375, "learning_rate": 0.0001959010528168813, "loss": 0.3099, "step": 1968 }, { "epoch": 0.9190198366394399, "grad_norm": 1.1171875, "learning_rate": 0.0001958968937531938, "loss": 0.39, "step": 1969 }, { "epoch": 0.9194865810968494, "grad_norm": 1.109375, "learning_rate": 0.00019589273262473974, "loss": 0.4934, "step": 1970 }, { "epoch": 0.919953325554259, "grad_norm": 0.8046875, "learning_rate": 0.0001958885694316088, "loss": 0.2898, "step": 1971 }, { "epoch": 0.9204200700116686, "grad_norm": 1.0703125, "learning_rate": 0.00019588440417389052, "loss": 0.3874, "step": 1972 }, { "epoch": 0.9208868144690782, "grad_norm": 0.87890625, "learning_rate": 0.00019588023685167464, "loss": 0.3158, "step": 1973 }, { "epoch": 0.9213535589264877, "grad_norm": 0.95703125, "learning_rate": 0.00019587606746505093, "loss": 0.3268, "step": 1974 }, { "epoch": 0.9218203033838973, "grad_norm": 0.98828125, "learning_rate": 0.00019587189601410904, "loss": 0.3633, "step": 1975 }, { "epoch": 0.9222870478413069, "grad_norm": 1.0234375, "learning_rate": 0.0001958677224989389, "loss": 0.3194, "step": 1976 }, { "epoch": 0.9227537922987165, "grad_norm": 1.125, "learning_rate": 0.0001958635469196303, "loss": 0.3088, "step": 1977 }, { "epoch": 0.923220536756126, "grad_norm": 0.87890625, "learning_rate": 0.0001958593692762732, "loss": 0.2646, "step": 1978 }, { "epoch": 0.9236872812135356, "grad_norm": 1.0546875, "learning_rate": 0.00019585518956895752, "loss": 0.3572, "step": 1979 }, { "epoch": 0.9241540256709452, "grad_norm": 0.76171875, "learning_rate": 0.00019585100779777326, "loss": 0.2942, "step": 1980 }, { "epoch": 0.9246207701283548, "grad_norm": 0.9453125, "learning_rate": 0.00019584682396281045, "loss": 0.3331, "step": 1981 }, { "epoch": 0.9250875145857643, "grad_norm": 1.078125, "learning_rate": 0.00019584263806415915, "loss": 0.3521, "step": 1982 }, { "epoch": 0.9255542590431739, "grad_norm": 1.0234375, "learning_rate": 0.00019583845010190954, "loss": 0.3404, "step": 1983 }, { "epoch": 0.9260210035005835, "grad_norm": 0.890625, "learning_rate": 0.00019583426007615176, "loss": 0.2842, "step": 1984 }, { "epoch": 0.926487747957993, "grad_norm": 1.0859375, "learning_rate": 0.000195830067986976, "loss": 0.3825, "step": 1985 }, { "epoch": 0.9269544924154026, "grad_norm": 1.1328125, "learning_rate": 0.00019582587383447258, "loss": 0.3745, "step": 1986 }, { "epoch": 0.9274212368728121, "grad_norm": 1.0234375, "learning_rate": 0.0001958216776187318, "loss": 0.3401, "step": 1987 }, { "epoch": 0.9278879813302217, "grad_norm": 0.92578125, "learning_rate": 0.00019581747933984395, "loss": 0.3232, "step": 1988 }, { "epoch": 0.9283547257876312, "grad_norm": 0.8984375, "learning_rate": 0.00019581327899789947, "loss": 0.3038, "step": 1989 }, { "epoch": 0.9288214702450408, "grad_norm": 0.94921875, "learning_rate": 0.00019580907659298878, "loss": 0.3631, "step": 1990 }, { "epoch": 0.9292882147024504, "grad_norm": 0.90625, "learning_rate": 0.00019580487212520238, "loss": 0.3289, "step": 1991 }, { "epoch": 0.92975495915986, "grad_norm": 0.93359375, "learning_rate": 0.00019580066559463078, "loss": 0.3263, "step": 1992 }, { "epoch": 0.9302217036172695, "grad_norm": 1.0078125, "learning_rate": 0.00019579645700136452, "loss": 0.3576, "step": 1993 }, { "epoch": 0.9306884480746791, "grad_norm": 0.921875, "learning_rate": 0.00019579224634549432, "loss": 0.3478, "step": 1994 }, { "epoch": 0.9311551925320887, "grad_norm": 1.0546875, "learning_rate": 0.00019578803362711074, "loss": 0.4191, "step": 1995 }, { "epoch": 0.9316219369894982, "grad_norm": 0.96484375, "learning_rate": 0.0001957838188463045, "loss": 0.3105, "step": 1996 }, { "epoch": 0.9320886814469078, "grad_norm": 1.109375, "learning_rate": 0.0001957796020031664, "loss": 0.401, "step": 1997 }, { "epoch": 0.9325554259043174, "grad_norm": 0.94921875, "learning_rate": 0.0001957753830977872, "loss": 0.3612, "step": 1998 }, { "epoch": 0.933022170361727, "grad_norm": 0.75390625, "learning_rate": 0.0001957711621302577, "loss": 0.2855, "step": 1999 }, { "epoch": 0.9334889148191365, "grad_norm": 0.8671875, "learning_rate": 0.00019576693910066883, "loss": 0.3532, "step": 2000 }, { "epoch": 0.9339556592765461, "grad_norm": 0.96484375, "learning_rate": 0.00019576271400911153, "loss": 0.3911, "step": 2001 }, { "epoch": 0.9344224037339557, "grad_norm": 0.81640625, "learning_rate": 0.00019575848685567674, "loss": 0.3296, "step": 2002 }, { "epoch": 0.9348891481913653, "grad_norm": 0.828125, "learning_rate": 0.00019575425764045546, "loss": 0.3503, "step": 2003 }, { "epoch": 0.9353558926487748, "grad_norm": 1.015625, "learning_rate": 0.00019575002636353878, "loss": 0.3773, "step": 2004 }, { "epoch": 0.9358226371061844, "grad_norm": 0.93359375, "learning_rate": 0.00019574579302501782, "loss": 0.3565, "step": 2005 }, { "epoch": 0.936289381563594, "grad_norm": 1.0546875, "learning_rate": 0.00019574155762498369, "loss": 0.3327, "step": 2006 }, { "epoch": 0.9367561260210036, "grad_norm": 0.87109375, "learning_rate": 0.00019573732016352758, "loss": 0.3242, "step": 2007 }, { "epoch": 0.937222870478413, "grad_norm": 0.95703125, "learning_rate": 0.00019573308064074077, "loss": 0.3316, "step": 2008 }, { "epoch": 0.9376896149358226, "grad_norm": 1.0234375, "learning_rate": 0.0001957288390567145, "loss": 0.339, "step": 2009 }, { "epoch": 0.9381563593932322, "grad_norm": 0.87109375, "learning_rate": 0.00019572459541154013, "loss": 0.2443, "step": 2010 }, { "epoch": 0.9386231038506417, "grad_norm": 0.9765625, "learning_rate": 0.00019572034970530896, "loss": 0.3403, "step": 2011 }, { "epoch": 0.9390898483080513, "grad_norm": 1.28125, "learning_rate": 0.00019571610193811254, "loss": 0.4092, "step": 2012 }, { "epoch": 0.9395565927654609, "grad_norm": 0.96484375, "learning_rate": 0.0001957118521100422, "loss": 0.3658, "step": 2013 }, { "epoch": 0.9400233372228705, "grad_norm": 0.9609375, "learning_rate": 0.00019570760022118947, "loss": 0.3343, "step": 2014 }, { "epoch": 0.94049008168028, "grad_norm": 0.8984375, "learning_rate": 0.00019570334627164594, "loss": 0.33, "step": 2015 }, { "epoch": 0.9409568261376896, "grad_norm": 0.9609375, "learning_rate": 0.00019569909026150318, "loss": 0.3458, "step": 2016 }, { "epoch": 0.9414235705950992, "grad_norm": 0.95703125, "learning_rate": 0.00019569483219085285, "loss": 0.3078, "step": 2017 }, { "epoch": 0.9418903150525088, "grad_norm": 0.8984375, "learning_rate": 0.00019569057205978658, "loss": 0.2643, "step": 2018 }, { "epoch": 0.9423570595099183, "grad_norm": 0.89453125, "learning_rate": 0.00019568630986839614, "loss": 0.2847, "step": 2019 }, { "epoch": 0.9428238039673279, "grad_norm": 0.94140625, "learning_rate": 0.00019568204561677325, "loss": 0.2386, "step": 2020 }, { "epoch": 0.9432905484247375, "grad_norm": 1.2421875, "learning_rate": 0.0001956777793050098, "loss": 0.3289, "step": 2021 }, { "epoch": 0.943757292882147, "grad_norm": 0.9921875, "learning_rate": 0.00019567351093319757, "loss": 0.3323, "step": 2022 }, { "epoch": 0.9442240373395566, "grad_norm": 1.046875, "learning_rate": 0.00019566924050142852, "loss": 0.3465, "step": 2023 }, { "epoch": 0.9446907817969662, "grad_norm": 0.87109375, "learning_rate": 0.00019566496800979457, "loss": 0.2917, "step": 2024 }, { "epoch": 0.9451575262543758, "grad_norm": 0.984375, "learning_rate": 0.0001956606934583877, "loss": 0.3368, "step": 2025 }, { "epoch": 0.9456242707117853, "grad_norm": 0.875, "learning_rate": 0.00019565641684729998, "loss": 0.271, "step": 2026 }, { "epoch": 0.9460910151691949, "grad_norm": 0.90625, "learning_rate": 0.00019565213817662348, "loss": 0.2888, "step": 2027 }, { "epoch": 0.9465577596266045, "grad_norm": 0.8359375, "learning_rate": 0.0001956478574464503, "loss": 0.3175, "step": 2028 }, { "epoch": 0.947024504084014, "grad_norm": 1.0234375, "learning_rate": 0.00019564357465687263, "loss": 0.3547, "step": 2029 }, { "epoch": 0.9474912485414235, "grad_norm": 0.98046875, "learning_rate": 0.00019563928980798268, "loss": 0.2719, "step": 2030 }, { "epoch": 0.9479579929988331, "grad_norm": 0.91796875, "learning_rate": 0.00019563500289987272, "loss": 0.3066, "step": 2031 }, { "epoch": 0.9484247374562427, "grad_norm": 0.83984375, "learning_rate": 0.000195630713932635, "loss": 0.2879, "step": 2032 }, { "epoch": 0.9488914819136522, "grad_norm": 1.09375, "learning_rate": 0.00019562642290636193, "loss": 0.4229, "step": 2033 }, { "epoch": 0.9493582263710618, "grad_norm": 1.1015625, "learning_rate": 0.0001956221298211459, "loss": 0.3117, "step": 2034 }, { "epoch": 0.9498249708284714, "grad_norm": 0.99609375, "learning_rate": 0.0001956178346770793, "loss": 0.3059, "step": 2035 }, { "epoch": 0.950291715285881, "grad_norm": 1.1484375, "learning_rate": 0.0001956135374742546, "loss": 0.4111, "step": 2036 }, { "epoch": 0.9507584597432905, "grad_norm": 1.0078125, "learning_rate": 0.0001956092382127644, "loss": 0.3413, "step": 2037 }, { "epoch": 0.9512252042007001, "grad_norm": 1.0390625, "learning_rate": 0.0001956049368927012, "loss": 0.389, "step": 2038 }, { "epoch": 0.9516919486581097, "grad_norm": 1.296875, "learning_rate": 0.00019560063351415762, "loss": 0.4335, "step": 2039 }, { "epoch": 0.9521586931155193, "grad_norm": 0.98828125, "learning_rate": 0.00019559632807722633, "loss": 0.3232, "step": 2040 }, { "epoch": 0.9526254375729288, "grad_norm": 0.890625, "learning_rate": 0.00019559202058200005, "loss": 0.3001, "step": 2041 }, { "epoch": 0.9530921820303384, "grad_norm": 1.0859375, "learning_rate": 0.00019558771102857152, "loss": 0.3469, "step": 2042 }, { "epoch": 0.953558926487748, "grad_norm": 0.98828125, "learning_rate": 0.0001955833994170335, "loss": 0.3511, "step": 2043 }, { "epoch": 0.9540256709451576, "grad_norm": 0.984375, "learning_rate": 0.00019557908574747884, "loss": 0.3203, "step": 2044 }, { "epoch": 0.9544924154025671, "grad_norm": 1.1484375, "learning_rate": 0.0001955747700200004, "loss": 0.4013, "step": 2045 }, { "epoch": 0.9549591598599767, "grad_norm": 1.0234375, "learning_rate": 0.00019557045223469113, "loss": 0.4068, "step": 2046 }, { "epoch": 0.9554259043173863, "grad_norm": 1.0390625, "learning_rate": 0.000195566132391644, "loss": 0.3385, "step": 2047 }, { "epoch": 0.9558926487747959, "grad_norm": 0.9375, "learning_rate": 0.00019556181049095196, "loss": 0.3085, "step": 2048 }, { "epoch": 0.9563593932322053, "grad_norm": 0.83203125, "learning_rate": 0.00019555748653270815, "loss": 0.3056, "step": 2049 }, { "epoch": 0.9568261376896149, "grad_norm": 0.9921875, "learning_rate": 0.00019555316051700564, "loss": 0.2945, "step": 2050 }, { "epoch": 0.9572928821470245, "grad_norm": 1.0, "learning_rate": 0.00019554883244393754, "loss": 0.2822, "step": 2051 }, { "epoch": 0.957759626604434, "grad_norm": 0.91796875, "learning_rate": 0.0001955445023135971, "loss": 0.32, "step": 2052 }, { "epoch": 0.9582263710618436, "grad_norm": 1.0390625, "learning_rate": 0.0001955401701260775, "loss": 0.2972, "step": 2053 }, { "epoch": 0.9586931155192532, "grad_norm": 0.93359375, "learning_rate": 0.00019553583588147202, "loss": 0.3058, "step": 2054 }, { "epoch": 0.9591598599766628, "grad_norm": 1.015625, "learning_rate": 0.000195531499579874, "loss": 0.3197, "step": 2055 }, { "epoch": 0.9596266044340723, "grad_norm": 0.98046875, "learning_rate": 0.0001955271612213768, "loss": 0.2878, "step": 2056 }, { "epoch": 0.9600933488914819, "grad_norm": 0.97265625, "learning_rate": 0.00019552282080607385, "loss": 0.2634, "step": 2057 }, { "epoch": 0.9605600933488915, "grad_norm": 0.984375, "learning_rate": 0.00019551847833405855, "loss": 0.2922, "step": 2058 }, { "epoch": 0.961026837806301, "grad_norm": 0.90234375, "learning_rate": 0.00019551413380542446, "loss": 0.272, "step": 2059 }, { "epoch": 0.9614935822637106, "grad_norm": 1.0234375, "learning_rate": 0.0001955097872202651, "loss": 0.3444, "step": 2060 }, { "epoch": 0.9619603267211202, "grad_norm": 1.1953125, "learning_rate": 0.000195505438578674, "loss": 0.4296, "step": 2061 }, { "epoch": 0.9624270711785298, "grad_norm": 0.98046875, "learning_rate": 0.0001955010878807449, "loss": 0.376, "step": 2062 }, { "epoch": 0.9628938156359393, "grad_norm": 1.0, "learning_rate": 0.00019549673512657138, "loss": 0.3584, "step": 2063 }, { "epoch": 0.9633605600933489, "grad_norm": 0.9375, "learning_rate": 0.0001954923803162472, "loss": 0.3325, "step": 2064 }, { "epoch": 0.9638273045507585, "grad_norm": 0.86328125, "learning_rate": 0.00019548802344986616, "loss": 0.3009, "step": 2065 }, { "epoch": 0.9642940490081681, "grad_norm": 0.953125, "learning_rate": 0.00019548366452752197, "loss": 0.3958, "step": 2066 }, { "epoch": 0.9647607934655776, "grad_norm": 0.83984375, "learning_rate": 0.0001954793035493086, "loss": 0.3345, "step": 2067 }, { "epoch": 0.9652275379229872, "grad_norm": 1.0390625, "learning_rate": 0.00019547494051531987, "loss": 0.3474, "step": 2068 }, { "epoch": 0.9656942823803968, "grad_norm": 0.984375, "learning_rate": 0.00019547057542564973, "loss": 0.3191, "step": 2069 }, { "epoch": 0.9661610268378062, "grad_norm": 0.85546875, "learning_rate": 0.00019546620828039216, "loss": 0.3123, "step": 2070 }, { "epoch": 0.9666277712952158, "grad_norm": 0.90625, "learning_rate": 0.0001954618390796412, "loss": 0.299, "step": 2071 }, { "epoch": 0.9670945157526254, "grad_norm": 1.0546875, "learning_rate": 0.00019545746782349097, "loss": 0.4014, "step": 2072 }, { "epoch": 0.967561260210035, "grad_norm": 1.1328125, "learning_rate": 0.00019545309451203554, "loss": 0.3896, "step": 2073 }, { "epoch": 0.9680280046674445, "grad_norm": 0.92578125, "learning_rate": 0.00019544871914536907, "loss": 0.3294, "step": 2074 }, { "epoch": 0.9684947491248541, "grad_norm": 1.015625, "learning_rate": 0.00019544434172358576, "loss": 0.3798, "step": 2075 }, { "epoch": 0.9689614935822637, "grad_norm": 0.875, "learning_rate": 0.00019543996224677987, "loss": 0.2892, "step": 2076 }, { "epoch": 0.9694282380396733, "grad_norm": 1.0859375, "learning_rate": 0.00019543558071504573, "loss": 0.3419, "step": 2077 }, { "epoch": 0.9698949824970828, "grad_norm": 1.03125, "learning_rate": 0.00019543119712847764, "loss": 0.3206, "step": 2078 }, { "epoch": 0.9703617269544924, "grad_norm": 0.96484375, "learning_rate": 0.00019542681148716998, "loss": 0.3337, "step": 2079 }, { "epoch": 0.970828471411902, "grad_norm": 0.96875, "learning_rate": 0.00019542242379121718, "loss": 0.3223, "step": 2080 }, { "epoch": 0.9712952158693116, "grad_norm": 0.8671875, "learning_rate": 0.00019541803404071376, "loss": 0.2642, "step": 2081 }, { "epoch": 0.9717619603267211, "grad_norm": 1.03125, "learning_rate": 0.00019541364223575416, "loss": 0.3075, "step": 2082 }, { "epoch": 0.9722287047841307, "grad_norm": 1.0859375, "learning_rate": 0.00019540924837643299, "loss": 0.2677, "step": 2083 }, { "epoch": 0.9726954492415403, "grad_norm": 1.0390625, "learning_rate": 0.00019540485246284485, "loss": 0.3498, "step": 2084 }, { "epoch": 0.9731621936989499, "grad_norm": 1.0546875, "learning_rate": 0.0001954004544950844, "loss": 0.3377, "step": 2085 }, { "epoch": 0.9736289381563594, "grad_norm": 1.234375, "learning_rate": 0.0001953960544732463, "loss": 0.343, "step": 2086 }, { "epoch": 0.974095682613769, "grad_norm": 1.0546875, "learning_rate": 0.0001953916523974253, "loss": 0.2828, "step": 2087 }, { "epoch": 0.9745624270711786, "grad_norm": 1.078125, "learning_rate": 0.0001953872482677162, "loss": 0.305, "step": 2088 }, { "epoch": 0.9750291715285881, "grad_norm": 1.1015625, "learning_rate": 0.00019538284208421377, "loss": 0.2998, "step": 2089 }, { "epoch": 0.9754959159859977, "grad_norm": 0.953125, "learning_rate": 0.000195378433847013, "loss": 0.3505, "step": 2090 }, { "epoch": 0.9759626604434072, "grad_norm": 0.97265625, "learning_rate": 0.00019537402355620866, "loss": 0.333, "step": 2091 }, { "epoch": 0.9764294049008168, "grad_norm": 1.046875, "learning_rate": 0.00019536961121189577, "loss": 0.3503, "step": 2092 }, { "epoch": 0.9768961493582263, "grad_norm": 1.1796875, "learning_rate": 0.00019536519681416937, "loss": 0.3627, "step": 2093 }, { "epoch": 0.9773628938156359, "grad_norm": 0.92578125, "learning_rate": 0.00019536078036312446, "loss": 0.2954, "step": 2094 }, { "epoch": 0.9778296382730455, "grad_norm": 0.85546875, "learning_rate": 0.00019535636185885613, "loss": 0.2887, "step": 2095 }, { "epoch": 0.978296382730455, "grad_norm": 0.8203125, "learning_rate": 0.00019535194130145954, "loss": 0.2876, "step": 2096 }, { "epoch": 0.9787631271878646, "grad_norm": 1.03125, "learning_rate": 0.00019534751869102986, "loss": 0.3709, "step": 2097 }, { "epoch": 0.9792298716452742, "grad_norm": 0.8828125, "learning_rate": 0.00019534309402766234, "loss": 0.2436, "step": 2098 }, { "epoch": 0.9796966161026838, "grad_norm": 0.80859375, "learning_rate": 0.00019533866731145217, "loss": 0.2366, "step": 2099 }, { "epoch": 0.9801633605600933, "grad_norm": 0.9140625, "learning_rate": 0.00019533423854249473, "loss": 0.2663, "step": 2100 }, { "epoch": 0.9806301050175029, "grad_norm": 0.92578125, "learning_rate": 0.00019532980772088537, "loss": 0.3062, "step": 2101 }, { "epoch": 0.9810968494749125, "grad_norm": 1.03125, "learning_rate": 0.0001953253748467195, "loss": 0.3535, "step": 2102 }, { "epoch": 0.9815635939323221, "grad_norm": 1.1796875, "learning_rate": 0.00019532093992009252, "loss": 0.387, "step": 2103 }, { "epoch": 0.9820303383897316, "grad_norm": 1.1015625, "learning_rate": 0.00019531650294109994, "loss": 0.3244, "step": 2104 }, { "epoch": 0.9824970828471412, "grad_norm": 0.90234375, "learning_rate": 0.00019531206390983728, "loss": 0.2721, "step": 2105 }, { "epoch": 0.9829638273045508, "grad_norm": 1.1171875, "learning_rate": 0.00019530762282640016, "loss": 0.3763, "step": 2106 }, { "epoch": 0.9834305717619604, "grad_norm": 0.98828125, "learning_rate": 0.00019530317969088416, "loss": 0.2861, "step": 2107 }, { "epoch": 0.9838973162193699, "grad_norm": 0.79296875, "learning_rate": 0.00019529873450338498, "loss": 0.2865, "step": 2108 }, { "epoch": 0.9843640606767795, "grad_norm": 0.91796875, "learning_rate": 0.0001952942872639983, "loss": 0.3348, "step": 2109 }, { "epoch": 0.9848308051341891, "grad_norm": 1.03125, "learning_rate": 0.00019528983797281987, "loss": 0.3812, "step": 2110 }, { "epoch": 0.9852975495915987, "grad_norm": 0.9296875, "learning_rate": 0.00019528538662994552, "loss": 0.2858, "step": 2111 }, { "epoch": 0.9857642940490081, "grad_norm": 0.91796875, "learning_rate": 0.00019528093323547107, "loss": 0.3019, "step": 2112 }, { "epoch": 0.9862310385064177, "grad_norm": 0.94140625, "learning_rate": 0.00019527647778949244, "loss": 0.294, "step": 2113 }, { "epoch": 0.9866977829638273, "grad_norm": 0.94140625, "learning_rate": 0.0001952720202921055, "loss": 0.3028, "step": 2114 }, { "epoch": 0.9871645274212368, "grad_norm": 0.9140625, "learning_rate": 0.00019526756074340624, "loss": 0.2922, "step": 2115 }, { "epoch": 0.9876312718786464, "grad_norm": 1.078125, "learning_rate": 0.00019526309914349072, "loss": 0.3568, "step": 2116 }, { "epoch": 0.988098016336056, "grad_norm": 0.83984375, "learning_rate": 0.00019525863549245498, "loss": 0.2498, "step": 2117 }, { "epoch": 0.9885647607934656, "grad_norm": 1.15625, "learning_rate": 0.00019525416979039513, "loss": 0.3667, "step": 2118 }, { "epoch": 0.9890315052508751, "grad_norm": 1.0, "learning_rate": 0.00019524970203740728, "loss": 0.2874, "step": 2119 }, { "epoch": 0.9894982497082847, "grad_norm": 1.09375, "learning_rate": 0.00019524523223358773, "loss": 0.3162, "step": 2120 }, { "epoch": 0.9899649941656943, "grad_norm": 1.0234375, "learning_rate": 0.0001952407603790326, "loss": 0.3, "step": 2121 }, { "epoch": 0.9904317386231039, "grad_norm": 1.078125, "learning_rate": 0.00019523628647383825, "loss": 0.3815, "step": 2122 }, { "epoch": 0.9908984830805134, "grad_norm": 1.0625, "learning_rate": 0.000195231810518101, "loss": 0.3099, "step": 2123 }, { "epoch": 0.991365227537923, "grad_norm": 1.125, "learning_rate": 0.00019522733251191716, "loss": 0.3147, "step": 2124 }, { "epoch": 0.9918319719953326, "grad_norm": 0.9609375, "learning_rate": 0.00019522285245538324, "loss": 0.2891, "step": 2125 }, { "epoch": 0.9922987164527421, "grad_norm": 0.94921875, "learning_rate": 0.00019521837034859564, "loss": 0.3103, "step": 2126 }, { "epoch": 0.9927654609101517, "grad_norm": 1.0390625, "learning_rate": 0.00019521388619165087, "loss": 0.268, "step": 2127 }, { "epoch": 0.9932322053675613, "grad_norm": 1.2109375, "learning_rate": 0.0001952093999846455, "loss": 0.4177, "step": 2128 }, { "epoch": 0.9936989498249709, "grad_norm": 0.9609375, "learning_rate": 0.0001952049117276761, "loss": 0.3214, "step": 2129 }, { "epoch": 0.9941656942823804, "grad_norm": 0.9296875, "learning_rate": 0.00019520042142083933, "loss": 0.287, "step": 2130 }, { "epoch": 0.99463243873979, "grad_norm": 0.953125, "learning_rate": 0.00019519592906423188, "loss": 0.2723, "step": 2131 }, { "epoch": 0.9950991831971996, "grad_norm": 0.91796875, "learning_rate": 0.00019519143465795042, "loss": 0.3087, "step": 2132 }, { "epoch": 0.995565927654609, "grad_norm": 1.171875, "learning_rate": 0.00019518693820209175, "loss": 0.3426, "step": 2133 }, { "epoch": 0.9960326721120186, "grad_norm": 1.0625, "learning_rate": 0.0001951824396967527, "loss": 0.3383, "step": 2134 }, { "epoch": 0.9964994165694282, "grad_norm": 1.0703125, "learning_rate": 0.00019517793914203014, "loss": 0.3288, "step": 2135 }, { "epoch": 0.9969661610268378, "grad_norm": 0.91796875, "learning_rate": 0.00019517343653802094, "loss": 0.2814, "step": 2136 }, { "epoch": 0.9974329054842473, "grad_norm": 0.9921875, "learning_rate": 0.00019516893188482203, "loss": 0.2673, "step": 2137 }, { "epoch": 0.9978996499416569, "grad_norm": 0.9296875, "learning_rate": 0.00019516442518253046, "loss": 0.2672, "step": 2138 }, { "epoch": 0.9983663943990665, "grad_norm": 1.1015625, "learning_rate": 0.0001951599164312432, "loss": 0.2843, "step": 2139 }, { "epoch": 0.9988331388564761, "grad_norm": 0.8125, "learning_rate": 0.00019515540563105739, "loss": 0.2489, "step": 2140 }, { "epoch": 0.9992998833138856, "grad_norm": 0.99609375, "learning_rate": 0.0001951508927820701, "loss": 0.3219, "step": 2141 }, { "epoch": 0.9997666277712952, "grad_norm": 0.86328125, "learning_rate": 0.0001951463778843785, "loss": 0.2532, "step": 2142 }, { "epoch": 1.0002333722287047, "grad_norm": 0.98828125, "learning_rate": 0.00019514186093807983, "loss": 0.2466, "step": 2143 }, { "epoch": 1.0007001166861142, "grad_norm": 1.0625, "learning_rate": 0.00019513734194327134, "loss": 0.2551, "step": 2144 }, { "epoch": 1.0007001166861142, "eval_loss": 1.2306029796600342, "eval_runtime": 93.8413, "eval_samples_per_second": 19.224, "eval_steps_per_second": 2.408, "step": 2144 }, { "epoch": 1.0011668611435238, "grad_norm": 1.140625, "learning_rate": 0.00019513282090005032, "loss": 0.2502, "step": 2145 }, { "epoch": 1.0016336056009334, "grad_norm": 0.9453125, "learning_rate": 0.0001951282978085141, "loss": 0.2808, "step": 2146 }, { "epoch": 1.002100350058343, "grad_norm": 1.140625, "learning_rate": 0.0001951237726687601, "loss": 0.2566, "step": 2147 }, { "epoch": 1.0025670945157525, "grad_norm": 0.8515625, "learning_rate": 0.00019511924548088573, "loss": 0.2137, "step": 2148 }, { "epoch": 1.0030338389731621, "grad_norm": 1.0859375, "learning_rate": 0.00019511471624498845, "loss": 0.2158, "step": 2149 }, { "epoch": 1.0035005834305717, "grad_norm": 0.91015625, "learning_rate": 0.0001951101849611658, "loss": 0.224, "step": 2150 }, { "epoch": 1.0039673278879813, "grad_norm": 1.140625, "learning_rate": 0.00019510565162951537, "loss": 0.2626, "step": 2151 }, { "epoch": 1.0044340723453908, "grad_norm": 1.1484375, "learning_rate": 0.00019510111625013472, "loss": 0.2251, "step": 2152 }, { "epoch": 1.0049008168028004, "grad_norm": 0.96875, "learning_rate": 0.0001950965788231215, "loss": 0.23, "step": 2153 }, { "epoch": 1.00536756126021, "grad_norm": 1.0, "learning_rate": 0.00019509203934857348, "loss": 0.2859, "step": 2154 }, { "epoch": 1.0058343057176196, "grad_norm": 0.9140625, "learning_rate": 0.0001950874978265883, "loss": 0.2624, "step": 2155 }, { "epoch": 1.0063010501750291, "grad_norm": 0.96484375, "learning_rate": 0.00019508295425726382, "loss": 0.2305, "step": 2156 }, { "epoch": 1.0067677946324387, "grad_norm": 0.87109375, "learning_rate": 0.0001950784086406978, "loss": 0.2771, "step": 2157 }, { "epoch": 1.0072345390898483, "grad_norm": 0.91796875, "learning_rate": 0.0001950738609769882, "loss": 0.2428, "step": 2158 }, { "epoch": 1.0077012835472579, "grad_norm": 1.0625, "learning_rate": 0.00019506931126623284, "loss": 0.2298, "step": 2159 }, { "epoch": 1.0081680280046674, "grad_norm": 1.0234375, "learning_rate": 0.00019506475950852977, "loss": 0.2872, "step": 2160 }, { "epoch": 1.008634772462077, "grad_norm": 0.953125, "learning_rate": 0.0001950602057039769, "loss": 0.2608, "step": 2161 }, { "epoch": 1.0091015169194866, "grad_norm": 0.890625, "learning_rate": 0.00019505564985267237, "loss": 0.2544, "step": 2162 }, { "epoch": 1.0095682613768961, "grad_norm": 1.109375, "learning_rate": 0.00019505109195471423, "loss": 0.234, "step": 2163 }, { "epoch": 1.0100350058343057, "grad_norm": 0.8359375, "learning_rate": 0.0001950465320102006, "loss": 0.182, "step": 2164 }, { "epoch": 1.0105017502917153, "grad_norm": 1.0078125, "learning_rate": 0.00019504197001922967, "loss": 0.2491, "step": 2165 }, { "epoch": 1.0109684947491249, "grad_norm": 0.8828125, "learning_rate": 0.00019503740598189972, "loss": 0.1935, "step": 2166 }, { "epoch": 1.0114352392065344, "grad_norm": 0.92578125, "learning_rate": 0.00019503283989830895, "loss": 0.2332, "step": 2167 }, { "epoch": 1.011901983663944, "grad_norm": 1.0078125, "learning_rate": 0.0001950282717685557, "loss": 0.2799, "step": 2168 }, { "epoch": 1.0123687281213536, "grad_norm": 1.0546875, "learning_rate": 0.0001950237015927383, "loss": 0.2629, "step": 2169 }, { "epoch": 1.0128354725787632, "grad_norm": 1.015625, "learning_rate": 0.00019501912937095522, "loss": 0.2301, "step": 2170 }, { "epoch": 1.0133022170361727, "grad_norm": 0.921875, "learning_rate": 0.00019501455510330482, "loss": 0.214, "step": 2171 }, { "epoch": 1.0137689614935823, "grad_norm": 0.80859375, "learning_rate": 0.00019500997878988562, "loss": 0.1599, "step": 2172 }, { "epoch": 1.0142357059509919, "grad_norm": 0.9140625, "learning_rate": 0.0001950054004307962, "loss": 0.2481, "step": 2173 }, { "epoch": 1.0147024504084015, "grad_norm": 1.0390625, "learning_rate": 0.00019500082002613507, "loss": 0.2762, "step": 2174 }, { "epoch": 1.015169194865811, "grad_norm": 0.84375, "learning_rate": 0.0001949962375760009, "loss": 0.2261, "step": 2175 }, { "epoch": 1.0156359393232206, "grad_norm": 0.9453125, "learning_rate": 0.00019499165308049232, "loss": 0.2385, "step": 2176 }, { "epoch": 1.0161026837806302, "grad_norm": 1.0078125, "learning_rate": 0.00019498706653970805, "loss": 0.2564, "step": 2177 }, { "epoch": 1.0165694282380398, "grad_norm": 1.015625, "learning_rate": 0.00019498247795374688, "loss": 0.2645, "step": 2178 }, { "epoch": 1.0170361726954493, "grad_norm": 1.0546875, "learning_rate": 0.00019497788732270754, "loss": 0.2569, "step": 2179 }, { "epoch": 1.017502917152859, "grad_norm": 0.99609375, "learning_rate": 0.0001949732946466889, "loss": 0.2739, "step": 2180 }, { "epoch": 1.0179696616102685, "grad_norm": 1.0234375, "learning_rate": 0.0001949686999257899, "loss": 0.2604, "step": 2181 }, { "epoch": 1.018436406067678, "grad_norm": 0.88671875, "learning_rate": 0.00019496410316010936, "loss": 0.2632, "step": 2182 }, { "epoch": 1.0189031505250876, "grad_norm": 0.8046875, "learning_rate": 0.00019495950434974635, "loss": 0.2007, "step": 2183 }, { "epoch": 1.0193698949824972, "grad_norm": 1.28125, "learning_rate": 0.00019495490349479987, "loss": 0.2974, "step": 2184 }, { "epoch": 1.0198366394399065, "grad_norm": 0.9921875, "learning_rate": 0.00019495030059536893, "loss": 0.2166, "step": 2185 }, { "epoch": 1.0203033838973161, "grad_norm": 1.1328125, "learning_rate": 0.00019494569565155266, "loss": 0.3114, "step": 2186 }, { "epoch": 1.0207701283547257, "grad_norm": 0.75390625, "learning_rate": 0.00019494108866345024, "loss": 0.1917, "step": 2187 }, { "epoch": 1.0212368728121353, "grad_norm": 0.87109375, "learning_rate": 0.00019493647963116084, "loss": 0.2037, "step": 2188 }, { "epoch": 1.0217036172695448, "grad_norm": 0.9765625, "learning_rate": 0.00019493186855478373, "loss": 0.2702, "step": 2189 }, { "epoch": 1.0221703617269544, "grad_norm": 1.1171875, "learning_rate": 0.0001949272554344181, "loss": 0.3081, "step": 2190 }, { "epoch": 1.022637106184364, "grad_norm": 0.99609375, "learning_rate": 0.00019492264027016337, "loss": 0.2116, "step": 2191 }, { "epoch": 1.0231038506417736, "grad_norm": 0.8203125, "learning_rate": 0.00019491802306211887, "loss": 0.2118, "step": 2192 }, { "epoch": 1.0235705950991831, "grad_norm": 1.09375, "learning_rate": 0.000194913403810384, "loss": 0.2531, "step": 2193 }, { "epoch": 1.0240373395565927, "grad_norm": 0.953125, "learning_rate": 0.0001949087825150583, "loss": 0.3199, "step": 2194 }, { "epoch": 1.0245040840140023, "grad_norm": 0.9921875, "learning_rate": 0.00019490415917624115, "loss": 0.1776, "step": 2195 }, { "epoch": 1.0249708284714119, "grad_norm": 0.9921875, "learning_rate": 0.00019489953379403217, "loss": 0.2414, "step": 2196 }, { "epoch": 1.0254375729288214, "grad_norm": 1.1875, "learning_rate": 0.00019489490636853094, "loss": 0.3021, "step": 2197 }, { "epoch": 1.025904317386231, "grad_norm": 0.96484375, "learning_rate": 0.0001948902768998371, "loss": 0.2418, "step": 2198 }, { "epoch": 1.0263710618436406, "grad_norm": 0.92578125, "learning_rate": 0.0001948856453880503, "loss": 0.2139, "step": 2199 }, { "epoch": 1.0268378063010501, "grad_norm": 0.99609375, "learning_rate": 0.00019488101183327028, "loss": 0.2284, "step": 2200 }, { "epoch": 1.0273045507584597, "grad_norm": 1.1640625, "learning_rate": 0.0001948763762355968, "loss": 0.2503, "step": 2201 }, { "epoch": 1.0277712952158693, "grad_norm": 1.015625, "learning_rate": 0.0001948717385951297, "loss": 0.2955, "step": 2202 }, { "epoch": 1.0282380396732789, "grad_norm": 0.9453125, "learning_rate": 0.00019486709891196875, "loss": 0.2613, "step": 2203 }, { "epoch": 1.0287047841306884, "grad_norm": 0.85546875, "learning_rate": 0.00019486245718621395, "loss": 0.2269, "step": 2204 }, { "epoch": 1.029171528588098, "grad_norm": 0.90625, "learning_rate": 0.0001948578134179652, "loss": 0.256, "step": 2205 }, { "epoch": 1.0296382730455076, "grad_norm": 0.796875, "learning_rate": 0.00019485316760732243, "loss": 0.226, "step": 2206 }, { "epoch": 1.0301050175029172, "grad_norm": 0.69921875, "learning_rate": 0.00019484851975438576, "loss": 0.2082, "step": 2207 }, { "epoch": 1.0305717619603267, "grad_norm": 1.0546875, "learning_rate": 0.00019484386985925523, "loss": 0.2892, "step": 2208 }, { "epoch": 1.0310385064177363, "grad_norm": 1.0546875, "learning_rate": 0.00019483921792203093, "loss": 0.2331, "step": 2209 }, { "epoch": 1.0315052508751459, "grad_norm": 1.0625, "learning_rate": 0.00019483456394281303, "loss": 0.2458, "step": 2210 }, { "epoch": 1.0319719953325555, "grad_norm": 1.078125, "learning_rate": 0.00019482990792170178, "loss": 0.2395, "step": 2211 }, { "epoch": 1.032438739789965, "grad_norm": 1.1328125, "learning_rate": 0.00019482524985879737, "loss": 0.2352, "step": 2212 }, { "epoch": 1.0329054842473746, "grad_norm": 1.0078125, "learning_rate": 0.00019482058975420015, "loss": 0.2235, "step": 2213 }, { "epoch": 1.0333722287047842, "grad_norm": 1.09375, "learning_rate": 0.0001948159276080104, "loss": 0.2207, "step": 2214 }, { "epoch": 1.0338389731621938, "grad_norm": 1.2421875, "learning_rate": 0.00019481126342032855, "loss": 0.2723, "step": 2215 }, { "epoch": 1.0343057176196033, "grad_norm": 1.1328125, "learning_rate": 0.00019480659719125498, "loss": 0.302, "step": 2216 }, { "epoch": 1.034772462077013, "grad_norm": 0.9375, "learning_rate": 0.0001948019289208902, "loss": 0.218, "step": 2217 }, { "epoch": 1.0352392065344225, "grad_norm": 0.875, "learning_rate": 0.00019479725860933468, "loss": 0.1877, "step": 2218 }, { "epoch": 1.035705950991832, "grad_norm": 1.046875, "learning_rate": 0.00019479258625668906, "loss": 0.2687, "step": 2219 }, { "epoch": 1.0361726954492416, "grad_norm": 0.9375, "learning_rate": 0.00019478791186305382, "loss": 0.2537, "step": 2220 }, { "epoch": 1.0366394399066512, "grad_norm": 0.91015625, "learning_rate": 0.00019478323542852972, "loss": 0.2508, "step": 2221 }, { "epoch": 1.0371061843640608, "grad_norm": 0.88671875, "learning_rate": 0.00019477855695321735, "loss": 0.2425, "step": 2222 }, { "epoch": 1.0375729288214703, "grad_norm": 0.8359375, "learning_rate": 0.0001947738764372175, "loss": 0.2167, "step": 2223 }, { "epoch": 1.03803967327888, "grad_norm": 1.203125, "learning_rate": 0.00019476919388063097, "loss": 0.2439, "step": 2224 }, { "epoch": 1.0385064177362895, "grad_norm": 0.99609375, "learning_rate": 0.00019476450928355852, "loss": 0.2648, "step": 2225 }, { "epoch": 1.0389731621936988, "grad_norm": 0.8671875, "learning_rate": 0.00019475982264610106, "loss": 0.2187, "step": 2226 }, { "epoch": 1.0394399066511084, "grad_norm": 1.0078125, "learning_rate": 0.00019475513396835948, "loss": 0.265, "step": 2227 }, { "epoch": 1.039906651108518, "grad_norm": 0.9609375, "learning_rate": 0.0001947504432504347, "loss": 0.25, "step": 2228 }, { "epoch": 1.0403733955659276, "grad_norm": 1.03125, "learning_rate": 0.00019474575049242777, "loss": 0.251, "step": 2229 }, { "epoch": 1.0408401400233371, "grad_norm": 0.9921875, "learning_rate": 0.00019474105569443974, "loss": 0.25, "step": 2230 }, { "epoch": 1.0413068844807467, "grad_norm": 0.8125, "learning_rate": 0.00019473635885657162, "loss": 0.1806, "step": 2231 }, { "epoch": 1.0417736289381563, "grad_norm": 0.87890625, "learning_rate": 0.0001947316599789246, "loss": 0.2547, "step": 2232 }, { "epoch": 1.0422403733955659, "grad_norm": 0.86328125, "learning_rate": 0.00019472695906159982, "loss": 0.1936, "step": 2233 }, { "epoch": 1.0427071178529754, "grad_norm": 0.9375, "learning_rate": 0.00019472225610469855, "loss": 0.2443, "step": 2234 }, { "epoch": 1.043173862310385, "grad_norm": 0.91796875, "learning_rate": 0.00019471755110832197, "loss": 0.2192, "step": 2235 }, { "epoch": 1.0436406067677946, "grad_norm": 1.25, "learning_rate": 0.00019471284407257146, "loss": 0.2388, "step": 2236 }, { "epoch": 1.0441073512252041, "grad_norm": 0.90625, "learning_rate": 0.00019470813499754828, "loss": 0.2427, "step": 2237 }, { "epoch": 1.0445740956826137, "grad_norm": 0.9921875, "learning_rate": 0.0001947034238833539, "loss": 0.2716, "step": 2238 }, { "epoch": 1.0450408401400233, "grad_norm": 0.82421875, "learning_rate": 0.00019469871073008973, "loss": 0.2475, "step": 2239 }, { "epoch": 1.0455075845974329, "grad_norm": 0.9765625, "learning_rate": 0.00019469399553785724, "loss": 0.2376, "step": 2240 }, { "epoch": 1.0459743290548424, "grad_norm": 0.9296875, "learning_rate": 0.000194689278306758, "loss": 0.2361, "step": 2241 }, { "epoch": 1.046441073512252, "grad_norm": 0.8359375, "learning_rate": 0.0001946845590368935, "loss": 0.2503, "step": 2242 }, { "epoch": 1.0469078179696616, "grad_norm": 0.80078125, "learning_rate": 0.00019467983772836537, "loss": 0.2045, "step": 2243 }, { "epoch": 1.0473745624270712, "grad_norm": 1.1953125, "learning_rate": 0.00019467511438127531, "loss": 0.2466, "step": 2244 }, { "epoch": 1.0478413068844807, "grad_norm": 0.96875, "learning_rate": 0.000194670388995725, "loss": 0.2657, "step": 2245 }, { "epoch": 1.0483080513418903, "grad_norm": 0.83984375, "learning_rate": 0.0001946656615718162, "loss": 0.2298, "step": 2246 }, { "epoch": 1.0487747957992999, "grad_norm": 0.8359375, "learning_rate": 0.00019466093210965062, "loss": 0.2114, "step": 2247 }, { "epoch": 1.0492415402567095, "grad_norm": 1.0390625, "learning_rate": 0.00019465620060933017, "loss": 0.2674, "step": 2248 }, { "epoch": 1.049708284714119, "grad_norm": 1.03125, "learning_rate": 0.0001946514670709567, "loss": 0.2503, "step": 2249 }, { "epoch": 1.0501750291715286, "grad_norm": 0.94921875, "learning_rate": 0.00019464673149463213, "loss": 0.218, "step": 2250 }, { "epoch": 1.0506417736289382, "grad_norm": 1.1875, "learning_rate": 0.0001946419938804584, "loss": 0.2836, "step": 2251 }, { "epoch": 1.0511085180863478, "grad_norm": 0.80078125, "learning_rate": 0.00019463725422853753, "loss": 0.1677, "step": 2252 }, { "epoch": 1.0515752625437573, "grad_norm": 1.0, "learning_rate": 0.00019463251253897158, "loss": 0.2379, "step": 2253 }, { "epoch": 1.052042007001167, "grad_norm": 1.0, "learning_rate": 0.00019462776881186267, "loss": 0.1799, "step": 2254 }, { "epoch": 1.0525087514585765, "grad_norm": 1.1796875, "learning_rate": 0.0001946230230473129, "loss": 0.2155, "step": 2255 }, { "epoch": 1.052975495915986, "grad_norm": 1.0859375, "learning_rate": 0.0001946182752454244, "loss": 0.2522, "step": 2256 }, { "epoch": 1.0534422403733956, "grad_norm": 0.94921875, "learning_rate": 0.00019461352540629952, "loss": 0.2253, "step": 2257 }, { "epoch": 1.0539089848308052, "grad_norm": 0.97265625, "learning_rate": 0.00019460877353004043, "loss": 0.2448, "step": 2258 }, { "epoch": 1.0543757292882148, "grad_norm": 1.109375, "learning_rate": 0.0001946040196167495, "loss": 0.2603, "step": 2259 }, { "epoch": 1.0548424737456243, "grad_norm": 1.296875, "learning_rate": 0.00019459926366652904, "loss": 0.2377, "step": 2260 }, { "epoch": 1.055309218203034, "grad_norm": 0.93359375, "learning_rate": 0.00019459450567948147, "loss": 0.2353, "step": 2261 }, { "epoch": 1.0557759626604435, "grad_norm": 0.9453125, "learning_rate": 0.00019458974565570925, "loss": 0.2976, "step": 2262 }, { "epoch": 1.056242707117853, "grad_norm": 0.9921875, "learning_rate": 0.00019458498359531485, "loss": 0.2382, "step": 2263 }, { "epoch": 1.0567094515752626, "grad_norm": 1.03125, "learning_rate": 0.00019458021949840082, "loss": 0.2588, "step": 2264 }, { "epoch": 1.0571761960326722, "grad_norm": 0.97265625, "learning_rate": 0.00019457545336506972, "loss": 0.182, "step": 2265 }, { "epoch": 1.0576429404900818, "grad_norm": 0.8828125, "learning_rate": 0.0001945706851954242, "loss": 0.2257, "step": 2266 }, { "epoch": 1.0581096849474911, "grad_norm": 0.71484375, "learning_rate": 0.00019456591498956684, "loss": 0.1687, "step": 2267 }, { "epoch": 1.0585764294049007, "grad_norm": 0.84765625, "learning_rate": 0.00019456114274760044, "loss": 0.2213, "step": 2268 }, { "epoch": 1.0590431738623103, "grad_norm": 0.9296875, "learning_rate": 0.00019455636846962772, "loss": 0.2771, "step": 2269 }, { "epoch": 1.0595099183197199, "grad_norm": 0.98046875, "learning_rate": 0.00019455159215575146, "loss": 0.2325, "step": 2270 }, { "epoch": 1.0599766627771294, "grad_norm": 0.99609375, "learning_rate": 0.00019454681380607456, "loss": 0.2455, "step": 2271 }, { "epoch": 1.060443407234539, "grad_norm": 0.9765625, "learning_rate": 0.00019454203342069982, "loss": 0.1583, "step": 2272 }, { "epoch": 1.0609101516919486, "grad_norm": 0.9609375, "learning_rate": 0.00019453725099973022, "loss": 0.2193, "step": 2273 }, { "epoch": 1.0613768961493582, "grad_norm": 1.1796875, "learning_rate": 0.0001945324665432687, "loss": 0.2357, "step": 2274 }, { "epoch": 1.0618436406067677, "grad_norm": 1.046875, "learning_rate": 0.0001945276800514183, "loss": 0.2292, "step": 2275 }, { "epoch": 1.0623103850641773, "grad_norm": 1.03125, "learning_rate": 0.00019452289152428206, "loss": 0.2365, "step": 2276 }, { "epoch": 1.0627771295215869, "grad_norm": 0.8984375, "learning_rate": 0.0001945181009619631, "loss": 0.2065, "step": 2277 }, { "epoch": 1.0632438739789964, "grad_norm": 0.953125, "learning_rate": 0.00019451330836456457, "loss": 0.2211, "step": 2278 }, { "epoch": 1.063710618436406, "grad_norm": 0.80859375, "learning_rate": 0.0001945085137321896, "loss": 0.1949, "step": 2279 }, { "epoch": 1.0641773628938156, "grad_norm": 1.0078125, "learning_rate": 0.00019450371706494153, "loss": 0.2269, "step": 2280 }, { "epoch": 1.0646441073512252, "grad_norm": 0.859375, "learning_rate": 0.00019449891836292353, "loss": 0.2181, "step": 2281 }, { "epoch": 1.0651108518086347, "grad_norm": 0.91015625, "learning_rate": 0.000194494117626239, "loss": 0.1948, "step": 2282 }, { "epoch": 1.0655775962660443, "grad_norm": 1.203125, "learning_rate": 0.00019448931485499126, "loss": 0.2224, "step": 2283 }, { "epoch": 1.0660443407234539, "grad_norm": 1.1796875, "learning_rate": 0.0001944845100492837, "loss": 0.2516, "step": 2284 }, { "epoch": 1.0665110851808635, "grad_norm": 1.0078125, "learning_rate": 0.00019447970320921986, "loss": 0.1979, "step": 2285 }, { "epoch": 1.066977829638273, "grad_norm": 0.984375, "learning_rate": 0.00019447489433490315, "loss": 0.1938, "step": 2286 }, { "epoch": 1.0674445740956826, "grad_norm": 1.2890625, "learning_rate": 0.00019447008342643717, "loss": 0.2753, "step": 2287 }, { "epoch": 1.0679113185530922, "grad_norm": 0.81640625, "learning_rate": 0.00019446527048392546, "loss": 0.2355, "step": 2288 }, { "epoch": 1.0683780630105018, "grad_norm": 0.91015625, "learning_rate": 0.0001944604555074717, "loss": 0.1796, "step": 2289 }, { "epoch": 1.0688448074679113, "grad_norm": 0.9453125, "learning_rate": 0.00019445563849717948, "loss": 0.1926, "step": 2290 }, { "epoch": 1.069311551925321, "grad_norm": 1.0, "learning_rate": 0.0001944508194531526, "loss": 0.24, "step": 2291 }, { "epoch": 1.0697782963827305, "grad_norm": 1.1953125, "learning_rate": 0.00019444599837549476, "loss": 0.2633, "step": 2292 }, { "epoch": 1.07024504084014, "grad_norm": 0.8828125, "learning_rate": 0.00019444117526430977, "loss": 0.2094, "step": 2293 }, { "epoch": 1.0707117852975496, "grad_norm": 1.046875, "learning_rate": 0.00019443635011970152, "loss": 0.2457, "step": 2294 }, { "epoch": 1.0711785297549592, "grad_norm": 0.91015625, "learning_rate": 0.00019443152294177385, "loss": 0.1909, "step": 2295 }, { "epoch": 1.0716452742123688, "grad_norm": 0.8828125, "learning_rate": 0.0001944266937306307, "loss": 0.2116, "step": 2296 }, { "epoch": 1.0721120186697783, "grad_norm": 0.93359375, "learning_rate": 0.00019442186248637612, "loss": 0.1813, "step": 2297 }, { "epoch": 1.072578763127188, "grad_norm": 1.1875, "learning_rate": 0.00019441702920911406, "loss": 0.2442, "step": 2298 }, { "epoch": 1.0730455075845975, "grad_norm": 1.1171875, "learning_rate": 0.00019441219389894858, "loss": 0.2369, "step": 2299 }, { "epoch": 1.073512252042007, "grad_norm": 1.046875, "learning_rate": 0.00019440735655598386, "loss": 0.2274, "step": 2300 }, { "epoch": 1.0739789964994166, "grad_norm": 1.3515625, "learning_rate": 0.00019440251718032397, "loss": 0.326, "step": 2301 }, { "epoch": 1.0744457409568262, "grad_norm": 0.90625, "learning_rate": 0.00019439767577207314, "loss": 0.1723, "step": 2302 }, { "epoch": 1.0749124854142358, "grad_norm": 0.8984375, "learning_rate": 0.00019439283233133563, "loss": 0.2544, "step": 2303 }, { "epoch": 1.0753792298716454, "grad_norm": 1.0390625, "learning_rate": 0.0001943879868582157, "loss": 0.2578, "step": 2304 }, { "epoch": 1.075845974329055, "grad_norm": 0.90234375, "learning_rate": 0.00019438313935281768, "loss": 0.2702, "step": 2305 }, { "epoch": 1.0763127187864645, "grad_norm": 0.89453125, "learning_rate": 0.00019437828981524598, "loss": 0.2418, "step": 2306 }, { "epoch": 1.076779463243874, "grad_norm": 0.94921875, "learning_rate": 0.00019437343824560497, "loss": 0.2026, "step": 2307 }, { "epoch": 1.0772462077012834, "grad_norm": 1.1328125, "learning_rate": 0.0001943685846439991, "loss": 0.3111, "step": 2308 }, { "epoch": 1.0777129521586932, "grad_norm": 1.0703125, "learning_rate": 0.00019436372901053295, "loss": 0.2151, "step": 2309 }, { "epoch": 1.0781796966161026, "grad_norm": 0.78515625, "learning_rate": 0.00019435887134531098, "loss": 0.1683, "step": 2310 }, { "epoch": 1.0786464410735122, "grad_norm": 0.93359375, "learning_rate": 0.00019435401164843782, "loss": 0.2121, "step": 2311 }, { "epoch": 1.0791131855309217, "grad_norm": 0.828125, "learning_rate": 0.00019434914992001812, "loss": 0.2417, "step": 2312 }, { "epoch": 1.0795799299883313, "grad_norm": 1.0390625, "learning_rate": 0.00019434428616015652, "loss": 0.1975, "step": 2313 }, { "epoch": 1.0800466744457409, "grad_norm": 0.96484375, "learning_rate": 0.0001943394203689578, "loss": 0.2332, "step": 2314 }, { "epoch": 1.0805134189031504, "grad_norm": 1.0078125, "learning_rate": 0.00019433455254652663, "loss": 0.2676, "step": 2315 }, { "epoch": 1.08098016336056, "grad_norm": 1.0390625, "learning_rate": 0.0001943296826929679, "loss": 0.257, "step": 2316 }, { "epoch": 1.0814469078179696, "grad_norm": 1.109375, "learning_rate": 0.00019432481080838642, "loss": 0.2414, "step": 2317 }, { "epoch": 1.0819136522753792, "grad_norm": 1.171875, "learning_rate": 0.00019431993689288714, "loss": 0.2656, "step": 2318 }, { "epoch": 1.0823803967327887, "grad_norm": 0.91015625, "learning_rate": 0.00019431506094657496, "loss": 0.2405, "step": 2319 }, { "epoch": 1.0828471411901983, "grad_norm": 0.921875, "learning_rate": 0.0001943101829695549, "loss": 0.2192, "step": 2320 }, { "epoch": 1.0833138856476079, "grad_norm": 0.9453125, "learning_rate": 0.00019430530296193194, "loss": 0.2129, "step": 2321 }, { "epoch": 1.0837806301050175, "grad_norm": 1.015625, "learning_rate": 0.00019430042092381117, "loss": 0.223, "step": 2322 }, { "epoch": 1.084247374562427, "grad_norm": 0.87109375, "learning_rate": 0.00019429553685529772, "loss": 0.2025, "step": 2323 }, { "epoch": 1.0847141190198366, "grad_norm": 0.8828125, "learning_rate": 0.00019429065075649672, "loss": 0.2192, "step": 2324 }, { "epoch": 1.0851808634772462, "grad_norm": 1.09375, "learning_rate": 0.0001942857626275134, "loss": 0.299, "step": 2325 }, { "epoch": 1.0856476079346558, "grad_norm": 0.9609375, "learning_rate": 0.00019428087246845302, "loss": 0.2877, "step": 2326 }, { "epoch": 1.0861143523920653, "grad_norm": 0.95703125, "learning_rate": 0.00019427598027942083, "loss": 0.2732, "step": 2327 }, { "epoch": 1.086581096849475, "grad_norm": 0.89453125, "learning_rate": 0.00019427108606052216, "loss": 0.1935, "step": 2328 }, { "epoch": 1.0870478413068845, "grad_norm": 1.078125, "learning_rate": 0.00019426618981186242, "loss": 0.3053, "step": 2329 }, { "epoch": 1.087514585764294, "grad_norm": 0.828125, "learning_rate": 0.00019426129153354708, "loss": 0.1771, "step": 2330 }, { "epoch": 1.0879813302217036, "grad_norm": 1.046875, "learning_rate": 0.00019425639122568148, "loss": 0.2274, "step": 2331 }, { "epoch": 1.0884480746791132, "grad_norm": 1.0234375, "learning_rate": 0.0001942514888883712, "loss": 0.2305, "step": 2332 }, { "epoch": 1.0889148191365228, "grad_norm": 0.94921875, "learning_rate": 0.00019424658452172185, "loss": 0.1955, "step": 2333 }, { "epoch": 1.0893815635939323, "grad_norm": 1.0625, "learning_rate": 0.00019424167812583893, "loss": 0.1955, "step": 2334 }, { "epoch": 1.089848308051342, "grad_norm": 0.97265625, "learning_rate": 0.0001942367697008281, "loss": 0.2281, "step": 2335 }, { "epoch": 1.0903150525087515, "grad_norm": 1.1171875, "learning_rate": 0.0001942318592467951, "loss": 0.2311, "step": 2336 }, { "epoch": 1.090781796966161, "grad_norm": 1.3671875, "learning_rate": 0.00019422694676384558, "loss": 0.3157, "step": 2337 }, { "epoch": 1.0912485414235706, "grad_norm": 1.1484375, "learning_rate": 0.00019422203225208539, "loss": 0.2205, "step": 2338 }, { "epoch": 1.0917152858809802, "grad_norm": 1.140625, "learning_rate": 0.00019421711571162027, "loss": 0.2418, "step": 2339 }, { "epoch": 1.0921820303383898, "grad_norm": 0.75390625, "learning_rate": 0.0001942121971425561, "loss": 0.1829, "step": 2340 }, { "epoch": 1.0926487747957994, "grad_norm": 0.9296875, "learning_rate": 0.00019420727654499885, "loss": 0.2171, "step": 2341 }, { "epoch": 1.093115519253209, "grad_norm": 1.0234375, "learning_rate": 0.00019420235391905436, "loss": 0.2278, "step": 2342 }, { "epoch": 1.0935822637106185, "grad_norm": 1.015625, "learning_rate": 0.00019419742926482872, "loss": 0.2366, "step": 2343 }, { "epoch": 1.094049008168028, "grad_norm": 0.859375, "learning_rate": 0.00019419250258242786, "loss": 0.1932, "step": 2344 }, { "epoch": 1.0945157526254377, "grad_norm": 1.203125, "learning_rate": 0.00019418757387195795, "loss": 0.3093, "step": 2345 }, { "epoch": 1.0949824970828472, "grad_norm": 1.15625, "learning_rate": 0.00019418264313352505, "loss": 0.2753, "step": 2346 }, { "epoch": 1.0954492415402568, "grad_norm": 1.1171875, "learning_rate": 0.00019417771036723535, "loss": 0.2706, "step": 2347 }, { "epoch": 1.0959159859976664, "grad_norm": 1.0390625, "learning_rate": 0.00019417277557319507, "loss": 0.2216, "step": 2348 }, { "epoch": 1.096382730455076, "grad_norm": 0.87890625, "learning_rate": 0.00019416783875151044, "loss": 0.2515, "step": 2349 }, { "epoch": 1.0968494749124855, "grad_norm": 1.0859375, "learning_rate": 0.00019416289990228775, "loss": 0.2861, "step": 2350 }, { "epoch": 1.0973162193698949, "grad_norm": 0.96484375, "learning_rate": 0.00019415795902563335, "loss": 0.2058, "step": 2351 }, { "epoch": 1.0977829638273044, "grad_norm": 0.91796875, "learning_rate": 0.00019415301612165365, "loss": 0.2403, "step": 2352 }, { "epoch": 1.098249708284714, "grad_norm": 0.9375, "learning_rate": 0.000194148071190455, "loss": 0.2202, "step": 2353 }, { "epoch": 1.0987164527421236, "grad_norm": 1.34375, "learning_rate": 0.00019414312423214397, "loss": 0.3434, "step": 2354 }, { "epoch": 1.0991831971995332, "grad_norm": 1.1484375, "learning_rate": 0.00019413817524682698, "loss": 0.304, "step": 2355 }, { "epoch": 1.0996499416569427, "grad_norm": 1.0234375, "learning_rate": 0.00019413322423461065, "loss": 0.2471, "step": 2356 }, { "epoch": 1.1001166861143523, "grad_norm": 0.98046875, "learning_rate": 0.00019412827119560152, "loss": 0.2006, "step": 2357 }, { "epoch": 1.100583430571762, "grad_norm": 0.83984375, "learning_rate": 0.00019412331612990634, "loss": 0.1803, "step": 2358 }, { "epoch": 1.1010501750291715, "grad_norm": 0.7578125, "learning_rate": 0.0001941183590376317, "loss": 0.212, "step": 2359 }, { "epoch": 1.101516919486581, "grad_norm": 0.9609375, "learning_rate": 0.00019411339991888435, "loss": 0.2127, "step": 2360 }, { "epoch": 1.1019836639439906, "grad_norm": 1.1875, "learning_rate": 0.0001941084387737711, "loss": 0.3057, "step": 2361 }, { "epoch": 1.1024504084014002, "grad_norm": 0.96484375, "learning_rate": 0.0001941034756023987, "loss": 0.2501, "step": 2362 }, { "epoch": 1.1029171528588098, "grad_norm": 0.93359375, "learning_rate": 0.00019409851040487412, "loss": 0.2064, "step": 2363 }, { "epoch": 1.1033838973162193, "grad_norm": 0.890625, "learning_rate": 0.0001940935431813042, "loss": 0.1881, "step": 2364 }, { "epoch": 1.103850641773629, "grad_norm": 1.03125, "learning_rate": 0.00019408857393179586, "loss": 0.2226, "step": 2365 }, { "epoch": 1.1043173862310385, "grad_norm": 0.95703125, "learning_rate": 0.00019408360265645618, "loss": 0.1776, "step": 2366 }, { "epoch": 1.104784130688448, "grad_norm": 1.0234375, "learning_rate": 0.0001940786293553921, "loss": 0.2592, "step": 2367 }, { "epoch": 1.1052508751458576, "grad_norm": 1.1953125, "learning_rate": 0.00019407365402871078, "loss": 0.2413, "step": 2368 }, { "epoch": 1.1057176196032672, "grad_norm": 0.8984375, "learning_rate": 0.0001940686766765193, "loss": 0.242, "step": 2369 }, { "epoch": 1.1061843640606768, "grad_norm": 1.0, "learning_rate": 0.00019406369729892483, "loss": 0.2174, "step": 2370 }, { "epoch": 1.1066511085180863, "grad_norm": 1.0390625, "learning_rate": 0.0001940587158960346, "loss": 0.235, "step": 2371 }, { "epoch": 1.107117852975496, "grad_norm": 0.8046875, "learning_rate": 0.00019405373246795588, "loss": 0.1654, "step": 2372 }, { "epoch": 1.1075845974329055, "grad_norm": 1.0625, "learning_rate": 0.00019404874701479594, "loss": 0.2066, "step": 2373 }, { "epoch": 1.108051341890315, "grad_norm": 0.83984375, "learning_rate": 0.0001940437595366621, "loss": 0.1897, "step": 2374 }, { "epoch": 1.1085180863477246, "grad_norm": 1.0546875, "learning_rate": 0.0001940387700336618, "loss": 0.2093, "step": 2375 }, { "epoch": 1.1089848308051342, "grad_norm": 0.92578125, "learning_rate": 0.00019403377850590243, "loss": 0.1885, "step": 2376 }, { "epoch": 1.1094515752625438, "grad_norm": 1.109375, "learning_rate": 0.00019402878495349145, "loss": 0.2773, "step": 2377 }, { "epoch": 1.1099183197199534, "grad_norm": 1.1796875, "learning_rate": 0.00019402378937653645, "loss": 0.2894, "step": 2378 }, { "epoch": 1.110385064177363, "grad_norm": 0.890625, "learning_rate": 0.00019401879177514492, "loss": 0.16, "step": 2379 }, { "epoch": 1.1108518086347725, "grad_norm": 0.90234375, "learning_rate": 0.0001940137921494245, "loss": 0.2041, "step": 2380 }, { "epoch": 1.111318553092182, "grad_norm": 0.98828125, "learning_rate": 0.0001940087904994828, "loss": 0.1897, "step": 2381 }, { "epoch": 1.1117852975495917, "grad_norm": 1.2109375, "learning_rate": 0.00019400378682542756, "loss": 0.2953, "step": 2382 }, { "epoch": 1.1122520420070012, "grad_norm": 1.0625, "learning_rate": 0.00019399878112736648, "loss": 0.2477, "step": 2383 }, { "epoch": 1.1127187864644108, "grad_norm": 0.921875, "learning_rate": 0.00019399377340540737, "loss": 0.1695, "step": 2384 }, { "epoch": 1.1131855309218204, "grad_norm": 1.109375, "learning_rate": 0.00019398876365965802, "loss": 0.2543, "step": 2385 }, { "epoch": 1.11365227537923, "grad_norm": 0.8828125, "learning_rate": 0.00019398375189022627, "loss": 0.1865, "step": 2386 }, { "epoch": 1.1141190198366395, "grad_norm": 0.95703125, "learning_rate": 0.00019397873809722012, "loss": 0.2478, "step": 2387 }, { "epoch": 1.114585764294049, "grad_norm": 1.2734375, "learning_rate": 0.00019397372228074743, "loss": 0.2818, "step": 2388 }, { "epoch": 1.1150525087514587, "grad_norm": 0.84375, "learning_rate": 0.00019396870444091623, "loss": 0.2183, "step": 2389 }, { "epoch": 1.1155192532088682, "grad_norm": 0.9296875, "learning_rate": 0.0001939636845778346, "loss": 0.165, "step": 2390 }, { "epoch": 1.1159859976662778, "grad_norm": 1.046875, "learning_rate": 0.00019395866269161056, "loss": 0.2518, "step": 2391 }, { "epoch": 1.1164527421236872, "grad_norm": 1.09375, "learning_rate": 0.00019395363878235225, "loss": 0.2346, "step": 2392 }, { "epoch": 1.116919486581097, "grad_norm": 1.140625, "learning_rate": 0.00019394861285016788, "loss": 0.2032, "step": 2393 }, { "epoch": 1.1173862310385063, "grad_norm": 1.0625, "learning_rate": 0.00019394358489516564, "loss": 0.2578, "step": 2394 }, { "epoch": 1.117852975495916, "grad_norm": 0.8828125, "learning_rate": 0.00019393855491745376, "loss": 0.2035, "step": 2395 }, { "epoch": 1.1183197199533255, "grad_norm": 0.9609375, "learning_rate": 0.0001939335229171406, "loss": 0.2096, "step": 2396 }, { "epoch": 1.118786464410735, "grad_norm": 0.9453125, "learning_rate": 0.00019392848889433447, "loss": 0.229, "step": 2397 }, { "epoch": 1.1192532088681446, "grad_norm": 1.046875, "learning_rate": 0.00019392345284914372, "loss": 0.1959, "step": 2398 }, { "epoch": 1.1197199533255542, "grad_norm": 1.125, "learning_rate": 0.00019391841478167681, "loss": 0.2616, "step": 2399 }, { "epoch": 1.1201866977829638, "grad_norm": 0.96875, "learning_rate": 0.0001939133746920423, "loss": 0.262, "step": 2400 }, { "epoch": 1.1206534422403733, "grad_norm": 1.234375, "learning_rate": 0.00019390833258034858, "loss": 0.2233, "step": 2401 }, { "epoch": 1.121120186697783, "grad_norm": 0.96484375, "learning_rate": 0.00019390328844670426, "loss": 0.1906, "step": 2402 }, { "epoch": 1.1215869311551925, "grad_norm": 0.98828125, "learning_rate": 0.00019389824229121798, "loss": 0.2365, "step": 2403 }, { "epoch": 1.122053675612602, "grad_norm": 0.80078125, "learning_rate": 0.00019389319411399836, "loss": 0.1846, "step": 2404 }, { "epoch": 1.1225204200700116, "grad_norm": 1.015625, "learning_rate": 0.0001938881439151541, "loss": 0.2078, "step": 2405 }, { "epoch": 1.1229871645274212, "grad_norm": 0.95703125, "learning_rate": 0.00019388309169479388, "loss": 0.214, "step": 2406 }, { "epoch": 1.1234539089848308, "grad_norm": 1.2265625, "learning_rate": 0.00019387803745302658, "loss": 0.2262, "step": 2407 }, { "epoch": 1.1239206534422403, "grad_norm": 1.0078125, "learning_rate": 0.00019387298118996095, "loss": 0.2706, "step": 2408 }, { "epoch": 1.12438739789965, "grad_norm": 1.0703125, "learning_rate": 0.0001938679229057059, "loss": 0.2475, "step": 2409 }, { "epoch": 1.1248541423570595, "grad_norm": 1.0, "learning_rate": 0.00019386286260037033, "loss": 0.2066, "step": 2410 }, { "epoch": 1.125320886814469, "grad_norm": 1.0546875, "learning_rate": 0.00019385780027406316, "loss": 0.217, "step": 2411 }, { "epoch": 1.1257876312718786, "grad_norm": 1.0234375, "learning_rate": 0.0001938527359268934, "loss": 0.1894, "step": 2412 }, { "epoch": 1.1262543757292882, "grad_norm": 0.859375, "learning_rate": 0.00019384766955897015, "loss": 0.1968, "step": 2413 }, { "epoch": 1.1267211201866978, "grad_norm": 1.0859375, "learning_rate": 0.00019384260117040242, "loss": 0.2198, "step": 2414 }, { "epoch": 1.1271878646441074, "grad_norm": 1.1015625, "learning_rate": 0.00019383753076129937, "loss": 0.2411, "step": 2415 }, { "epoch": 1.127654609101517, "grad_norm": 1.2421875, "learning_rate": 0.00019383245833177014, "loss": 0.2902, "step": 2416 }, { "epoch": 1.1281213535589265, "grad_norm": 0.94921875, "learning_rate": 0.000193827383881924, "loss": 0.1835, "step": 2417 }, { "epoch": 1.128588098016336, "grad_norm": 1.1171875, "learning_rate": 0.00019382230741187023, "loss": 0.1948, "step": 2418 }, { "epoch": 1.1290548424737457, "grad_norm": 0.90625, "learning_rate": 0.000193817228921718, "loss": 0.2108, "step": 2419 }, { "epoch": 1.1295215869311552, "grad_norm": 1.1796875, "learning_rate": 0.00019381214841157678, "loss": 0.2795, "step": 2420 }, { "epoch": 1.1299883313885648, "grad_norm": 0.94140625, "learning_rate": 0.00019380706588155593, "loss": 0.2193, "step": 2421 }, { "epoch": 1.1304550758459744, "grad_norm": 1.0546875, "learning_rate": 0.00019380198133176486, "loss": 0.2879, "step": 2422 }, { "epoch": 1.130921820303384, "grad_norm": 1.296875, "learning_rate": 0.00019379689476231303, "loss": 0.2675, "step": 2423 }, { "epoch": 1.1313885647607935, "grad_norm": 1.1015625, "learning_rate": 0.00019379180617331004, "loss": 0.2809, "step": 2424 }, { "epoch": 1.131855309218203, "grad_norm": 1.0234375, "learning_rate": 0.00019378671556486533, "loss": 0.2362, "step": 2425 }, { "epoch": 1.1323220536756127, "grad_norm": 1.0234375, "learning_rate": 0.00019378162293708862, "loss": 0.2254, "step": 2426 }, { "epoch": 1.1327887981330222, "grad_norm": 0.9140625, "learning_rate": 0.00019377652829008954, "loss": 0.2095, "step": 2427 }, { "epoch": 1.1332555425904318, "grad_norm": 0.828125, "learning_rate": 0.00019377143162397774, "loss": 0.2136, "step": 2428 }, { "epoch": 1.1337222870478414, "grad_norm": 0.99609375, "learning_rate": 0.00019376633293886295, "loss": 0.2394, "step": 2429 }, { "epoch": 1.134189031505251, "grad_norm": 0.86328125, "learning_rate": 0.000193761232234855, "loss": 0.2411, "step": 2430 }, { "epoch": 1.1346557759626605, "grad_norm": 1.21875, "learning_rate": 0.00019375612951206365, "loss": 0.2059, "step": 2431 }, { "epoch": 1.1351225204200701, "grad_norm": 1.25, "learning_rate": 0.00019375102477059886, "loss": 0.3163, "step": 2432 }, { "epoch": 1.1355892648774795, "grad_norm": 1.0234375, "learning_rate": 0.00019374591801057047, "loss": 0.206, "step": 2433 }, { "epoch": 1.1360560093348893, "grad_norm": 0.8125, "learning_rate": 0.00019374080923208843, "loss": 0.161, "step": 2434 }, { "epoch": 1.1365227537922986, "grad_norm": 1.0078125, "learning_rate": 0.00019373569843526278, "loss": 0.2216, "step": 2435 }, { "epoch": 1.1369894982497084, "grad_norm": 0.8046875, "learning_rate": 0.00019373058562020354, "loss": 0.1693, "step": 2436 }, { "epoch": 1.1374562427071178, "grad_norm": 1.0703125, "learning_rate": 0.0001937254707870208, "loss": 0.1914, "step": 2437 }, { "epoch": 1.1379229871645273, "grad_norm": 0.99609375, "learning_rate": 0.0001937203539358247, "loss": 0.2169, "step": 2438 }, { "epoch": 1.138389731621937, "grad_norm": 0.89453125, "learning_rate": 0.00019371523506672537, "loss": 0.201, "step": 2439 }, { "epoch": 1.1388564760793465, "grad_norm": 0.96484375, "learning_rate": 0.00019371011417983306, "loss": 0.2133, "step": 2440 }, { "epoch": 1.139323220536756, "grad_norm": 0.8203125, "learning_rate": 0.00019370499127525798, "loss": 0.1873, "step": 2441 }, { "epoch": 1.1397899649941656, "grad_norm": 1.015625, "learning_rate": 0.00019369986635311054, "loss": 0.1905, "step": 2442 }, { "epoch": 1.1402567094515752, "grad_norm": 1.1171875, "learning_rate": 0.00019369473941350095, "loss": 0.2438, "step": 2443 }, { "epoch": 1.1407234539089848, "grad_norm": 1.296875, "learning_rate": 0.0001936896104565397, "loss": 0.2276, "step": 2444 }, { "epoch": 1.1411901983663943, "grad_norm": 1.03125, "learning_rate": 0.0001936844794823372, "loss": 0.2276, "step": 2445 }, { "epoch": 1.141656942823804, "grad_norm": 1.0625, "learning_rate": 0.00019367934649100387, "loss": 0.2201, "step": 2446 }, { "epoch": 1.1421236872812135, "grad_norm": 1.1171875, "learning_rate": 0.0001936742114826503, "loss": 0.2041, "step": 2447 }, { "epoch": 1.142590431738623, "grad_norm": 1.09375, "learning_rate": 0.00019366907445738703, "loss": 0.24, "step": 2448 }, { "epoch": 1.1430571761960326, "grad_norm": 0.95703125, "learning_rate": 0.00019366393541532463, "loss": 0.2643, "step": 2449 }, { "epoch": 1.1435239206534422, "grad_norm": 0.8984375, "learning_rate": 0.0001936587943565738, "loss": 0.1913, "step": 2450 }, { "epoch": 1.1439906651108518, "grad_norm": 0.94921875, "learning_rate": 0.0001936536512812452, "loss": 0.2067, "step": 2451 }, { "epoch": 1.1444574095682614, "grad_norm": 0.92578125, "learning_rate": 0.00019364850618944958, "loss": 0.2144, "step": 2452 }, { "epoch": 1.144924154025671, "grad_norm": 0.89453125, "learning_rate": 0.00019364335908129773, "loss": 0.2385, "step": 2453 }, { "epoch": 1.1453908984830805, "grad_norm": 1.0546875, "learning_rate": 0.00019363820995690046, "loss": 0.2412, "step": 2454 }, { "epoch": 1.14585764294049, "grad_norm": 1.0546875, "learning_rate": 0.00019363305881636863, "loss": 0.2939, "step": 2455 }, { "epoch": 1.1463243873978997, "grad_norm": 0.80078125, "learning_rate": 0.00019362790565981316, "loss": 0.1874, "step": 2456 }, { "epoch": 1.1467911318553092, "grad_norm": 1.015625, "learning_rate": 0.00019362275048734497, "loss": 0.2217, "step": 2457 }, { "epoch": 1.1472578763127188, "grad_norm": 0.89453125, "learning_rate": 0.00019361759329907508, "loss": 0.2098, "step": 2458 }, { "epoch": 1.1477246207701284, "grad_norm": 0.8828125, "learning_rate": 0.0001936124340951146, "loss": 0.2123, "step": 2459 }, { "epoch": 1.148191365227538, "grad_norm": 0.8984375, "learning_rate": 0.0001936072728755745, "loss": 0.2159, "step": 2460 }, { "epoch": 1.1486581096849475, "grad_norm": 0.9453125, "learning_rate": 0.000193602109640566, "loss": 0.1896, "step": 2461 }, { "epoch": 1.149124854142357, "grad_norm": 0.91015625, "learning_rate": 0.00019359694439020016, "loss": 0.1636, "step": 2462 }, { "epoch": 1.1495915985997667, "grad_norm": 1.0390625, "learning_rate": 0.00019359177712458832, "loss": 0.2027, "step": 2463 }, { "epoch": 1.1500583430571762, "grad_norm": 1.0625, "learning_rate": 0.00019358660784384163, "loss": 0.1809, "step": 2464 }, { "epoch": 1.1505250875145858, "grad_norm": 1.1015625, "learning_rate": 0.00019358143654807147, "loss": 0.1893, "step": 2465 }, { "epoch": 1.1509918319719954, "grad_norm": 0.9765625, "learning_rate": 0.00019357626323738913, "loss": 0.2289, "step": 2466 }, { "epoch": 1.151458576429405, "grad_norm": 1.1015625, "learning_rate": 0.00019357108791190602, "loss": 0.2143, "step": 2467 }, { "epoch": 1.1519253208868145, "grad_norm": 1.2890625, "learning_rate": 0.00019356591057173355, "loss": 0.2221, "step": 2468 }, { "epoch": 1.1523920653442241, "grad_norm": 1.0, "learning_rate": 0.0001935607312169833, "loss": 0.2413, "step": 2469 }, { "epoch": 1.1528588098016337, "grad_norm": 0.90625, "learning_rate": 0.00019355554984776661, "loss": 0.2145, "step": 2470 }, { "epoch": 1.1533255542590433, "grad_norm": 0.94921875, "learning_rate": 0.00019355036646419516, "loss": 0.2249, "step": 2471 }, { "epoch": 1.1537922987164528, "grad_norm": 1.171875, "learning_rate": 0.00019354518106638052, "loss": 0.2432, "step": 2472 }, { "epoch": 1.1542590431738624, "grad_norm": 0.94140625, "learning_rate": 0.00019353999365443435, "loss": 0.2494, "step": 2473 }, { "epoch": 1.1547257876312718, "grad_norm": 1.1875, "learning_rate": 0.0001935348042284683, "loss": 0.2807, "step": 2474 }, { "epoch": 1.1551925320886816, "grad_norm": 0.88671875, "learning_rate": 0.00019352961278859416, "loss": 0.1944, "step": 2475 }, { "epoch": 1.155659276546091, "grad_norm": 1.03125, "learning_rate": 0.00019352441933492368, "loss": 0.2025, "step": 2476 }, { "epoch": 1.1561260210035007, "grad_norm": 0.96484375, "learning_rate": 0.0001935192238675687, "loss": 0.2445, "step": 2477 }, { "epoch": 1.15659276546091, "grad_norm": 0.9921875, "learning_rate": 0.00019351402638664107, "loss": 0.2177, "step": 2478 }, { "epoch": 1.1570595099183196, "grad_norm": 0.96875, "learning_rate": 0.00019350882689225267, "loss": 0.241, "step": 2479 }, { "epoch": 1.1575262543757292, "grad_norm": 1.03125, "learning_rate": 0.0001935036253845155, "loss": 0.2241, "step": 2480 }, { "epoch": 1.1579929988331388, "grad_norm": 0.89453125, "learning_rate": 0.00019349842186354153, "loss": 0.1815, "step": 2481 }, { "epoch": 1.1584597432905483, "grad_norm": 1.1484375, "learning_rate": 0.00019349321632944276, "loss": 0.2616, "step": 2482 }, { "epoch": 1.158926487747958, "grad_norm": 0.953125, "learning_rate": 0.00019348800878233138, "loss": 0.1701, "step": 2483 }, { "epoch": 1.1593932322053675, "grad_norm": 0.85546875, "learning_rate": 0.00019348279922231938, "loss": 0.1838, "step": 2484 }, { "epoch": 1.159859976662777, "grad_norm": 1.1484375, "learning_rate": 0.00019347758764951903, "loss": 0.236, "step": 2485 }, { "epoch": 1.1603267211201866, "grad_norm": 1.0703125, "learning_rate": 0.00019347237406404247, "loss": 0.2314, "step": 2486 }, { "epoch": 1.1607934655775962, "grad_norm": 1.1640625, "learning_rate": 0.00019346715846600202, "loss": 0.2304, "step": 2487 }, { "epoch": 1.1612602100350058, "grad_norm": 1.2265625, "learning_rate": 0.00019346194085550993, "loss": 0.2471, "step": 2488 }, { "epoch": 1.1617269544924154, "grad_norm": 0.91796875, "learning_rate": 0.00019345672123267856, "loss": 0.1926, "step": 2489 }, { "epoch": 1.162193698949825, "grad_norm": 0.96484375, "learning_rate": 0.0001934514995976203, "loss": 0.2159, "step": 2490 }, { "epoch": 1.1626604434072345, "grad_norm": 1.0546875, "learning_rate": 0.00019344627595044753, "loss": 0.2628, "step": 2491 }, { "epoch": 1.163127187864644, "grad_norm": 0.890625, "learning_rate": 0.0001934410502912728, "loss": 0.176, "step": 2492 }, { "epoch": 1.1635939323220537, "grad_norm": 0.93359375, "learning_rate": 0.00019343582262020857, "loss": 0.1886, "step": 2493 }, { "epoch": 1.1640606767794632, "grad_norm": 0.96875, "learning_rate": 0.0001934305929373674, "loss": 0.1654, "step": 2494 }, { "epoch": 1.1645274212368728, "grad_norm": 1.1171875, "learning_rate": 0.0001934253612428619, "loss": 0.2338, "step": 2495 }, { "epoch": 1.1649941656942824, "grad_norm": 0.890625, "learning_rate": 0.00019342012753680474, "loss": 0.2454, "step": 2496 }, { "epoch": 1.165460910151692, "grad_norm": 1.015625, "learning_rate": 0.00019341489181930858, "loss": 0.2252, "step": 2497 }, { "epoch": 1.1659276546091015, "grad_norm": 0.94140625, "learning_rate": 0.0001934096540904861, "loss": 0.2087, "step": 2498 }, { "epoch": 1.166394399066511, "grad_norm": 1.1796875, "learning_rate": 0.00019340441435045018, "loss": 0.2019, "step": 2499 }, { "epoch": 1.1668611435239207, "grad_norm": 0.8125, "learning_rate": 0.00019339917259931358, "loss": 0.1595, "step": 2500 }, { "epoch": 1.1673278879813302, "grad_norm": 0.89453125, "learning_rate": 0.00019339392883718914, "loss": 0.1916, "step": 2501 }, { "epoch": 1.1677946324387398, "grad_norm": 1.1171875, "learning_rate": 0.00019338868306418983, "loss": 0.23, "step": 2502 }, { "epoch": 1.1682613768961494, "grad_norm": 1.0078125, "learning_rate": 0.0001933834352804285, "loss": 0.19, "step": 2503 }, { "epoch": 1.168728121353559, "grad_norm": 1.0390625, "learning_rate": 0.00019337818548601825, "loss": 0.2698, "step": 2504 }, { "epoch": 1.1691948658109685, "grad_norm": 1.09375, "learning_rate": 0.00019337293368107202, "loss": 0.2835, "step": 2505 }, { "epoch": 1.1696616102683781, "grad_norm": 1.1015625, "learning_rate": 0.00019336767986570297, "loss": 0.2395, "step": 2506 }, { "epoch": 1.1701283547257877, "grad_norm": 1.0546875, "learning_rate": 0.00019336242404002416, "loss": 0.2379, "step": 2507 }, { "epoch": 1.1705950991831973, "grad_norm": 0.8515625, "learning_rate": 0.00019335716620414876, "loss": 0.1639, "step": 2508 }, { "epoch": 1.1710618436406068, "grad_norm": 0.9921875, "learning_rate": 0.00019335190635819002, "loss": 0.2091, "step": 2509 }, { "epoch": 1.1715285880980164, "grad_norm": 1.0703125, "learning_rate": 0.00019334664450226113, "loss": 0.2841, "step": 2510 }, { "epoch": 1.171995332555426, "grad_norm": 0.8671875, "learning_rate": 0.00019334138063647544, "loss": 0.1889, "step": 2511 }, { "epoch": 1.1724620770128356, "grad_norm": 0.9765625, "learning_rate": 0.00019333611476094625, "loss": 0.1703, "step": 2512 }, { "epoch": 1.1729288214702451, "grad_norm": 1.0234375, "learning_rate": 0.00019333084687578697, "loss": 0.2213, "step": 2513 }, { "epoch": 1.1733955659276547, "grad_norm": 0.88671875, "learning_rate": 0.00019332557698111094, "loss": 0.1575, "step": 2514 }, { "epoch": 1.173862310385064, "grad_norm": 0.87109375, "learning_rate": 0.00019332030507703175, "loss": 0.2017, "step": 2515 }, { "epoch": 1.1743290548424739, "grad_norm": 0.87890625, "learning_rate": 0.00019331503116366285, "loss": 0.2111, "step": 2516 }, { "epoch": 1.1747957992998832, "grad_norm": 0.984375, "learning_rate": 0.0001933097552411178, "loss": 0.1965, "step": 2517 }, { "epoch": 1.175262543757293, "grad_norm": 1.09375, "learning_rate": 0.00019330447730951016, "loss": 0.2294, "step": 2518 }, { "epoch": 1.1757292882147024, "grad_norm": 1.03125, "learning_rate": 0.0001932991973689536, "loss": 0.225, "step": 2519 }, { "epoch": 1.176196032672112, "grad_norm": 0.97265625, "learning_rate": 0.0001932939154195618, "loss": 0.2125, "step": 2520 }, { "epoch": 1.1766627771295215, "grad_norm": 0.98046875, "learning_rate": 0.00019328863146144852, "loss": 0.2299, "step": 2521 }, { "epoch": 1.177129521586931, "grad_norm": 0.96875, "learning_rate": 0.00019328334549472749, "loss": 0.1957, "step": 2522 }, { "epoch": 1.1775962660443406, "grad_norm": 1.0703125, "learning_rate": 0.00019327805751951252, "loss": 0.2401, "step": 2523 }, { "epoch": 1.1780630105017502, "grad_norm": 0.8359375, "learning_rate": 0.00019327276753591747, "loss": 0.1759, "step": 2524 }, { "epoch": 1.1785297549591598, "grad_norm": 1.0859375, "learning_rate": 0.00019326747554405626, "loss": 0.2082, "step": 2525 }, { "epoch": 1.1789964994165694, "grad_norm": 1.0859375, "learning_rate": 0.00019326218154404283, "loss": 0.2176, "step": 2526 }, { "epoch": 1.179463243873979, "grad_norm": 0.8671875, "learning_rate": 0.00019325688553599115, "loss": 0.18, "step": 2527 }, { "epoch": 1.1799299883313885, "grad_norm": 0.8203125, "learning_rate": 0.00019325158752001525, "loss": 0.2154, "step": 2528 }, { "epoch": 1.180396732788798, "grad_norm": 1.1328125, "learning_rate": 0.00019324628749622922, "loss": 0.203, "step": 2529 }, { "epoch": 1.1808634772462077, "grad_norm": 0.7734375, "learning_rate": 0.00019324098546474714, "loss": 0.177, "step": 2530 }, { "epoch": 1.1813302217036172, "grad_norm": 0.87109375, "learning_rate": 0.00019323568142568318, "loss": 0.1831, "step": 2531 }, { "epoch": 1.1817969661610268, "grad_norm": 1.0390625, "learning_rate": 0.00019323037537915158, "loss": 0.1953, "step": 2532 }, { "epoch": 1.1822637106184364, "grad_norm": 1.1875, "learning_rate": 0.00019322506732526656, "loss": 0.185, "step": 2533 }, { "epoch": 1.182730455075846, "grad_norm": 1.015625, "learning_rate": 0.00019321975726414238, "loss": 0.1687, "step": 2534 }, { "epoch": 1.1831971995332555, "grad_norm": 1.046875, "learning_rate": 0.0001932144451958934, "loss": 0.2447, "step": 2535 }, { "epoch": 1.183663943990665, "grad_norm": 0.94921875, "learning_rate": 0.000193209131120634, "loss": 0.1618, "step": 2536 }, { "epoch": 1.1841306884480747, "grad_norm": 0.96875, "learning_rate": 0.0001932038150384786, "loss": 0.1916, "step": 2537 }, { "epoch": 1.1845974329054842, "grad_norm": 0.99609375, "learning_rate": 0.00019319849694954163, "loss": 0.1777, "step": 2538 }, { "epoch": 1.1850641773628938, "grad_norm": 0.98046875, "learning_rate": 0.00019319317685393764, "loss": 0.1951, "step": 2539 }, { "epoch": 1.1855309218203034, "grad_norm": 0.8828125, "learning_rate": 0.00019318785475178115, "loss": 0.1766, "step": 2540 }, { "epoch": 1.185997666277713, "grad_norm": 0.9453125, "learning_rate": 0.00019318253064318675, "loss": 0.1843, "step": 2541 }, { "epoch": 1.1864644107351225, "grad_norm": 1.046875, "learning_rate": 0.00019317720452826907, "loss": 0.2082, "step": 2542 }, { "epoch": 1.1869311551925321, "grad_norm": 0.86328125, "learning_rate": 0.00019317187640714278, "loss": 0.1468, "step": 2543 }, { "epoch": 1.1873978996499417, "grad_norm": 1.0859375, "learning_rate": 0.00019316654627992262, "loss": 0.2385, "step": 2544 }, { "epoch": 1.1878646441073513, "grad_norm": 1.2890625, "learning_rate": 0.00019316121414672337, "loss": 0.2404, "step": 2545 }, { "epoch": 1.1883313885647608, "grad_norm": 1.15625, "learning_rate": 0.00019315588000765982, "loss": 0.2167, "step": 2546 }, { "epoch": 1.1887981330221704, "grad_norm": 0.890625, "learning_rate": 0.0001931505438628468, "loss": 0.1818, "step": 2547 }, { "epoch": 1.18926487747958, "grad_norm": 1.0390625, "learning_rate": 0.0001931452057123992, "loss": 0.2769, "step": 2548 }, { "epoch": 1.1897316219369896, "grad_norm": 0.8984375, "learning_rate": 0.000193139865556432, "loss": 0.1953, "step": 2549 }, { "epoch": 1.1901983663943991, "grad_norm": 1.109375, "learning_rate": 0.00019313452339506015, "loss": 0.1683, "step": 2550 }, { "epoch": 1.1906651108518087, "grad_norm": 1.1015625, "learning_rate": 0.00019312917922839867, "loss": 0.178, "step": 2551 }, { "epoch": 1.1911318553092183, "grad_norm": 0.96875, "learning_rate": 0.00019312383305656263, "loss": 0.1987, "step": 2552 }, { "epoch": 1.1915985997666279, "grad_norm": 1.125, "learning_rate": 0.00019311848487966714, "loss": 0.2476, "step": 2553 }, { "epoch": 1.1920653442240374, "grad_norm": 1.046875, "learning_rate": 0.00019311313469782737, "loss": 0.238, "step": 2554 }, { "epoch": 1.192532088681447, "grad_norm": 0.93359375, "learning_rate": 0.0001931077825111585, "loss": 0.1729, "step": 2555 }, { "epoch": 1.1929988331388564, "grad_norm": 0.90625, "learning_rate": 0.00019310242831977573, "loss": 0.1716, "step": 2556 }, { "epoch": 1.1934655775962661, "grad_norm": 1.078125, "learning_rate": 0.00019309707212379442, "loss": 0.2436, "step": 2557 }, { "epoch": 1.1939323220536755, "grad_norm": 1.046875, "learning_rate": 0.00019309171392332982, "loss": 0.1739, "step": 2558 }, { "epoch": 1.1943990665110853, "grad_norm": 0.90625, "learning_rate": 0.0001930863537184974, "loss": 0.1955, "step": 2559 }, { "epoch": 1.1948658109684946, "grad_norm": 1.125, "learning_rate": 0.00019308099150941243, "loss": 0.2132, "step": 2560 }, { "epoch": 1.1953325554259044, "grad_norm": 1.1328125, "learning_rate": 0.00019307562729619046, "loss": 0.2133, "step": 2561 }, { "epoch": 1.1957992998833138, "grad_norm": 1.0078125, "learning_rate": 0.000193070261078947, "loss": 0.2074, "step": 2562 }, { "epoch": 1.1962660443407234, "grad_norm": 1.1953125, "learning_rate": 0.0001930648928577975, "loss": 0.2746, "step": 2563 }, { "epoch": 1.196732788798133, "grad_norm": 1.0234375, "learning_rate": 0.00019305952263285763, "loss": 0.2192, "step": 2564 }, { "epoch": 1.1971995332555425, "grad_norm": 0.9765625, "learning_rate": 0.000193054150404243, "loss": 0.2045, "step": 2565 }, { "epoch": 1.197666277712952, "grad_norm": 0.9765625, "learning_rate": 0.00019304877617206922, "loss": 0.1731, "step": 2566 }, { "epoch": 1.1981330221703617, "grad_norm": 0.90234375, "learning_rate": 0.0001930433999364521, "loss": 0.1934, "step": 2567 }, { "epoch": 1.1985997666277712, "grad_norm": 0.953125, "learning_rate": 0.00019303802169750732, "loss": 0.1758, "step": 2568 }, { "epoch": 1.1990665110851808, "grad_norm": 0.80078125, "learning_rate": 0.0001930326414553507, "loss": 0.1535, "step": 2569 }, { "epoch": 1.1995332555425904, "grad_norm": 0.9921875, "learning_rate": 0.0001930272592100981, "loss": 0.2302, "step": 2570 }, { "epoch": 1.2, "grad_norm": 1.015625, "learning_rate": 0.0001930218749618654, "loss": 0.2061, "step": 2571 }, { "epoch": 1.2004667444574095, "grad_norm": 1.09375, "learning_rate": 0.0001930164887107685, "loss": 0.245, "step": 2572 }, { "epoch": 1.200933488914819, "grad_norm": 0.9765625, "learning_rate": 0.00019301110045692342, "loss": 0.2025, "step": 2573 }, { "epoch": 1.2014002333722287, "grad_norm": 0.93359375, "learning_rate": 0.0001930057102004461, "loss": 0.1918, "step": 2574 }, { "epoch": 1.2018669778296382, "grad_norm": 0.96484375, "learning_rate": 0.0001930003179414527, "loss": 0.1863, "step": 2575 }, { "epoch": 1.2023337222870478, "grad_norm": 1.0546875, "learning_rate": 0.00019299492368005927, "loss": 0.1803, "step": 2576 }, { "epoch": 1.2028004667444574, "grad_norm": 0.90234375, "learning_rate": 0.00019298952741638194, "loss": 0.1946, "step": 2577 }, { "epoch": 1.203267211201867, "grad_norm": 1.0390625, "learning_rate": 0.00019298412915053688, "loss": 0.2358, "step": 2578 }, { "epoch": 1.2037339556592765, "grad_norm": 0.98046875, "learning_rate": 0.0001929787288826404, "loss": 0.1976, "step": 2579 }, { "epoch": 1.2042007001166861, "grad_norm": 0.953125, "learning_rate": 0.0001929733266128087, "loss": 0.1949, "step": 2580 }, { "epoch": 1.2046674445740957, "grad_norm": 1.140625, "learning_rate": 0.00019296792234115812, "loss": 0.2932, "step": 2581 }, { "epoch": 1.2051341890315053, "grad_norm": 1.125, "learning_rate": 0.00019296251606780505, "loss": 0.25, "step": 2582 }, { "epoch": 1.2056009334889148, "grad_norm": 0.8984375, "learning_rate": 0.00019295710779286584, "loss": 0.1869, "step": 2583 }, { "epoch": 1.2060676779463244, "grad_norm": 0.99609375, "learning_rate": 0.00019295169751645696, "loss": 0.1854, "step": 2584 }, { "epoch": 1.206534422403734, "grad_norm": 0.98828125, "learning_rate": 0.0001929462852386949, "loss": 0.1981, "step": 2585 }, { "epoch": 1.2070011668611436, "grad_norm": 0.9296875, "learning_rate": 0.00019294087095969622, "loss": 0.1741, "step": 2586 }, { "epoch": 1.2074679113185531, "grad_norm": 1.015625, "learning_rate": 0.00019293545467957744, "loss": 0.2241, "step": 2587 }, { "epoch": 1.2079346557759627, "grad_norm": 1.21875, "learning_rate": 0.00019293003639845522, "loss": 0.1876, "step": 2588 }, { "epoch": 1.2084014002333723, "grad_norm": 1.1171875, "learning_rate": 0.0001929246161164462, "loss": 0.273, "step": 2589 }, { "epoch": 1.2088681446907819, "grad_norm": 1.03125, "learning_rate": 0.0001929191938336671, "loss": 0.2252, "step": 2590 }, { "epoch": 1.2093348891481914, "grad_norm": 0.96484375, "learning_rate": 0.00019291376955023467, "loss": 0.1794, "step": 2591 }, { "epoch": 1.209801633605601, "grad_norm": 1.0390625, "learning_rate": 0.0001929083432662657, "loss": 0.2024, "step": 2592 }, { "epoch": 1.2102683780630106, "grad_norm": 1.0859375, "learning_rate": 0.00019290291498187697, "loss": 0.2628, "step": 2593 }, { "epoch": 1.2107351225204201, "grad_norm": 1.109375, "learning_rate": 0.00019289748469718542, "loss": 0.2353, "step": 2594 }, { "epoch": 1.2112018669778297, "grad_norm": 0.88671875, "learning_rate": 0.00019289205241230798, "loss": 0.2071, "step": 2595 }, { "epoch": 1.2116686114352393, "grad_norm": 1.1015625, "learning_rate": 0.00019288661812736155, "loss": 0.2128, "step": 2596 }, { "epoch": 1.2121353558926489, "grad_norm": 0.93359375, "learning_rate": 0.0001928811818424632, "loss": 0.1791, "step": 2597 }, { "epoch": 1.2126021003500584, "grad_norm": 0.9921875, "learning_rate": 0.00019287574355772994, "loss": 0.2009, "step": 2598 }, { "epoch": 1.2130688448074678, "grad_norm": 0.87890625, "learning_rate": 0.00019287030327327887, "loss": 0.1913, "step": 2599 }, { "epoch": 1.2135355892648776, "grad_norm": 0.86328125, "learning_rate": 0.00019286486098922713, "loss": 0.1723, "step": 2600 }, { "epoch": 1.214002333722287, "grad_norm": 0.9609375, "learning_rate": 0.00019285941670569193, "loss": 0.2285, "step": 2601 }, { "epoch": 1.2144690781796967, "grad_norm": 0.984375, "learning_rate": 0.00019285397042279043, "loss": 0.1893, "step": 2602 }, { "epoch": 1.214935822637106, "grad_norm": 0.98046875, "learning_rate": 0.00019284852214063994, "loss": 0.2155, "step": 2603 }, { "epoch": 1.2154025670945157, "grad_norm": 0.8828125, "learning_rate": 0.00019284307185935773, "loss": 0.2071, "step": 2604 }, { "epoch": 1.2158693115519252, "grad_norm": 1.140625, "learning_rate": 0.00019283761957906116, "loss": 0.2217, "step": 2605 }, { "epoch": 1.2163360560093348, "grad_norm": 1.2421875, "learning_rate": 0.00019283216529986764, "loss": 0.2731, "step": 2606 }, { "epoch": 1.2168028004667444, "grad_norm": 0.828125, "learning_rate": 0.00019282670902189464, "loss": 0.1293, "step": 2607 }, { "epoch": 1.217269544924154, "grad_norm": 1.1171875, "learning_rate": 0.00019282125074525957, "loss": 0.2126, "step": 2608 }, { "epoch": 1.2177362893815635, "grad_norm": 1.2578125, "learning_rate": 0.00019281579047008, "loss": 0.244, "step": 2609 }, { "epoch": 1.218203033838973, "grad_norm": 0.9453125, "learning_rate": 0.0001928103281964735, "loss": 0.1617, "step": 2610 }, { "epoch": 1.2186697782963827, "grad_norm": 1.171875, "learning_rate": 0.0001928048639245576, "loss": 0.2016, "step": 2611 }, { "epoch": 1.2191365227537923, "grad_norm": 0.91015625, "learning_rate": 0.00019279939765445007, "loss": 0.2403, "step": 2612 }, { "epoch": 1.2196032672112018, "grad_norm": 0.91015625, "learning_rate": 0.00019279392938626851, "loss": 0.1594, "step": 2613 }, { "epoch": 1.2200700116686114, "grad_norm": 0.984375, "learning_rate": 0.0001927884591201307, "loss": 0.1849, "step": 2614 }, { "epoch": 1.220536756126021, "grad_norm": 1.09375, "learning_rate": 0.00019278298685615442, "loss": 0.1904, "step": 2615 }, { "epoch": 1.2210035005834305, "grad_norm": 1.0078125, "learning_rate": 0.00019277751259445748, "loss": 0.2165, "step": 2616 }, { "epoch": 1.2214702450408401, "grad_norm": 0.890625, "learning_rate": 0.00019277203633515777, "loss": 0.1595, "step": 2617 }, { "epoch": 1.2219369894982497, "grad_norm": 0.9296875, "learning_rate": 0.00019276655807837315, "loss": 0.2092, "step": 2618 }, { "epoch": 1.2224037339556593, "grad_norm": 0.9375, "learning_rate": 0.00019276107782422166, "loss": 0.1396, "step": 2619 }, { "epoch": 1.2228704784130688, "grad_norm": 1.0546875, "learning_rate": 0.00019275559557282122, "loss": 0.1837, "step": 2620 }, { "epoch": 1.2233372228704784, "grad_norm": 0.90234375, "learning_rate": 0.00019275011132428987, "loss": 0.1566, "step": 2621 }, { "epoch": 1.223803967327888, "grad_norm": 1.0859375, "learning_rate": 0.00019274462507874575, "loss": 0.1832, "step": 2622 }, { "epoch": 1.2242707117852976, "grad_norm": 0.953125, "learning_rate": 0.00019273913683630692, "loss": 0.1714, "step": 2623 }, { "epoch": 1.2247374562427071, "grad_norm": 1.109375, "learning_rate": 0.00019273364659709162, "loss": 0.1839, "step": 2624 }, { "epoch": 1.2252042007001167, "grad_norm": 1.1875, "learning_rate": 0.00019272815436121798, "loss": 0.2136, "step": 2625 }, { "epoch": 1.2256709451575263, "grad_norm": 0.984375, "learning_rate": 0.0001927226601288043, "loss": 0.1723, "step": 2626 }, { "epoch": 1.2261376896149359, "grad_norm": 1.1953125, "learning_rate": 0.00019271716389996886, "loss": 0.1657, "step": 2627 }, { "epoch": 1.2266044340723454, "grad_norm": 1.0859375, "learning_rate": 0.00019271166567483004, "loss": 0.2068, "step": 2628 }, { "epoch": 1.227071178529755, "grad_norm": 1.265625, "learning_rate": 0.00019270616545350618, "loss": 0.2109, "step": 2629 }, { "epoch": 1.2275379229871646, "grad_norm": 0.875, "learning_rate": 0.00019270066323611573, "loss": 0.1497, "step": 2630 }, { "epoch": 1.2280046674445741, "grad_norm": 1.078125, "learning_rate": 0.00019269515902277715, "loss": 0.1843, "step": 2631 }, { "epoch": 1.2284714119019837, "grad_norm": 1.0859375, "learning_rate": 0.00019268965281360893, "loss": 0.2663, "step": 2632 }, { "epoch": 1.2289381563593933, "grad_norm": 0.9453125, "learning_rate": 0.00019268414460872963, "loss": 0.1725, "step": 2633 }, { "epoch": 1.2294049008168029, "grad_norm": 0.89453125, "learning_rate": 0.0001926786344082579, "loss": 0.2056, "step": 2634 }, { "epoch": 1.2298716452742124, "grad_norm": 1.2265625, "learning_rate": 0.0001926731222123123, "loss": 0.3282, "step": 2635 }, { "epoch": 1.230338389731622, "grad_norm": 0.89453125, "learning_rate": 0.0001926676080210116, "loss": 0.1815, "step": 2636 }, { "epoch": 1.2308051341890316, "grad_norm": 1.2734375, "learning_rate": 0.00019266209183447445, "loss": 0.1998, "step": 2637 }, { "epoch": 1.2312718786464412, "grad_norm": 0.8203125, "learning_rate": 0.00019265657365281967, "loss": 0.1926, "step": 2638 }, { "epoch": 1.2317386231038507, "grad_norm": 1.0390625, "learning_rate": 0.00019265105347616602, "loss": 0.2383, "step": 2639 }, { "epoch": 1.23220536756126, "grad_norm": 1.0625, "learning_rate": 0.00019264553130463242, "loss": 0.1994, "step": 2640 }, { "epoch": 1.2326721120186699, "grad_norm": 0.98046875, "learning_rate": 0.00019264000713833777, "loss": 0.1897, "step": 2641 }, { "epoch": 1.2331388564760792, "grad_norm": 1.125, "learning_rate": 0.00019263448097740096, "loss": 0.2974, "step": 2642 }, { "epoch": 1.233605600933489, "grad_norm": 1.0546875, "learning_rate": 0.00019262895282194098, "loss": 0.2032, "step": 2643 }, { "epoch": 1.2340723453908984, "grad_norm": 0.87890625, "learning_rate": 0.00019262342267207687, "loss": 0.1618, "step": 2644 }, { "epoch": 1.234539089848308, "grad_norm": 0.96875, "learning_rate": 0.0001926178905279277, "loss": 0.2134, "step": 2645 }, { "epoch": 1.2350058343057175, "grad_norm": 1.15625, "learning_rate": 0.00019261235638961258, "loss": 0.2682, "step": 2646 }, { "epoch": 1.235472578763127, "grad_norm": 0.8828125, "learning_rate": 0.0001926068202572507, "loss": 0.2019, "step": 2647 }, { "epoch": 1.2359393232205367, "grad_norm": 0.81640625, "learning_rate": 0.00019260128213096123, "loss": 0.1589, "step": 2648 }, { "epoch": 1.2364060676779463, "grad_norm": 1.125, "learning_rate": 0.0001925957420108634, "loss": 0.1959, "step": 2649 }, { "epoch": 1.2368728121353558, "grad_norm": 1.0859375, "learning_rate": 0.00019259019989707648, "loss": 0.191, "step": 2650 }, { "epoch": 1.2373395565927654, "grad_norm": 0.9375, "learning_rate": 0.00019258465578971988, "loss": 0.1931, "step": 2651 }, { "epoch": 1.237806301050175, "grad_norm": 0.921875, "learning_rate": 0.0001925791096889129, "loss": 0.2238, "step": 2652 }, { "epoch": 1.2382730455075845, "grad_norm": 0.91015625, "learning_rate": 0.00019257356159477494, "loss": 0.1554, "step": 2653 }, { "epoch": 1.2387397899649941, "grad_norm": 1.1015625, "learning_rate": 0.0001925680115074255, "loss": 0.1976, "step": 2654 }, { "epoch": 1.2392065344224037, "grad_norm": 1.1015625, "learning_rate": 0.00019256245942698408, "loss": 0.2239, "step": 2655 }, { "epoch": 1.2396732788798133, "grad_norm": 0.84375, "learning_rate": 0.0001925569053535702, "loss": 0.1902, "step": 2656 }, { "epoch": 1.2401400233372228, "grad_norm": 1.03125, "learning_rate": 0.00019255134928730346, "loss": 0.1886, "step": 2657 }, { "epoch": 1.2406067677946324, "grad_norm": 1.046875, "learning_rate": 0.00019254579122830346, "loss": 0.2132, "step": 2658 }, { "epoch": 1.241073512252042, "grad_norm": 0.98828125, "learning_rate": 0.0001925402311766899, "loss": 0.1374, "step": 2659 }, { "epoch": 1.2415402567094516, "grad_norm": 1.15625, "learning_rate": 0.00019253466913258252, "loss": 0.2233, "step": 2660 }, { "epoch": 1.2420070011668611, "grad_norm": 1.1015625, "learning_rate": 0.00019252910509610102, "loss": 0.1861, "step": 2661 }, { "epoch": 1.2424737456242707, "grad_norm": 0.93359375, "learning_rate": 0.00019252353906736524, "loss": 0.1634, "step": 2662 }, { "epoch": 1.2429404900816803, "grad_norm": 0.9140625, "learning_rate": 0.00019251797104649502, "loss": 0.2075, "step": 2663 }, { "epoch": 1.2434072345390899, "grad_norm": 0.9765625, "learning_rate": 0.00019251240103361017, "loss": 0.1733, "step": 2664 }, { "epoch": 1.2438739789964994, "grad_norm": 0.9296875, "learning_rate": 0.00019250682902883072, "loss": 0.1776, "step": 2665 }, { "epoch": 1.244340723453909, "grad_norm": 1.078125, "learning_rate": 0.0001925012550322766, "loss": 0.2666, "step": 2666 }, { "epoch": 1.2448074679113186, "grad_norm": 0.88671875, "learning_rate": 0.00019249567904406785, "loss": 0.1493, "step": 2667 }, { "epoch": 1.2452742123687282, "grad_norm": 1.0, "learning_rate": 0.00019249010106432447, "loss": 0.1723, "step": 2668 }, { "epoch": 1.2457409568261377, "grad_norm": 1.078125, "learning_rate": 0.00019248452109316662, "loss": 0.2069, "step": 2669 }, { "epoch": 1.2462077012835473, "grad_norm": 1.1640625, "learning_rate": 0.0001924789391307144, "loss": 0.1732, "step": 2670 }, { "epoch": 1.2466744457409569, "grad_norm": 0.89453125, "learning_rate": 0.00019247335517708802, "loss": 0.178, "step": 2671 }, { "epoch": 1.2471411901983664, "grad_norm": 0.93359375, "learning_rate": 0.0001924677692324077, "loss": 0.1849, "step": 2672 }, { "epoch": 1.247607934655776, "grad_norm": 1.2578125, "learning_rate": 0.00019246218129679373, "loss": 0.2077, "step": 2673 }, { "epoch": 1.2480746791131856, "grad_norm": 1.140625, "learning_rate": 0.0001924565913703664, "loss": 0.2684, "step": 2674 }, { "epoch": 1.2485414235705952, "grad_norm": 1.140625, "learning_rate": 0.00019245099945324604, "loss": 0.2062, "step": 2675 }, { "epoch": 1.2490081680280047, "grad_norm": 1.1796875, "learning_rate": 0.00019244540554555313, "loss": 0.2145, "step": 2676 }, { "epoch": 1.2494749124854143, "grad_norm": 1.1328125, "learning_rate": 0.00019243980964740804, "loss": 0.2253, "step": 2677 }, { "epoch": 1.2499416569428239, "grad_norm": 1.3359375, "learning_rate": 0.0001924342117589313, "loss": 0.258, "step": 2678 }, { "epoch": 1.2504084014002332, "grad_norm": 1.03125, "learning_rate": 0.0001924286118802434, "loss": 0.1971, "step": 2679 }, { "epoch": 1.250875145857643, "grad_norm": 1.140625, "learning_rate": 0.00019242301001146496, "loss": 0.1894, "step": 2680 }, { "epoch": 1.250875145857643, "eval_loss": 1.2540687322616577, "eval_runtime": 94.4185, "eval_samples_per_second": 19.106, "eval_steps_per_second": 2.394, "step": 2680 }, { "epoch": 1.2513418903150524, "grad_norm": 0.875, "learning_rate": 0.00019241740615271659, "loss": 0.1938, "step": 2681 }, { "epoch": 1.2518086347724622, "grad_norm": 0.89453125, "learning_rate": 0.0001924118003041189, "loss": 0.155, "step": 2682 }, { "epoch": 1.2522753792298715, "grad_norm": 0.96484375, "learning_rate": 0.0001924061924657926, "loss": 0.1516, "step": 2683 }, { "epoch": 1.2527421236872813, "grad_norm": 0.84765625, "learning_rate": 0.0001924005826378585, "loss": 0.1383, "step": 2684 }, { "epoch": 1.2532088681446907, "grad_norm": 1.1171875, "learning_rate": 0.00019239497082043728, "loss": 0.2517, "step": 2685 }, { "epoch": 1.2536756126021005, "grad_norm": 0.90625, "learning_rate": 0.00019238935701364988, "loss": 0.1693, "step": 2686 }, { "epoch": 1.2541423570595098, "grad_norm": 1.0703125, "learning_rate": 0.00019238374121761703, "loss": 0.2126, "step": 2687 }, { "epoch": 1.2546091015169196, "grad_norm": 0.94921875, "learning_rate": 0.0001923781234324598, "loss": 0.193, "step": 2688 }, { "epoch": 1.255075845974329, "grad_norm": 0.921875, "learning_rate": 0.00019237250365829908, "loss": 0.1817, "step": 2689 }, { "epoch": 1.2555425904317385, "grad_norm": 0.96484375, "learning_rate": 0.00019236688189525585, "loss": 0.2106, "step": 2690 }, { "epoch": 1.2560093348891481, "grad_norm": 0.96875, "learning_rate": 0.00019236125814345116, "loss": 0.1914, "step": 2691 }, { "epoch": 1.2564760793465577, "grad_norm": 1.078125, "learning_rate": 0.0001923556324030061, "loss": 0.1825, "step": 2692 }, { "epoch": 1.2569428238039673, "grad_norm": 1.0, "learning_rate": 0.00019235000467404185, "loss": 0.153, "step": 2693 }, { "epoch": 1.2574095682613768, "grad_norm": 1.15625, "learning_rate": 0.00019234437495667948, "loss": 0.2263, "step": 2694 }, { "epoch": 1.2578763127187864, "grad_norm": 1.1015625, "learning_rate": 0.0001923387432510403, "loss": 0.2134, "step": 2695 }, { "epoch": 1.258343057176196, "grad_norm": 1.015625, "learning_rate": 0.0001923331095572455, "loss": 0.1849, "step": 2696 }, { "epoch": 1.2588098016336056, "grad_norm": 0.9375, "learning_rate": 0.0001923274738754164, "loss": 0.1565, "step": 2697 }, { "epoch": 1.2592765460910151, "grad_norm": 1.078125, "learning_rate": 0.00019232183620567436, "loss": 0.2124, "step": 2698 }, { "epoch": 1.2597432905484247, "grad_norm": 1.0, "learning_rate": 0.00019231619654814077, "loss": 0.1587, "step": 2699 }, { "epoch": 1.2602100350058343, "grad_norm": 1.03125, "learning_rate": 0.000192310554902937, "loss": 0.1961, "step": 2700 }, { "epoch": 1.2606767794632439, "grad_norm": 1.1484375, "learning_rate": 0.00019230491127018463, "loss": 0.2365, "step": 2701 }, { "epoch": 1.2611435239206534, "grad_norm": 0.83203125, "learning_rate": 0.00019229926565000506, "loss": 0.1328, "step": 2702 }, { "epoch": 1.261610268378063, "grad_norm": 0.8671875, "learning_rate": 0.0001922936180425199, "loss": 0.1529, "step": 2703 }, { "epoch": 1.2620770128354726, "grad_norm": 0.96484375, "learning_rate": 0.00019228796844785072, "loss": 0.1803, "step": 2704 }, { "epoch": 1.2625437572928822, "grad_norm": 0.93359375, "learning_rate": 0.00019228231686611923, "loss": 0.1732, "step": 2705 }, { "epoch": 1.2630105017502917, "grad_norm": 1.09375, "learning_rate": 0.00019227666329744704, "loss": 0.1984, "step": 2706 }, { "epoch": 1.2634772462077013, "grad_norm": 1.0859375, "learning_rate": 0.0001922710077419559, "loss": 0.2114, "step": 2707 }, { "epoch": 1.2639439906651109, "grad_norm": 0.96484375, "learning_rate": 0.00019226535019976763, "loss": 0.1533, "step": 2708 }, { "epoch": 1.2644107351225204, "grad_norm": 1.125, "learning_rate": 0.00019225969067100397, "loss": 0.1754, "step": 2709 }, { "epoch": 1.26487747957993, "grad_norm": 1.0546875, "learning_rate": 0.00019225402915578677, "loss": 0.2632, "step": 2710 }, { "epoch": 1.2653442240373396, "grad_norm": 1.0078125, "learning_rate": 0.00019224836565423801, "loss": 0.1809, "step": 2711 }, { "epoch": 1.2658109684947492, "grad_norm": 1.2265625, "learning_rate": 0.00019224270016647958, "loss": 0.1861, "step": 2712 }, { "epoch": 1.2662777129521587, "grad_norm": 1.0390625, "learning_rate": 0.00019223703269263345, "loss": 0.1918, "step": 2713 }, { "epoch": 1.2667444574095683, "grad_norm": 1.0078125, "learning_rate": 0.0001922313632328217, "loss": 0.1902, "step": 2714 }, { "epoch": 1.2672112018669779, "grad_norm": 1.0, "learning_rate": 0.00019222569178716634, "loss": 0.1934, "step": 2715 }, { "epoch": 1.2676779463243875, "grad_norm": 0.8359375, "learning_rate": 0.00019222001835578954, "loss": 0.1607, "step": 2716 }, { "epoch": 1.268144690781797, "grad_norm": 0.8984375, "learning_rate": 0.00019221434293881337, "loss": 0.1728, "step": 2717 }, { "epoch": 1.2686114352392066, "grad_norm": 0.8984375, "learning_rate": 0.00019220866553636015, "loss": 0.1794, "step": 2718 }, { "epoch": 1.2690781796966162, "grad_norm": 0.83203125, "learning_rate": 0.00019220298614855202, "loss": 0.1652, "step": 2719 }, { "epoch": 1.2695449241540255, "grad_norm": 0.75390625, "learning_rate": 0.00019219730477551127, "loss": 0.1125, "step": 2720 }, { "epoch": 1.2700116686114353, "grad_norm": 0.9296875, "learning_rate": 0.00019219162141736028, "loss": 0.2013, "step": 2721 }, { "epoch": 1.2704784130688447, "grad_norm": 0.8984375, "learning_rate": 0.0001921859360742214, "loss": 0.1761, "step": 2722 }, { "epoch": 1.2709451575262545, "grad_norm": 1.203125, "learning_rate": 0.00019218024874621703, "loss": 0.2777, "step": 2723 }, { "epoch": 1.2714119019836638, "grad_norm": 0.97265625, "learning_rate": 0.00019217455943346964, "loss": 0.1714, "step": 2724 }, { "epoch": 1.2718786464410736, "grad_norm": 1.0859375, "learning_rate": 0.0001921688681361017, "loss": 0.2285, "step": 2725 }, { "epoch": 1.272345390898483, "grad_norm": 1.0, "learning_rate": 0.00019216317485423577, "loss": 0.1795, "step": 2726 }, { "epoch": 1.2728121353558928, "grad_norm": 0.99609375, "learning_rate": 0.00019215747958799444, "loss": 0.1628, "step": 2727 }, { "epoch": 1.2732788798133021, "grad_norm": 1.0234375, "learning_rate": 0.0001921517823375003, "loss": 0.2006, "step": 2728 }, { "epoch": 1.273745624270712, "grad_norm": 1.171875, "learning_rate": 0.00019214608310287606, "loss": 0.1517, "step": 2729 }, { "epoch": 1.2742123687281213, "grad_norm": 1.0078125, "learning_rate": 0.0001921403818842444, "loss": 0.183, "step": 2730 }, { "epoch": 1.2746791131855308, "grad_norm": 1.0, "learning_rate": 0.00019213467868172809, "loss": 0.189, "step": 2731 }, { "epoch": 1.2751458576429404, "grad_norm": 0.8125, "learning_rate": 0.00019212897349544993, "loss": 0.1537, "step": 2732 }, { "epoch": 1.27561260210035, "grad_norm": 0.9375, "learning_rate": 0.00019212326632553274, "loss": 0.1602, "step": 2733 }, { "epoch": 1.2760793465577596, "grad_norm": 0.796875, "learning_rate": 0.00019211755717209942, "loss": 0.1194, "step": 2734 }, { "epoch": 1.2765460910151691, "grad_norm": 1.09375, "learning_rate": 0.0001921118460352729, "loss": 0.1566, "step": 2735 }, { "epoch": 1.2770128354725787, "grad_norm": 0.96484375, "learning_rate": 0.00019210613291517615, "loss": 0.1927, "step": 2736 }, { "epoch": 1.2774795799299883, "grad_norm": 1.046875, "learning_rate": 0.0001921004178119321, "loss": 0.196, "step": 2737 }, { "epoch": 1.2779463243873979, "grad_norm": 1.046875, "learning_rate": 0.00019209470072566393, "loss": 0.2736, "step": 2738 }, { "epoch": 1.2784130688448074, "grad_norm": 1.0625, "learning_rate": 0.00019208898165649463, "loss": 0.2075, "step": 2739 }, { "epoch": 1.278879813302217, "grad_norm": 1.0234375, "learning_rate": 0.00019208326060454742, "loss": 0.1691, "step": 2740 }, { "epoch": 1.2793465577596266, "grad_norm": 1.0546875, "learning_rate": 0.00019207753756994542, "loss": 0.1904, "step": 2741 }, { "epoch": 1.2798133022170362, "grad_norm": 0.9453125, "learning_rate": 0.00019207181255281186, "loss": 0.1766, "step": 2742 }, { "epoch": 1.2802800466744457, "grad_norm": 1.1875, "learning_rate": 0.00019206608555327005, "loss": 0.2311, "step": 2743 }, { "epoch": 1.2807467911318553, "grad_norm": 0.98828125, "learning_rate": 0.00019206035657144324, "loss": 0.1986, "step": 2744 }, { "epoch": 1.2812135355892649, "grad_norm": 1.03125, "learning_rate": 0.00019205462560745483, "loss": 0.2101, "step": 2745 }, { "epoch": 1.2816802800466744, "grad_norm": 1.0078125, "learning_rate": 0.0001920488926614282, "loss": 0.1721, "step": 2746 }, { "epoch": 1.282147024504084, "grad_norm": 0.9765625, "learning_rate": 0.00019204315773348676, "loss": 0.1847, "step": 2747 }, { "epoch": 1.2826137689614936, "grad_norm": 1.1171875, "learning_rate": 0.000192037420823754, "loss": 0.1453, "step": 2748 }, { "epoch": 1.2830805134189032, "grad_norm": 0.97265625, "learning_rate": 0.0001920316819323535, "loss": 0.1495, "step": 2749 }, { "epoch": 1.2835472578763127, "grad_norm": 1.3203125, "learning_rate": 0.00019202594105940875, "loss": 0.2295, "step": 2750 }, { "epoch": 1.2840140023337223, "grad_norm": 1.1171875, "learning_rate": 0.00019202019820504336, "loss": 0.1825, "step": 2751 }, { "epoch": 1.2844807467911319, "grad_norm": 1.1484375, "learning_rate": 0.00019201445336938105, "loss": 0.178, "step": 2752 }, { "epoch": 1.2849474912485415, "grad_norm": 0.88671875, "learning_rate": 0.00019200870655254544, "loss": 0.14, "step": 2753 }, { "epoch": 1.285414235705951, "grad_norm": 1.0390625, "learning_rate": 0.0001920029577546603, "loss": 0.1509, "step": 2754 }, { "epoch": 1.2858809801633606, "grad_norm": 1.0859375, "learning_rate": 0.0001919972069758494, "loss": 0.2064, "step": 2755 }, { "epoch": 1.2863477246207702, "grad_norm": 0.9296875, "learning_rate": 0.00019199145421623656, "loss": 0.192, "step": 2756 }, { "epoch": 1.2868144690781798, "grad_norm": 0.94140625, "learning_rate": 0.00019198569947594566, "loss": 0.1495, "step": 2757 }, { "epoch": 1.2872812135355893, "grad_norm": 0.92578125, "learning_rate": 0.00019197994275510055, "loss": 0.1765, "step": 2758 }, { "epoch": 1.287747957992999, "grad_norm": 0.94921875, "learning_rate": 0.00019197418405382527, "loss": 0.1473, "step": 2759 }, { "epoch": 1.2882147024504085, "grad_norm": 1.1171875, "learning_rate": 0.00019196842337224372, "loss": 0.175, "step": 2760 }, { "epoch": 1.288681446907818, "grad_norm": 0.95703125, "learning_rate": 0.00019196266071047998, "loss": 0.1873, "step": 2761 }, { "epoch": 1.2891481913652276, "grad_norm": 0.94140625, "learning_rate": 0.00019195689606865812, "loss": 0.169, "step": 2762 }, { "epoch": 1.289614935822637, "grad_norm": 1.09375, "learning_rate": 0.00019195112944690225, "loss": 0.1428, "step": 2763 }, { "epoch": 1.2900816802800468, "grad_norm": 0.90234375, "learning_rate": 0.00019194536084533658, "loss": 0.1059, "step": 2764 }, { "epoch": 1.2905484247374561, "grad_norm": 1.1015625, "learning_rate": 0.00019193959026408524, "loss": 0.1372, "step": 2765 }, { "epoch": 1.291015169194866, "grad_norm": 1.1640625, "learning_rate": 0.00019193381770327253, "loss": 0.2027, "step": 2766 }, { "epoch": 1.2914819136522753, "grad_norm": 1.1796875, "learning_rate": 0.00019192804316302273, "loss": 0.2615, "step": 2767 }, { "epoch": 1.291948658109685, "grad_norm": 1.21875, "learning_rate": 0.00019192226664346018, "loss": 0.1939, "step": 2768 }, { "epoch": 1.2924154025670944, "grad_norm": 1.1171875, "learning_rate": 0.0001919164881447092, "loss": 0.2044, "step": 2769 }, { "epoch": 1.2928821470245042, "grad_norm": 0.97265625, "learning_rate": 0.00019191070766689427, "loss": 0.1697, "step": 2770 }, { "epoch": 1.2933488914819136, "grad_norm": 1.0390625, "learning_rate": 0.0001919049252101398, "loss": 0.1932, "step": 2771 }, { "epoch": 1.2938156359393231, "grad_norm": 1.0390625, "learning_rate": 0.00019189914077457037, "loss": 0.1852, "step": 2772 }, { "epoch": 1.2942823803967327, "grad_norm": 0.9921875, "learning_rate": 0.00019189335436031044, "loss": 0.2019, "step": 2773 }, { "epoch": 1.2947491248541423, "grad_norm": 0.83984375, "learning_rate": 0.00019188756596748462, "loss": 0.1645, "step": 2774 }, { "epoch": 1.2952158693115519, "grad_norm": 0.90234375, "learning_rate": 0.0001918817755962176, "loss": 0.1413, "step": 2775 }, { "epoch": 1.2956826137689614, "grad_norm": 0.89453125, "learning_rate": 0.00019187598324663395, "loss": 0.1621, "step": 2776 }, { "epoch": 1.296149358226371, "grad_norm": 1.0625, "learning_rate": 0.00019187018891885847, "loss": 0.1889, "step": 2777 }, { "epoch": 1.2966161026837806, "grad_norm": 0.984375, "learning_rate": 0.0001918643926130159, "loss": 0.2034, "step": 2778 }, { "epoch": 1.2970828471411902, "grad_norm": 1.0, "learning_rate": 0.00019185859432923103, "loss": 0.195, "step": 2779 }, { "epoch": 1.2975495915985997, "grad_norm": 1.0703125, "learning_rate": 0.0001918527940676287, "loss": 0.2068, "step": 2780 }, { "epoch": 1.2980163360560093, "grad_norm": 1.0625, "learning_rate": 0.00019184699182833378, "loss": 0.2034, "step": 2781 }, { "epoch": 1.2984830805134189, "grad_norm": 0.9140625, "learning_rate": 0.00019184118761147125, "loss": 0.2015, "step": 2782 }, { "epoch": 1.2989498249708284, "grad_norm": 1.1015625, "learning_rate": 0.00019183538141716603, "loss": 0.1963, "step": 2783 }, { "epoch": 1.299416569428238, "grad_norm": 1.1015625, "learning_rate": 0.00019182957324554315, "loss": 0.2142, "step": 2784 }, { "epoch": 1.2998833138856476, "grad_norm": 1.0859375, "learning_rate": 0.00019182376309672772, "loss": 0.1844, "step": 2785 }, { "epoch": 1.3003500583430572, "grad_norm": 1.03125, "learning_rate": 0.00019181795097084474, "loss": 0.1997, "step": 2786 }, { "epoch": 1.3008168028004667, "grad_norm": 0.94921875, "learning_rate": 0.0001918121368680194, "loss": 0.1859, "step": 2787 }, { "epoch": 1.3012835472578763, "grad_norm": 0.8203125, "learning_rate": 0.00019180632078837694, "loss": 0.1309, "step": 2788 }, { "epoch": 1.301750291715286, "grad_norm": 1.0859375, "learning_rate": 0.00019180050273204252, "loss": 0.1696, "step": 2789 }, { "epoch": 1.3022170361726955, "grad_norm": 1.0078125, "learning_rate": 0.00019179468269914137, "loss": 0.1662, "step": 2790 }, { "epoch": 1.302683780630105, "grad_norm": 1.109375, "learning_rate": 0.0001917888606897989, "loss": 0.1974, "step": 2791 }, { "epoch": 1.3031505250875146, "grad_norm": 0.87109375, "learning_rate": 0.0001917830367041404, "loss": 0.1475, "step": 2792 }, { "epoch": 1.3036172695449242, "grad_norm": 1.0234375, "learning_rate": 0.00019177721074229126, "loss": 0.2007, "step": 2793 }, { "epoch": 1.3040840140023338, "grad_norm": 0.84765625, "learning_rate": 0.000191771382804377, "loss": 0.1717, "step": 2794 }, { "epoch": 1.3045507584597433, "grad_norm": 0.97265625, "learning_rate": 0.00019176555289052301, "loss": 0.1578, "step": 2795 }, { "epoch": 1.305017502917153, "grad_norm": 0.9453125, "learning_rate": 0.00019175972100085486, "loss": 0.1874, "step": 2796 }, { "epoch": 1.3054842473745625, "grad_norm": 1.1640625, "learning_rate": 0.00019175388713549808, "loss": 0.1612, "step": 2797 }, { "epoch": 1.305950991831972, "grad_norm": 1.1640625, "learning_rate": 0.00019174805129457836, "loss": 0.2134, "step": 2798 }, { "epoch": 1.3064177362893816, "grad_norm": 1.0, "learning_rate": 0.00019174221347822125, "loss": 0.1829, "step": 2799 }, { "epoch": 1.3068844807467912, "grad_norm": 1.0390625, "learning_rate": 0.00019173637368655254, "loss": 0.1626, "step": 2800 }, { "epoch": 1.3073512252042008, "grad_norm": 1.09375, "learning_rate": 0.00019173053191969789, "loss": 0.188, "step": 2801 }, { "epoch": 1.3078179696616103, "grad_norm": 0.91796875, "learning_rate": 0.00019172468817778312, "loss": 0.1982, "step": 2802 }, { "epoch": 1.30828471411902, "grad_norm": 0.921875, "learning_rate": 0.00019171884246093403, "loss": 0.1656, "step": 2803 }, { "epoch": 1.3087514585764293, "grad_norm": 1.0234375, "learning_rate": 0.0001917129947692765, "loss": 0.2287, "step": 2804 }, { "epoch": 1.309218203033839, "grad_norm": 1.046875, "learning_rate": 0.00019170714510293645, "loss": 0.2813, "step": 2805 }, { "epoch": 1.3096849474912484, "grad_norm": 0.8203125, "learning_rate": 0.0001917012934620398, "loss": 0.131, "step": 2806 }, { "epoch": 1.3101516919486582, "grad_norm": 0.90234375, "learning_rate": 0.00019169543984671257, "loss": 0.1602, "step": 2807 }, { "epoch": 1.3106184364060676, "grad_norm": 1.015625, "learning_rate": 0.0001916895842570808, "loss": 0.1949, "step": 2808 }, { "epoch": 1.3110851808634774, "grad_norm": 0.88671875, "learning_rate": 0.0001916837266932705, "loss": 0.1493, "step": 2809 }, { "epoch": 1.3115519253208867, "grad_norm": 0.84765625, "learning_rate": 0.00019167786715540788, "loss": 0.1412, "step": 2810 }, { "epoch": 1.3120186697782965, "grad_norm": 0.80859375, "learning_rate": 0.00019167200564361905, "loss": 0.1374, "step": 2811 }, { "epoch": 1.3124854142357059, "grad_norm": 1.046875, "learning_rate": 0.00019166614215803022, "loss": 0.2021, "step": 2812 }, { "epoch": 1.3129521586931157, "grad_norm": 0.98046875, "learning_rate": 0.00019166027669876768, "loss": 0.1328, "step": 2813 }, { "epoch": 1.313418903150525, "grad_norm": 1.0859375, "learning_rate": 0.00019165440926595764, "loss": 0.1806, "step": 2814 }, { "epoch": 1.3138856476079346, "grad_norm": 1.15625, "learning_rate": 0.0001916485398597265, "loss": 0.211, "step": 2815 }, { "epoch": 1.3143523920653442, "grad_norm": 1.125, "learning_rate": 0.0001916426684802006, "loss": 0.1906, "step": 2816 }, { "epoch": 1.3148191365227537, "grad_norm": 0.96875, "learning_rate": 0.00019163679512750634, "loss": 0.1799, "step": 2817 }, { "epoch": 1.3152858809801633, "grad_norm": 1.0390625, "learning_rate": 0.00019163091980177024, "loss": 0.1887, "step": 2818 }, { "epoch": 1.3157526254375729, "grad_norm": 1.0546875, "learning_rate": 0.0001916250425031188, "loss": 0.1652, "step": 2819 }, { "epoch": 1.3162193698949824, "grad_norm": 1.109375, "learning_rate": 0.00019161916323167847, "loss": 0.1971, "step": 2820 }, { "epoch": 1.316686114352392, "grad_norm": 0.796875, "learning_rate": 0.00019161328198757596, "loss": 0.1169, "step": 2821 }, { "epoch": 1.3171528588098016, "grad_norm": 0.89453125, "learning_rate": 0.00019160739877093782, "loss": 0.1541, "step": 2822 }, { "epoch": 1.3176196032672112, "grad_norm": 1.0546875, "learning_rate": 0.0001916015135818907, "loss": 0.1941, "step": 2823 }, { "epoch": 1.3180863477246207, "grad_norm": 0.95703125, "learning_rate": 0.00019159562642056144, "loss": 0.1527, "step": 2824 }, { "epoch": 1.3185530921820303, "grad_norm": 1.078125, "learning_rate": 0.00019158973728707667, "loss": 0.2008, "step": 2825 }, { "epoch": 1.31901983663944, "grad_norm": 0.90234375, "learning_rate": 0.00019158384618156324, "loss": 0.1558, "step": 2826 }, { "epoch": 1.3194865810968495, "grad_norm": 0.9609375, "learning_rate": 0.000191577953104148, "loss": 0.164, "step": 2827 }, { "epoch": 1.319953325554259, "grad_norm": 0.9296875, "learning_rate": 0.0001915720580549578, "loss": 0.1704, "step": 2828 }, { "epoch": 1.3204200700116686, "grad_norm": 0.9609375, "learning_rate": 0.0001915661610341196, "loss": 0.1499, "step": 2829 }, { "epoch": 1.3208868144690782, "grad_norm": 1.0625, "learning_rate": 0.00019156026204176038, "loss": 0.2331, "step": 2830 }, { "epoch": 1.3213535589264878, "grad_norm": 0.84375, "learning_rate": 0.0001915543610780071, "loss": 0.1358, "step": 2831 }, { "epoch": 1.3218203033838973, "grad_norm": 0.90625, "learning_rate": 0.00019154845814298686, "loss": 0.165, "step": 2832 }, { "epoch": 1.322287047841307, "grad_norm": 1.0546875, "learning_rate": 0.00019154255323682672, "loss": 0.1723, "step": 2833 }, { "epoch": 1.3227537922987165, "grad_norm": 1.0703125, "learning_rate": 0.00019153664635965386, "loss": 0.2014, "step": 2834 }, { "epoch": 1.323220536756126, "grad_norm": 0.93359375, "learning_rate": 0.00019153073751159545, "loss": 0.1423, "step": 2835 }, { "epoch": 1.3236872812135356, "grad_norm": 0.96875, "learning_rate": 0.0001915248266927787, "loss": 0.1277, "step": 2836 }, { "epoch": 1.3241540256709452, "grad_norm": 0.90625, "learning_rate": 0.00019151891390333087, "loss": 0.115, "step": 2837 }, { "epoch": 1.3246207701283548, "grad_norm": 1.109375, "learning_rate": 0.00019151299914337927, "loss": 0.1544, "step": 2838 }, { "epoch": 1.3250875145857643, "grad_norm": 1.0390625, "learning_rate": 0.0001915070824130513, "loss": 0.1786, "step": 2839 }, { "epoch": 1.325554259043174, "grad_norm": 1.2265625, "learning_rate": 0.0001915011637124743, "loss": 0.1805, "step": 2840 }, { "epoch": 1.3260210035005835, "grad_norm": 1.09375, "learning_rate": 0.00019149524304177572, "loss": 0.1552, "step": 2841 }, { "epoch": 1.326487747957993, "grad_norm": 0.98828125, "learning_rate": 0.00019148932040108306, "loss": 0.1509, "step": 2842 }, { "epoch": 1.3269544924154026, "grad_norm": 1.109375, "learning_rate": 0.0001914833957905238, "loss": 0.171, "step": 2843 }, { "epoch": 1.3274212368728122, "grad_norm": 1.1171875, "learning_rate": 0.00019147746921022553, "loss": 0.1814, "step": 2844 }, { "epoch": 1.3278879813302216, "grad_norm": 1.0703125, "learning_rate": 0.00019147154066031586, "loss": 0.1858, "step": 2845 }, { "epoch": 1.3283547257876314, "grad_norm": 1.0859375, "learning_rate": 0.00019146561014092242, "loss": 0.1424, "step": 2846 }, { "epoch": 1.3288214702450407, "grad_norm": 0.9140625, "learning_rate": 0.00019145967765217293, "loss": 0.1424, "step": 2847 }, { "epoch": 1.3292882147024505, "grad_norm": 1.1328125, "learning_rate": 0.00019145374319419508, "loss": 0.1789, "step": 2848 }, { "epoch": 1.3297549591598599, "grad_norm": 1.015625, "learning_rate": 0.00019144780676711668, "loss": 0.1707, "step": 2849 }, { "epoch": 1.3302217036172697, "grad_norm": 1.171875, "learning_rate": 0.00019144186837106552, "loss": 0.195, "step": 2850 }, { "epoch": 1.330688448074679, "grad_norm": 1.0234375, "learning_rate": 0.00019143592800616952, "loss": 0.2097, "step": 2851 }, { "epoch": 1.3311551925320888, "grad_norm": 0.86328125, "learning_rate": 0.0001914299856725565, "loss": 0.1411, "step": 2852 }, { "epoch": 1.3316219369894982, "grad_norm": 0.88671875, "learning_rate": 0.00019142404137035448, "loss": 0.1315, "step": 2853 }, { "epoch": 1.332088681446908, "grad_norm": 1.1640625, "learning_rate": 0.00019141809509969136, "loss": 0.2245, "step": 2854 }, { "epoch": 1.3325554259043173, "grad_norm": 0.953125, "learning_rate": 0.00019141214686069525, "loss": 0.1475, "step": 2855 }, { "epoch": 1.3330221703617269, "grad_norm": 1.0234375, "learning_rate": 0.00019140619665349422, "loss": 0.1923, "step": 2856 }, { "epoch": 1.3334889148191365, "grad_norm": 1.0078125, "learning_rate": 0.00019140024447821633, "loss": 0.1726, "step": 2857 }, { "epoch": 1.333955659276546, "grad_norm": 0.87109375, "learning_rate": 0.00019139429033498978, "loss": 0.1477, "step": 2858 }, { "epoch": 1.3344224037339556, "grad_norm": 1.0234375, "learning_rate": 0.00019138833422394276, "loss": 0.2183, "step": 2859 }, { "epoch": 1.3348891481913652, "grad_norm": 1.0078125, "learning_rate": 0.00019138237614520348, "loss": 0.1677, "step": 2860 }, { "epoch": 1.3353558926487747, "grad_norm": 1.0078125, "learning_rate": 0.00019137641609890028, "loss": 0.2039, "step": 2861 }, { "epoch": 1.3358226371061843, "grad_norm": 1.3046875, "learning_rate": 0.00019137045408516143, "loss": 0.2137, "step": 2862 }, { "epoch": 1.336289381563594, "grad_norm": 1.2421875, "learning_rate": 0.00019136449010411535, "loss": 0.2071, "step": 2863 }, { "epoch": 1.3367561260210035, "grad_norm": 1.265625, "learning_rate": 0.00019135852415589044, "loss": 0.1962, "step": 2864 }, { "epoch": 1.337222870478413, "grad_norm": 1.0390625, "learning_rate": 0.0001913525562406151, "loss": 0.1582, "step": 2865 }, { "epoch": 1.3376896149358226, "grad_norm": 0.96875, "learning_rate": 0.00019134658635841788, "loss": 0.1801, "step": 2866 }, { "epoch": 1.3381563593932322, "grad_norm": 0.9375, "learning_rate": 0.00019134061450942733, "loss": 0.1776, "step": 2867 }, { "epoch": 1.3386231038506418, "grad_norm": 0.953125, "learning_rate": 0.00019133464069377197, "loss": 0.1394, "step": 2868 }, { "epoch": 1.3390898483080513, "grad_norm": 1.1640625, "learning_rate": 0.0001913286649115805, "loss": 0.2525, "step": 2869 }, { "epoch": 1.339556592765461, "grad_norm": 0.9296875, "learning_rate": 0.0001913226871629815, "loss": 0.1332, "step": 2870 }, { "epoch": 1.3400233372228705, "grad_norm": 0.90625, "learning_rate": 0.00019131670744810376, "loss": 0.194, "step": 2871 }, { "epoch": 1.34049008168028, "grad_norm": 0.984375, "learning_rate": 0.00019131072576707595, "loss": 0.1408, "step": 2872 }, { "epoch": 1.3409568261376896, "grad_norm": 1.0859375, "learning_rate": 0.0001913047421200269, "loss": 0.2097, "step": 2873 }, { "epoch": 1.3414235705950992, "grad_norm": 0.94921875, "learning_rate": 0.00019129875650708547, "loss": 0.1535, "step": 2874 }, { "epoch": 1.3418903150525088, "grad_norm": 0.91796875, "learning_rate": 0.00019129276892838055, "loss": 0.1313, "step": 2875 }, { "epoch": 1.3423570595099183, "grad_norm": 1.0546875, "learning_rate": 0.00019128677938404099, "loss": 0.1595, "step": 2876 }, { "epoch": 1.342823803967328, "grad_norm": 1.1015625, "learning_rate": 0.0001912807878741958, "loss": 0.1708, "step": 2877 }, { "epoch": 1.3432905484247375, "grad_norm": 1.1796875, "learning_rate": 0.00019127479439897394, "loss": 0.1833, "step": 2878 }, { "epoch": 1.343757292882147, "grad_norm": 0.98828125, "learning_rate": 0.0001912687989585045, "loss": 0.2133, "step": 2879 }, { "epoch": 1.3442240373395566, "grad_norm": 0.8984375, "learning_rate": 0.00019126280155291653, "loss": 0.1526, "step": 2880 }, { "epoch": 1.3446907817969662, "grad_norm": 1.0234375, "learning_rate": 0.00019125680218233924, "loss": 0.1742, "step": 2881 }, { "epoch": 1.3451575262543758, "grad_norm": 0.921875, "learning_rate": 0.0001912508008469017, "loss": 0.1903, "step": 2882 }, { "epoch": 1.3456242707117854, "grad_norm": 1.1796875, "learning_rate": 0.00019124479754673322, "loss": 0.201, "step": 2883 }, { "epoch": 1.346091015169195, "grad_norm": 1.140625, "learning_rate": 0.00019123879228196295, "loss": 0.1898, "step": 2884 }, { "epoch": 1.3465577596266045, "grad_norm": 1.1484375, "learning_rate": 0.0001912327850527203, "loss": 0.1923, "step": 2885 }, { "epoch": 1.347024504084014, "grad_norm": 0.8828125, "learning_rate": 0.00019122677585913452, "loss": 0.1444, "step": 2886 }, { "epoch": 1.3474912485414237, "grad_norm": 1.171875, "learning_rate": 0.0001912207647013351, "loss": 0.1765, "step": 2887 }, { "epoch": 1.347957992998833, "grad_norm": 0.8359375, "learning_rate": 0.00019121475157945133, "loss": 0.1321, "step": 2888 }, { "epoch": 1.3484247374562428, "grad_norm": 0.9453125, "learning_rate": 0.00019120873649361282, "loss": 0.2113, "step": 2889 }, { "epoch": 1.3488914819136522, "grad_norm": 0.8671875, "learning_rate": 0.00019120271944394896, "loss": 0.171, "step": 2890 }, { "epoch": 1.349358226371062, "grad_norm": 1.0390625, "learning_rate": 0.00019119670043058943, "loss": 0.1867, "step": 2891 }, { "epoch": 1.3498249708284713, "grad_norm": 0.9375, "learning_rate": 0.00019119067945366368, "loss": 0.1535, "step": 2892 }, { "epoch": 1.350291715285881, "grad_norm": 0.9296875, "learning_rate": 0.00019118465651330146, "loss": 0.1381, "step": 2893 }, { "epoch": 1.3507584597432905, "grad_norm": 1.015625, "learning_rate": 0.0001911786316096324, "loss": 0.1791, "step": 2894 }, { "epoch": 1.3512252042007002, "grad_norm": 0.99609375, "learning_rate": 0.00019117260474278623, "loss": 0.1802, "step": 2895 }, { "epoch": 1.3516919486581096, "grad_norm": 0.87890625, "learning_rate": 0.00019116657591289278, "loss": 0.1122, "step": 2896 }, { "epoch": 1.3521586931155192, "grad_norm": 0.8515625, "learning_rate": 0.00019116054512008176, "loss": 0.1794, "step": 2897 }, { "epoch": 1.3526254375729287, "grad_norm": 1.09375, "learning_rate": 0.00019115451236448305, "loss": 0.1581, "step": 2898 }, { "epoch": 1.3530921820303383, "grad_norm": 0.8203125, "learning_rate": 0.00019114847764622653, "loss": 0.1433, "step": 2899 }, { "epoch": 1.353558926487748, "grad_norm": 1.0859375, "learning_rate": 0.00019114244096544221, "loss": 0.1771, "step": 2900 }, { "epoch": 1.3540256709451575, "grad_norm": 1.1015625, "learning_rate": 0.00019113640232226, "loss": 0.2008, "step": 2901 }, { "epoch": 1.354492415402567, "grad_norm": 1.203125, "learning_rate": 0.0001911303617168099, "loss": 0.2152, "step": 2902 }, { "epoch": 1.3549591598599766, "grad_norm": 0.921875, "learning_rate": 0.00019112431914922203, "loss": 0.1475, "step": 2903 }, { "epoch": 1.3554259043173862, "grad_norm": 1.1171875, "learning_rate": 0.00019111827461962643, "loss": 0.1559, "step": 2904 }, { "epoch": 1.3558926487747958, "grad_norm": 1.015625, "learning_rate": 0.0001911122281281533, "loss": 0.148, "step": 2905 }, { "epoch": 1.3563593932322053, "grad_norm": 1.0078125, "learning_rate": 0.0001911061796749328, "loss": 0.1458, "step": 2906 }, { "epoch": 1.356826137689615, "grad_norm": 1.09375, "learning_rate": 0.0001911001292600952, "loss": 0.1826, "step": 2907 }, { "epoch": 1.3572928821470245, "grad_norm": 1.0390625, "learning_rate": 0.00019109407688377072, "loss": 0.1566, "step": 2908 }, { "epoch": 1.357759626604434, "grad_norm": 1.171875, "learning_rate": 0.00019108802254608968, "loss": 0.2108, "step": 2909 }, { "epoch": 1.3582263710618436, "grad_norm": 1.1171875, "learning_rate": 0.00019108196624718245, "loss": 0.194, "step": 2910 }, { "epoch": 1.3586931155192532, "grad_norm": 0.91015625, "learning_rate": 0.00019107590798717947, "loss": 0.1409, "step": 2911 }, { "epoch": 1.3591598599766628, "grad_norm": 0.83203125, "learning_rate": 0.0001910698477662111, "loss": 0.1518, "step": 2912 }, { "epoch": 1.3596266044340724, "grad_norm": 0.9296875, "learning_rate": 0.00019106378558440788, "loss": 0.1766, "step": 2913 }, { "epoch": 1.360093348891482, "grad_norm": 1.03125, "learning_rate": 0.00019105772144190032, "loss": 0.1622, "step": 2914 }, { "epoch": 1.3605600933488915, "grad_norm": 1.125, "learning_rate": 0.000191051655338819, "loss": 0.2193, "step": 2915 }, { "epoch": 1.361026837806301, "grad_norm": 1.21875, "learning_rate": 0.00019104558727529446, "loss": 0.2111, "step": 2916 }, { "epoch": 1.3614935822637106, "grad_norm": 1.1328125, "learning_rate": 0.00019103951725145746, "loss": 0.1782, "step": 2917 }, { "epoch": 1.3619603267211202, "grad_norm": 0.78515625, "learning_rate": 0.00019103344526743863, "loss": 0.1424, "step": 2918 }, { "epoch": 1.3624270711785298, "grad_norm": 0.91015625, "learning_rate": 0.00019102737132336875, "loss": 0.2199, "step": 2919 }, { "epoch": 1.3628938156359394, "grad_norm": 0.91015625, "learning_rate": 0.0001910212954193785, "loss": 0.1626, "step": 2920 }, { "epoch": 1.363360560093349, "grad_norm": 1.0625, "learning_rate": 0.00019101521755559883, "loss": 0.1582, "step": 2921 }, { "epoch": 1.3638273045507585, "grad_norm": 0.8828125, "learning_rate": 0.00019100913773216052, "loss": 0.1316, "step": 2922 }, { "epoch": 1.364294049008168, "grad_norm": 0.8984375, "learning_rate": 0.0001910030559491945, "loss": 0.1846, "step": 2923 }, { "epoch": 1.3647607934655777, "grad_norm": 0.89453125, "learning_rate": 0.00019099697220683172, "loss": 0.1509, "step": 2924 }, { "epoch": 1.3652275379229872, "grad_norm": 0.921875, "learning_rate": 0.00019099088650520316, "loss": 0.1865, "step": 2925 }, { "epoch": 1.3656942823803968, "grad_norm": 1.2265625, "learning_rate": 0.00019098479884443984, "loss": 0.2182, "step": 2926 }, { "epoch": 1.3661610268378064, "grad_norm": 1.140625, "learning_rate": 0.0001909787092246729, "loss": 0.1925, "step": 2927 }, { "epoch": 1.366627771295216, "grad_norm": 1.015625, "learning_rate": 0.00019097261764603339, "loss": 0.139, "step": 2928 }, { "epoch": 1.3670945157526253, "grad_norm": 1.046875, "learning_rate": 0.00019096652410865246, "loss": 0.1821, "step": 2929 }, { "epoch": 1.367561260210035, "grad_norm": 1.09375, "learning_rate": 0.00019096042861266134, "loss": 0.1595, "step": 2930 }, { "epoch": 1.3680280046674445, "grad_norm": 1.0859375, "learning_rate": 0.0001909543311581913, "loss": 0.1841, "step": 2931 }, { "epoch": 1.3684947491248542, "grad_norm": 1.171875, "learning_rate": 0.00019094823174537357, "loss": 0.192, "step": 2932 }, { "epoch": 1.3689614935822636, "grad_norm": 0.9296875, "learning_rate": 0.00019094213037433951, "loss": 0.1815, "step": 2933 }, { "epoch": 1.3694282380396734, "grad_norm": 0.9609375, "learning_rate": 0.00019093602704522048, "loss": 0.1609, "step": 2934 }, { "epoch": 1.3698949824970827, "grad_norm": 0.99609375, "learning_rate": 0.0001909299217581479, "loss": 0.1293, "step": 2935 }, { "epoch": 1.3703617269544925, "grad_norm": 0.97265625, "learning_rate": 0.00019092381451325322, "loss": 0.138, "step": 2936 }, { "epoch": 1.370828471411902, "grad_norm": 1.171875, "learning_rate": 0.0001909177053106679, "loss": 0.1629, "step": 2937 }, { "epoch": 1.3712952158693117, "grad_norm": 0.81640625, "learning_rate": 0.00019091159415052353, "loss": 0.127, "step": 2938 }, { "epoch": 1.371761960326721, "grad_norm": 1.0078125, "learning_rate": 0.00019090548103295167, "loss": 0.1893, "step": 2939 }, { "epoch": 1.3722287047841306, "grad_norm": 0.98828125, "learning_rate": 0.00019089936595808395, "loss": 0.1606, "step": 2940 }, { "epoch": 1.3726954492415402, "grad_norm": 1.0703125, "learning_rate": 0.000190893248926052, "loss": 0.1806, "step": 2941 }, { "epoch": 1.3731621936989498, "grad_norm": 0.88671875, "learning_rate": 0.00019088712993698757, "loss": 0.1469, "step": 2942 }, { "epoch": 1.3736289381563593, "grad_norm": 1.015625, "learning_rate": 0.0001908810089910224, "loss": 0.1875, "step": 2943 }, { "epoch": 1.374095682613769, "grad_norm": 1.078125, "learning_rate": 0.00019087488608828825, "loss": 0.1216, "step": 2944 }, { "epoch": 1.3745624270711785, "grad_norm": 1.2578125, "learning_rate": 0.00019086876122891702, "loss": 0.1995, "step": 2945 }, { "epoch": 1.375029171528588, "grad_norm": 0.8984375, "learning_rate": 0.0001908626344130405, "loss": 0.1312, "step": 2946 }, { "epoch": 1.3754959159859976, "grad_norm": 0.9140625, "learning_rate": 0.00019085650564079065, "loss": 0.1448, "step": 2947 }, { "epoch": 1.3759626604434072, "grad_norm": 1.015625, "learning_rate": 0.00019085037491229945, "loss": 0.1716, "step": 2948 }, { "epoch": 1.3764294049008168, "grad_norm": 1.2890625, "learning_rate": 0.00019084424222769885, "loss": 0.25, "step": 2949 }, { "epoch": 1.3768961493582264, "grad_norm": 1.0859375, "learning_rate": 0.00019083810758712094, "loss": 0.1126, "step": 2950 }, { "epoch": 1.377362893815636, "grad_norm": 1.0859375, "learning_rate": 0.00019083197099069776, "loss": 0.1775, "step": 2951 }, { "epoch": 1.3778296382730455, "grad_norm": 1.0859375, "learning_rate": 0.00019082583243856149, "loss": 0.2081, "step": 2952 }, { "epoch": 1.378296382730455, "grad_norm": 1.1796875, "learning_rate": 0.00019081969193084425, "loss": 0.2033, "step": 2953 }, { "epoch": 1.3787631271878646, "grad_norm": 0.85546875, "learning_rate": 0.0001908135494676783, "loss": 0.1293, "step": 2954 }, { "epoch": 1.3792298716452742, "grad_norm": 0.80859375, "learning_rate": 0.00019080740504919588, "loss": 0.1306, "step": 2955 }, { "epoch": 1.3796966161026838, "grad_norm": 1.0859375, "learning_rate": 0.00019080125867552926, "loss": 0.167, "step": 2956 }, { "epoch": 1.3801633605600934, "grad_norm": 1.1171875, "learning_rate": 0.00019079511034681082, "loss": 0.1967, "step": 2957 }, { "epoch": 1.380630105017503, "grad_norm": 0.98046875, "learning_rate": 0.00019078896006317287, "loss": 0.1147, "step": 2958 }, { "epoch": 1.3810968494749125, "grad_norm": 0.88671875, "learning_rate": 0.00019078280782474792, "loss": 0.1934, "step": 2959 }, { "epoch": 1.381563593932322, "grad_norm": 1.1796875, "learning_rate": 0.00019077665363166838, "loss": 0.1559, "step": 2960 }, { "epoch": 1.3820303383897317, "grad_norm": 1.015625, "learning_rate": 0.00019077049748406674, "loss": 0.1676, "step": 2961 }, { "epoch": 1.3824970828471412, "grad_norm": 0.98828125, "learning_rate": 0.0001907643393820756, "loss": 0.1637, "step": 2962 }, { "epoch": 1.3829638273045508, "grad_norm": 0.890625, "learning_rate": 0.0001907581793258275, "loss": 0.1129, "step": 2963 }, { "epoch": 1.3834305717619604, "grad_norm": 1.0625, "learning_rate": 0.00019075201731545515, "loss": 0.163, "step": 2964 }, { "epoch": 1.38389731621937, "grad_norm": 1.125, "learning_rate": 0.00019074585335109116, "loss": 0.1903, "step": 2965 }, { "epoch": 1.3843640606767795, "grad_norm": 0.8046875, "learning_rate": 0.00019073968743286824, "loss": 0.1699, "step": 2966 }, { "epoch": 1.384830805134189, "grad_norm": 1.125, "learning_rate": 0.0001907335195609192, "loss": 0.203, "step": 2967 }, { "epoch": 1.3852975495915987, "grad_norm": 0.7578125, "learning_rate": 0.0001907273497353768, "loss": 0.1403, "step": 2968 }, { "epoch": 1.3857642940490082, "grad_norm": 1.1875, "learning_rate": 0.00019072117795637393, "loss": 0.1947, "step": 2969 }, { "epoch": 1.3862310385064176, "grad_norm": 1.0234375, "learning_rate": 0.00019071500422404342, "loss": 0.1703, "step": 2970 }, { "epoch": 1.3866977829638274, "grad_norm": 0.93359375, "learning_rate": 0.00019070882853851817, "loss": 0.1536, "step": 2971 }, { "epoch": 1.3871645274212367, "grad_norm": 0.94140625, "learning_rate": 0.00019070265089993124, "loss": 0.1394, "step": 2972 }, { "epoch": 1.3876312718786465, "grad_norm": 0.8671875, "learning_rate": 0.00019069647130841563, "loss": 0.1803, "step": 2973 }, { "epoch": 1.388098016336056, "grad_norm": 0.92578125, "learning_rate": 0.0001906902897641043, "loss": 0.206, "step": 2974 }, { "epoch": 1.3885647607934657, "grad_norm": 1.0234375, "learning_rate": 0.00019068410626713048, "loss": 0.1582, "step": 2975 }, { "epoch": 1.389031505250875, "grad_norm": 0.99609375, "learning_rate": 0.00019067792081762716, "loss": 0.1771, "step": 2976 }, { "epoch": 1.3894982497082848, "grad_norm": 0.87890625, "learning_rate": 0.00019067173341572765, "loss": 0.155, "step": 2977 }, { "epoch": 1.3899649941656942, "grad_norm": 0.9375, "learning_rate": 0.0001906655440615651, "loss": 0.1885, "step": 2978 }, { "epoch": 1.390431738623104, "grad_norm": 0.91015625, "learning_rate": 0.00019065935275527278, "loss": 0.1256, "step": 2979 }, { "epoch": 1.3908984830805133, "grad_norm": 0.90625, "learning_rate": 0.00019065315949698403, "loss": 0.1455, "step": 2980 }, { "epoch": 1.391365227537923, "grad_norm": 0.9765625, "learning_rate": 0.00019064696428683218, "loss": 0.1925, "step": 2981 }, { "epoch": 1.3918319719953325, "grad_norm": 0.96484375, "learning_rate": 0.0001906407671249506, "loss": 0.106, "step": 2982 }, { "epoch": 1.392298716452742, "grad_norm": 1.0703125, "learning_rate": 0.00019063456801147273, "loss": 0.1871, "step": 2983 }, { "epoch": 1.3927654609101516, "grad_norm": 1.15625, "learning_rate": 0.00019062836694653203, "loss": 0.1697, "step": 2984 }, { "epoch": 1.3932322053675612, "grad_norm": 1.078125, "learning_rate": 0.00019062216393026207, "loss": 0.1737, "step": 2985 }, { "epoch": 1.3936989498249708, "grad_norm": 1.1640625, "learning_rate": 0.00019061595896279637, "loss": 0.1855, "step": 2986 }, { "epoch": 1.3941656942823804, "grad_norm": 0.9140625, "learning_rate": 0.0001906097520442685, "loss": 0.1218, "step": 2987 }, { "epoch": 1.39463243873979, "grad_norm": 1.0, "learning_rate": 0.00019060354317481217, "loss": 0.1566, "step": 2988 }, { "epoch": 1.3950991831971995, "grad_norm": 1.1953125, "learning_rate": 0.00019059733235456103, "loss": 0.1784, "step": 2989 }, { "epoch": 1.395565927654609, "grad_norm": 1.0390625, "learning_rate": 0.0001905911195836488, "loss": 0.1398, "step": 2990 }, { "epoch": 1.3960326721120186, "grad_norm": 1.015625, "learning_rate": 0.00019058490486220927, "loss": 0.1513, "step": 2991 }, { "epoch": 1.3964994165694282, "grad_norm": 1.1328125, "learning_rate": 0.0001905786881903762, "loss": 0.1531, "step": 2992 }, { "epoch": 1.3969661610268378, "grad_norm": 1.03125, "learning_rate": 0.00019057246956828347, "loss": 0.1282, "step": 2993 }, { "epoch": 1.3974329054842474, "grad_norm": 1.078125, "learning_rate": 0.00019056624899606498, "loss": 0.179, "step": 2994 }, { "epoch": 1.397899649941657, "grad_norm": 1.0703125, "learning_rate": 0.0001905600264738547, "loss": 0.2017, "step": 2995 }, { "epoch": 1.3983663943990665, "grad_norm": 0.92578125, "learning_rate": 0.00019055380200178653, "loss": 0.1518, "step": 2996 }, { "epoch": 1.398833138856476, "grad_norm": 1.0078125, "learning_rate": 0.00019054757557999456, "loss": 0.1581, "step": 2997 }, { "epoch": 1.3992998833138857, "grad_norm": 1.0234375, "learning_rate": 0.00019054134720861282, "loss": 0.1928, "step": 2998 }, { "epoch": 1.3997666277712952, "grad_norm": 1.03125, "learning_rate": 0.00019053511688777542, "loss": 0.1199, "step": 2999 }, { "epoch": 1.4002333722287048, "grad_norm": 1.0390625, "learning_rate": 0.0001905288846176165, "loss": 0.1489, "step": 3000 }, { "epoch": 1.4007001166861144, "grad_norm": 0.93359375, "learning_rate": 0.00019052265039827022, "loss": 0.1539, "step": 3001 }, { "epoch": 1.401166861143524, "grad_norm": 1.0078125, "learning_rate": 0.00019051641422987087, "loss": 0.1273, "step": 3002 }, { "epoch": 1.4016336056009335, "grad_norm": 1.109375, "learning_rate": 0.00019051017611255267, "loss": 0.1701, "step": 3003 }, { "epoch": 1.402100350058343, "grad_norm": 0.8984375, "learning_rate": 0.00019050393604645, "loss": 0.1478, "step": 3004 }, { "epoch": 1.4025670945157527, "grad_norm": 1.0859375, "learning_rate": 0.00019049769403169716, "loss": 0.1715, "step": 3005 }, { "epoch": 1.4030338389731623, "grad_norm": 0.8671875, "learning_rate": 0.00019049145006842853, "loss": 0.1369, "step": 3006 }, { "epoch": 1.4035005834305718, "grad_norm": 0.96484375, "learning_rate": 0.00019048520415677856, "loss": 0.1648, "step": 3007 }, { "epoch": 1.4039673278879814, "grad_norm": 0.9140625, "learning_rate": 0.0001904789562968818, "loss": 0.1156, "step": 3008 }, { "epoch": 1.404434072345391, "grad_norm": 5.40625, "learning_rate": 0.0001904727064888727, "loss": 0.172, "step": 3009 }, { "epoch": 1.4049008168028005, "grad_norm": 0.9140625, "learning_rate": 0.00019046645473288587, "loss": 0.113, "step": 3010 }, { "epoch": 1.40536756126021, "grad_norm": 1.03125, "learning_rate": 0.00019046020102905589, "loss": 0.1608, "step": 3011 }, { "epoch": 1.4058343057176197, "grad_norm": 0.97265625, "learning_rate": 0.00019045394537751739, "loss": 0.1178, "step": 3012 }, { "epoch": 1.406301050175029, "grad_norm": 1.0234375, "learning_rate": 0.00019044768777840506, "loss": 0.1492, "step": 3013 }, { "epoch": 1.4067677946324388, "grad_norm": 1.1796875, "learning_rate": 0.00019044142823185372, "loss": 0.1734, "step": 3014 }, { "epoch": 1.4072345390898482, "grad_norm": 0.93359375, "learning_rate": 0.00019043516673799806, "loss": 0.1534, "step": 3015 }, { "epoch": 1.407701283547258, "grad_norm": 1.03125, "learning_rate": 0.00019042890329697292, "loss": 0.1573, "step": 3016 }, { "epoch": 1.4081680280046673, "grad_norm": 0.9453125, "learning_rate": 0.00019042263790891318, "loss": 0.1439, "step": 3017 }, { "epoch": 1.4086347724620771, "grad_norm": 1.171875, "learning_rate": 0.00019041637057395369, "loss": 0.1518, "step": 3018 }, { "epoch": 1.4091015169194865, "grad_norm": 1.046875, "learning_rate": 0.00019041010129222942, "loss": 0.1678, "step": 3019 }, { "epoch": 1.4095682613768963, "grad_norm": 1.09375, "learning_rate": 0.0001904038300638754, "loss": 0.1809, "step": 3020 }, { "epoch": 1.4100350058343056, "grad_norm": 1.1171875, "learning_rate": 0.00019039755688902656, "loss": 0.1796, "step": 3021 }, { "epoch": 1.4105017502917152, "grad_norm": 1.0078125, "learning_rate": 0.00019039128176781805, "loss": 0.1495, "step": 3022 }, { "epoch": 1.4109684947491248, "grad_norm": 1.109375, "learning_rate": 0.00019038500470038494, "loss": 0.1861, "step": 3023 }, { "epoch": 1.4114352392065344, "grad_norm": 1.109375, "learning_rate": 0.0001903787256868624, "loss": 0.2207, "step": 3024 }, { "epoch": 1.411901983663944, "grad_norm": 0.9609375, "learning_rate": 0.00019037244472738563, "loss": 0.1352, "step": 3025 }, { "epoch": 1.4123687281213535, "grad_norm": 1.125, "learning_rate": 0.00019036616182208985, "loss": 0.1424, "step": 3026 }, { "epoch": 1.412835472578763, "grad_norm": 0.98046875, "learning_rate": 0.00019035987697111034, "loss": 0.1688, "step": 3027 }, { "epoch": 1.4133022170361726, "grad_norm": 1.0078125, "learning_rate": 0.00019035359017458242, "loss": 0.1725, "step": 3028 }, { "epoch": 1.4137689614935822, "grad_norm": 0.9453125, "learning_rate": 0.0001903473014326415, "loss": 0.1478, "step": 3029 }, { "epoch": 1.4142357059509918, "grad_norm": 0.93359375, "learning_rate": 0.00019034101074542285, "loss": 0.1358, "step": 3030 }, { "epoch": 1.4147024504084014, "grad_norm": 1.0703125, "learning_rate": 0.00019033471811306205, "loss": 0.1742, "step": 3031 }, { "epoch": 1.415169194865811, "grad_norm": 1.09375, "learning_rate": 0.00019032842353569455, "loss": 0.1807, "step": 3032 }, { "epoch": 1.4156359393232205, "grad_norm": 1.015625, "learning_rate": 0.00019032212701345586, "loss": 0.1495, "step": 3033 }, { "epoch": 1.41610268378063, "grad_norm": 1.2265625, "learning_rate": 0.00019031582854648157, "loss": 0.2042, "step": 3034 }, { "epoch": 1.4165694282380397, "grad_norm": 0.875, "learning_rate": 0.00019030952813490725, "loss": 0.118, "step": 3035 }, { "epoch": 1.4170361726954492, "grad_norm": 1.109375, "learning_rate": 0.00019030322577886862, "loss": 0.1751, "step": 3036 }, { "epoch": 1.4175029171528588, "grad_norm": 1.03125, "learning_rate": 0.00019029692147850137, "loss": 0.1581, "step": 3037 }, { "epoch": 1.4179696616102684, "grad_norm": 1.015625, "learning_rate": 0.00019029061523394115, "loss": 0.1508, "step": 3038 }, { "epoch": 1.418436406067678, "grad_norm": 0.93359375, "learning_rate": 0.00019028430704532388, "loss": 0.137, "step": 3039 }, { "epoch": 1.4189031505250875, "grad_norm": 1.125, "learning_rate": 0.00019027799691278524, "loss": 0.1701, "step": 3040 }, { "epoch": 1.419369894982497, "grad_norm": 1.0390625, "learning_rate": 0.00019027168483646122, "loss": 0.1359, "step": 3041 }, { "epoch": 1.4198366394399067, "grad_norm": 1.0625, "learning_rate": 0.0001902653708164876, "loss": 0.1769, "step": 3042 }, { "epoch": 1.4203033838973163, "grad_norm": 1.1875, "learning_rate": 0.00019025905485300047, "loss": 0.1742, "step": 3043 }, { "epoch": 1.4207701283547258, "grad_norm": 0.93359375, "learning_rate": 0.00019025273694613567, "loss": 0.1614, "step": 3044 }, { "epoch": 1.4212368728121354, "grad_norm": 0.94140625, "learning_rate": 0.00019024641709602938, "loss": 0.1322, "step": 3045 }, { "epoch": 1.421703617269545, "grad_norm": 1.0625, "learning_rate": 0.00019024009530281755, "loss": 0.1956, "step": 3046 }, { "epoch": 1.4221703617269545, "grad_norm": 0.95703125, "learning_rate": 0.00019023377156663637, "loss": 0.139, "step": 3047 }, { "epoch": 1.4226371061843641, "grad_norm": 1.2265625, "learning_rate": 0.00019022744588762194, "loss": 0.1984, "step": 3048 }, { "epoch": 1.4231038506417737, "grad_norm": 1.015625, "learning_rate": 0.0001902211182659105, "loss": 0.1606, "step": 3049 }, { "epoch": 1.4235705950991833, "grad_norm": 1.015625, "learning_rate": 0.00019021478870163828, "loss": 0.1197, "step": 3050 }, { "epoch": 1.4240373395565928, "grad_norm": 1.046875, "learning_rate": 0.0001902084571949416, "loss": 0.1593, "step": 3051 }, { "epoch": 1.4245040840140024, "grad_norm": 0.87109375, "learning_rate": 0.00019020212374595673, "loss": 0.1109, "step": 3052 }, { "epoch": 1.424970828471412, "grad_norm": 0.9921875, "learning_rate": 0.00019019578835482003, "loss": 0.1716, "step": 3053 }, { "epoch": 1.4254375729288213, "grad_norm": 1.109375, "learning_rate": 0.00019018945102166796, "loss": 0.1986, "step": 3054 }, { "epoch": 1.4259043173862311, "grad_norm": 0.96875, "learning_rate": 0.00019018311174663695, "loss": 0.1271, "step": 3055 }, { "epoch": 1.4263710618436405, "grad_norm": 1.140625, "learning_rate": 0.00019017677052986346, "loss": 0.1247, "step": 3056 }, { "epoch": 1.4268378063010503, "grad_norm": 1.1484375, "learning_rate": 0.00019017042737148407, "loss": 0.1392, "step": 3057 }, { "epoch": 1.4273045507584596, "grad_norm": 0.9453125, "learning_rate": 0.00019016408227163532, "loss": 0.1545, "step": 3058 }, { "epoch": 1.4277712952158694, "grad_norm": 0.9375, "learning_rate": 0.00019015773523045382, "loss": 0.1272, "step": 3059 }, { "epoch": 1.4282380396732788, "grad_norm": 0.87890625, "learning_rate": 0.00019015138624807628, "loss": 0.1074, "step": 3060 }, { "epoch": 1.4287047841306886, "grad_norm": 0.99609375, "learning_rate": 0.00019014503532463936, "loss": 0.147, "step": 3061 }, { "epoch": 1.429171528588098, "grad_norm": 1.1953125, "learning_rate": 0.00019013868246027982, "loss": 0.1672, "step": 3062 }, { "epoch": 1.4296382730455075, "grad_norm": 0.98046875, "learning_rate": 0.00019013232765513442, "loss": 0.1496, "step": 3063 }, { "epoch": 1.430105017502917, "grad_norm": 1.15625, "learning_rate": 0.00019012597090934, "loss": 0.1918, "step": 3064 }, { "epoch": 1.4305717619603266, "grad_norm": 1.1015625, "learning_rate": 0.00019011961222303346, "loss": 0.1492, "step": 3065 }, { "epoch": 1.4310385064177362, "grad_norm": 0.77734375, "learning_rate": 0.00019011325159635164, "loss": 0.0934, "step": 3066 }, { "epoch": 1.4315052508751458, "grad_norm": 0.98828125, "learning_rate": 0.00019010688902943153, "loss": 0.1387, "step": 3067 }, { "epoch": 1.4319719953325554, "grad_norm": 1.0859375, "learning_rate": 0.00019010052452241016, "loss": 0.1545, "step": 3068 }, { "epoch": 1.432438739789965, "grad_norm": 1.03125, "learning_rate": 0.0001900941580754245, "loss": 0.1493, "step": 3069 }, { "epoch": 1.4329054842473745, "grad_norm": 1.078125, "learning_rate": 0.00019008778968861167, "loss": 0.1726, "step": 3070 }, { "epoch": 1.433372228704784, "grad_norm": 0.85546875, "learning_rate": 0.00019008141936210877, "loss": 0.1225, "step": 3071 }, { "epoch": 1.4338389731621937, "grad_norm": 1.0390625, "learning_rate": 0.00019007504709605297, "loss": 0.1669, "step": 3072 }, { "epoch": 1.4343057176196032, "grad_norm": 1.0390625, "learning_rate": 0.00019006867289058147, "loss": 0.1362, "step": 3073 }, { "epoch": 1.4347724620770128, "grad_norm": 1.0390625, "learning_rate": 0.00019006229674583148, "loss": 0.1464, "step": 3074 }, { "epoch": 1.4352392065344224, "grad_norm": 1.1640625, "learning_rate": 0.00019005591866194034, "loss": 0.2105, "step": 3075 }, { "epoch": 1.435705950991832, "grad_norm": 1.078125, "learning_rate": 0.00019004953863904536, "loss": 0.1486, "step": 3076 }, { "epoch": 1.4361726954492415, "grad_norm": 1.21875, "learning_rate": 0.00019004315667728387, "loss": 0.1906, "step": 3077 }, { "epoch": 1.436639439906651, "grad_norm": 0.98828125, "learning_rate": 0.00019003677277679333, "loss": 0.1404, "step": 3078 }, { "epoch": 1.4371061843640607, "grad_norm": 0.93359375, "learning_rate": 0.00019003038693771117, "loss": 0.1547, "step": 3079 }, { "epoch": 1.4375729288214703, "grad_norm": 0.88671875, "learning_rate": 0.0001900239991601749, "loss": 0.1294, "step": 3080 }, { "epoch": 1.4380396732788798, "grad_norm": 0.8671875, "learning_rate": 0.00019001760944432206, "loss": 0.1501, "step": 3081 }, { "epoch": 1.4385064177362894, "grad_norm": 0.97265625, "learning_rate": 0.0001900112177902902, "loss": 0.1499, "step": 3082 }, { "epoch": 1.438973162193699, "grad_norm": 0.86328125, "learning_rate": 0.00019000482419821694, "loss": 0.128, "step": 3083 }, { "epoch": 1.4394399066511085, "grad_norm": 0.9375, "learning_rate": 0.00018999842866823994, "loss": 0.1374, "step": 3084 }, { "epoch": 1.4399066511085181, "grad_norm": 0.96875, "learning_rate": 0.00018999203120049693, "loss": 0.141, "step": 3085 }, { "epoch": 1.4403733955659277, "grad_norm": 0.98828125, "learning_rate": 0.00018998563179512562, "loss": 0.1561, "step": 3086 }, { "epoch": 1.4408401400233373, "grad_norm": 1.0546875, "learning_rate": 0.00018997923045226386, "loss": 0.1551, "step": 3087 }, { "epoch": 1.4413068844807468, "grad_norm": 1.0859375, "learning_rate": 0.00018997282717204942, "loss": 0.1664, "step": 3088 }, { "epoch": 1.4417736289381564, "grad_norm": 1.03125, "learning_rate": 0.0001899664219546202, "loss": 0.1504, "step": 3089 }, { "epoch": 1.442240373395566, "grad_norm": 1.2265625, "learning_rate": 0.00018996001480011405, "loss": 0.1611, "step": 3090 }, { "epoch": 1.4427071178529756, "grad_norm": 1.15625, "learning_rate": 0.00018995360570866901, "loss": 0.1828, "step": 3091 }, { "epoch": 1.4431738623103851, "grad_norm": 1.1796875, "learning_rate": 0.000189947194680423, "loss": 0.1149, "step": 3092 }, { "epoch": 1.4436406067677947, "grad_norm": 1.015625, "learning_rate": 0.0001899407817155141, "loss": 0.1535, "step": 3093 }, { "epoch": 1.4441073512252043, "grad_norm": 1.1015625, "learning_rate": 0.0001899343668140804, "loss": 0.1718, "step": 3094 }, { "epoch": 1.4445740956826136, "grad_norm": 1.015625, "learning_rate": 0.00018992794997625998, "loss": 0.1655, "step": 3095 }, { "epoch": 1.4450408401400234, "grad_norm": 0.9765625, "learning_rate": 0.000189921531202191, "loss": 0.1724, "step": 3096 }, { "epoch": 1.4455075845974328, "grad_norm": 1.109375, "learning_rate": 0.00018991511049201173, "loss": 0.1395, "step": 3097 }, { "epoch": 1.4459743290548426, "grad_norm": 0.8984375, "learning_rate": 0.00018990868784586035, "loss": 0.1292, "step": 3098 }, { "epoch": 1.446441073512252, "grad_norm": 1.03125, "learning_rate": 0.00018990226326387518, "loss": 0.1569, "step": 3099 }, { "epoch": 1.4469078179696617, "grad_norm": 0.96484375, "learning_rate": 0.00018989583674619448, "loss": 0.1384, "step": 3100 }, { "epoch": 1.447374562427071, "grad_norm": 1.7265625, "learning_rate": 0.0001898894082929567, "loss": 0.183, "step": 3101 }, { "epoch": 1.4478413068844809, "grad_norm": 1.3984375, "learning_rate": 0.00018988297790430028, "loss": 0.1465, "step": 3102 }, { "epoch": 1.4483080513418902, "grad_norm": 1.2890625, "learning_rate": 0.00018987654558036353, "loss": 0.2297, "step": 3103 }, { "epoch": 1.4487747957993, "grad_norm": 1.046875, "learning_rate": 0.0001898701113212851, "loss": 0.1831, "step": 3104 }, { "epoch": 1.4492415402567094, "grad_norm": 0.82421875, "learning_rate": 0.00018986367512720343, "loss": 0.1119, "step": 3105 }, { "epoch": 1.449708284714119, "grad_norm": 0.91796875, "learning_rate": 0.00018985723699825716, "loss": 0.1256, "step": 3106 }, { "epoch": 1.4501750291715285, "grad_norm": 0.99609375, "learning_rate": 0.00018985079693458485, "loss": 0.163, "step": 3107 }, { "epoch": 1.450641773628938, "grad_norm": 0.88671875, "learning_rate": 0.00018984435493632517, "loss": 0.1152, "step": 3108 }, { "epoch": 1.4511085180863477, "grad_norm": 0.92578125, "learning_rate": 0.0001898379110036169, "loss": 0.1016, "step": 3109 }, { "epoch": 1.4515752625437572, "grad_norm": 0.8671875, "learning_rate": 0.00018983146513659868, "loss": 0.1613, "step": 3110 }, { "epoch": 1.4520420070011668, "grad_norm": 1.03125, "learning_rate": 0.00018982501733540939, "loss": 0.1604, "step": 3111 }, { "epoch": 1.4525087514585764, "grad_norm": 1.0, "learning_rate": 0.0001898185676001878, "loss": 0.1646, "step": 3112 }, { "epoch": 1.452975495915986, "grad_norm": 1.21875, "learning_rate": 0.00018981211593107274, "loss": 0.1519, "step": 3113 }, { "epoch": 1.4534422403733955, "grad_norm": 1.25, "learning_rate": 0.00018980566232820324, "loss": 0.1538, "step": 3114 }, { "epoch": 1.453908984830805, "grad_norm": 1.046875, "learning_rate": 0.00018979920679171815, "loss": 0.1319, "step": 3115 }, { "epoch": 1.4543757292882147, "grad_norm": 1.0546875, "learning_rate": 0.00018979274932175654, "loss": 0.1692, "step": 3116 }, { "epoch": 1.4548424737456243, "grad_norm": 1.0859375, "learning_rate": 0.0001897862899184574, "loss": 0.1353, "step": 3117 }, { "epoch": 1.4553092182030338, "grad_norm": 1.2578125, "learning_rate": 0.00018977982858195983, "loss": 0.1443, "step": 3118 }, { "epoch": 1.4557759626604434, "grad_norm": 1.09375, "learning_rate": 0.00018977336531240287, "loss": 0.1473, "step": 3119 }, { "epoch": 1.456242707117853, "grad_norm": 0.98828125, "learning_rate": 0.00018976690010992582, "loss": 0.1397, "step": 3120 }, { "epoch": 1.4567094515752625, "grad_norm": 1.0390625, "learning_rate": 0.00018976043297466778, "loss": 0.1336, "step": 3121 }, { "epoch": 1.4571761960326721, "grad_norm": 1.15625, "learning_rate": 0.00018975396390676807, "loss": 0.1897, "step": 3122 }, { "epoch": 1.4576429404900817, "grad_norm": 1.2421875, "learning_rate": 0.00018974749290636588, "loss": 0.1445, "step": 3123 }, { "epoch": 1.4581096849474913, "grad_norm": 0.87890625, "learning_rate": 0.00018974101997360063, "loss": 0.1276, "step": 3124 }, { "epoch": 1.4585764294049008, "grad_norm": 0.8984375, "learning_rate": 0.00018973454510861165, "loss": 0.1202, "step": 3125 }, { "epoch": 1.4590431738623104, "grad_norm": 0.96484375, "learning_rate": 0.00018972806831153832, "loss": 0.1314, "step": 3126 }, { "epoch": 1.45950991831972, "grad_norm": 0.87890625, "learning_rate": 0.00018972158958252013, "loss": 0.1111, "step": 3127 }, { "epoch": 1.4599766627771296, "grad_norm": 1.09375, "learning_rate": 0.00018971510892169658, "loss": 0.2049, "step": 3128 }, { "epoch": 1.4604434072345391, "grad_norm": 1.15625, "learning_rate": 0.00018970862632920721, "loss": 0.1286, "step": 3129 }, { "epoch": 1.4609101516919487, "grad_norm": 0.984375, "learning_rate": 0.00018970214180519154, "loss": 0.1733, "step": 3130 }, { "epoch": 1.4613768961493583, "grad_norm": 0.99609375, "learning_rate": 0.00018969565534978928, "loss": 0.1491, "step": 3131 }, { "epoch": 1.4618436406067679, "grad_norm": 0.953125, "learning_rate": 0.00018968916696314, "loss": 0.174, "step": 3132 }, { "epoch": 1.4623103850641774, "grad_norm": 0.984375, "learning_rate": 0.00018968267664538346, "loss": 0.0946, "step": 3133 }, { "epoch": 1.462777129521587, "grad_norm": 1.046875, "learning_rate": 0.0001896761843966594, "loss": 0.1489, "step": 3134 }, { "epoch": 1.4632438739789966, "grad_norm": 1.25, "learning_rate": 0.00018966969021710757, "loss": 0.1995, "step": 3135 }, { "epoch": 1.463710618436406, "grad_norm": 1.09375, "learning_rate": 0.00018966319410686783, "loss": 0.1606, "step": 3136 }, { "epoch": 1.4641773628938157, "grad_norm": 0.9140625, "learning_rate": 0.00018965669606608006, "loss": 0.1621, "step": 3137 }, { "epoch": 1.464644107351225, "grad_norm": 1.0078125, "learning_rate": 0.00018965019609488412, "loss": 0.1626, "step": 3138 }, { "epoch": 1.4651108518086349, "grad_norm": 0.953125, "learning_rate": 0.00018964369419341998, "loss": 0.1293, "step": 3139 }, { "epoch": 1.4655775962660442, "grad_norm": 0.91796875, "learning_rate": 0.00018963719036182766, "loss": 0.1151, "step": 3140 }, { "epoch": 1.466044340723454, "grad_norm": 1.0703125, "learning_rate": 0.00018963068460024716, "loss": 0.156, "step": 3141 }, { "epoch": 1.4665110851808634, "grad_norm": 1.0390625, "learning_rate": 0.0001896241769088186, "loss": 0.1482, "step": 3142 }, { "epoch": 1.4669778296382732, "grad_norm": 0.92578125, "learning_rate": 0.00018961766728768206, "loss": 0.1445, "step": 3143 }, { "epoch": 1.4674445740956825, "grad_norm": 1.15625, "learning_rate": 0.00018961115573697771, "loss": 0.1386, "step": 3144 }, { "epoch": 1.4679113185530923, "grad_norm": 1.1171875, "learning_rate": 0.00018960464225684575, "loss": 0.1313, "step": 3145 }, { "epoch": 1.4683780630105017, "grad_norm": 0.96484375, "learning_rate": 0.0001895981268474264, "loss": 0.1217, "step": 3146 }, { "epoch": 1.4688448074679112, "grad_norm": 0.921875, "learning_rate": 0.00018959160950886004, "loss": 0.1294, "step": 3147 }, { "epoch": 1.4693115519253208, "grad_norm": 0.8359375, "learning_rate": 0.0001895850902412869, "loss": 0.118, "step": 3148 }, { "epoch": 1.4697782963827304, "grad_norm": 1.0234375, "learning_rate": 0.00018957856904484733, "loss": 0.1218, "step": 3149 }, { "epoch": 1.47024504084014, "grad_norm": 1.015625, "learning_rate": 0.0001895720459196818, "loss": 0.1905, "step": 3150 }, { "epoch": 1.4707117852975495, "grad_norm": 1.203125, "learning_rate": 0.00018956552086593073, "loss": 0.1845, "step": 3151 }, { "epoch": 1.471178529754959, "grad_norm": 1.0859375, "learning_rate": 0.00018955899388373463, "loss": 0.147, "step": 3152 }, { "epoch": 1.4716452742123687, "grad_norm": 1.0625, "learning_rate": 0.000189552464973234, "loss": 0.1541, "step": 3153 }, { "epoch": 1.4721120186697783, "grad_norm": 1.1953125, "learning_rate": 0.00018954593413456948, "loss": 0.1369, "step": 3154 }, { "epoch": 1.4725787631271878, "grad_norm": 1.125, "learning_rate": 0.00018953940136788163, "loss": 0.1947, "step": 3155 }, { "epoch": 1.4730455075845974, "grad_norm": 1.171875, "learning_rate": 0.00018953286667331112, "loss": 0.1823, "step": 3156 }, { "epoch": 1.473512252042007, "grad_norm": 1.046875, "learning_rate": 0.00018952633005099866, "loss": 0.1745, "step": 3157 }, { "epoch": 1.4739789964994165, "grad_norm": 1.0, "learning_rate": 0.00018951979150108494, "loss": 0.1631, "step": 3158 }, { "epoch": 1.4744457409568261, "grad_norm": 0.76171875, "learning_rate": 0.00018951325102371084, "loss": 0.1122, "step": 3159 }, { "epoch": 1.4749124854142357, "grad_norm": 0.9296875, "learning_rate": 0.00018950670861901713, "loss": 0.148, "step": 3160 }, { "epoch": 1.4753792298716453, "grad_norm": 1.015625, "learning_rate": 0.00018950016428714462, "loss": 0.1399, "step": 3161 }, { "epoch": 1.4758459743290548, "grad_norm": 1.203125, "learning_rate": 0.0001894936180282343, "loss": 0.1515, "step": 3162 }, { "epoch": 1.4763127187864644, "grad_norm": 1.09375, "learning_rate": 0.0001894870698424271, "loss": 0.1228, "step": 3163 }, { "epoch": 1.476779463243874, "grad_norm": 0.88671875, "learning_rate": 0.000189480519729864, "loss": 0.1121, "step": 3164 }, { "epoch": 1.4772462077012836, "grad_norm": 1.234375, "learning_rate": 0.00018947396769068603, "loss": 0.2029, "step": 3165 }, { "epoch": 1.4777129521586931, "grad_norm": 0.95703125, "learning_rate": 0.00018946741372503426, "loss": 0.1247, "step": 3166 }, { "epoch": 1.4781796966161027, "grad_norm": 1.109375, "learning_rate": 0.00018946085783304976, "loss": 0.1842, "step": 3167 }, { "epoch": 1.4786464410735123, "grad_norm": 1.1796875, "learning_rate": 0.0001894543000148738, "loss": 0.1275, "step": 3168 }, { "epoch": 1.4791131855309219, "grad_norm": 1.0, "learning_rate": 0.00018944774027064747, "loss": 0.1679, "step": 3169 }, { "epoch": 1.4795799299883314, "grad_norm": 1.09375, "learning_rate": 0.00018944117860051206, "loss": 0.2096, "step": 3170 }, { "epoch": 1.480046674445741, "grad_norm": 0.94140625, "learning_rate": 0.00018943461500460883, "loss": 0.1202, "step": 3171 }, { "epoch": 1.4805134189031506, "grad_norm": 0.8828125, "learning_rate": 0.00018942804948307912, "loss": 0.1437, "step": 3172 }, { "epoch": 1.4809801633605602, "grad_norm": 1.03125, "learning_rate": 0.00018942148203606428, "loss": 0.133, "step": 3173 }, { "epoch": 1.4814469078179697, "grad_norm": 1.1640625, "learning_rate": 0.00018941491266370573, "loss": 0.1761, "step": 3174 }, { "epoch": 1.4819136522753793, "grad_norm": 1.0703125, "learning_rate": 0.0001894083413661449, "loss": 0.1547, "step": 3175 }, { "epoch": 1.4823803967327889, "grad_norm": 0.890625, "learning_rate": 0.00018940176814352327, "loss": 0.1261, "step": 3176 }, { "epoch": 1.4828471411901984, "grad_norm": 1.1484375, "learning_rate": 0.00018939519299598237, "loss": 0.1571, "step": 3177 }, { "epoch": 1.483313885647608, "grad_norm": 0.796875, "learning_rate": 0.0001893886159236638, "loss": 0.0881, "step": 3178 }, { "epoch": 1.4837806301050174, "grad_norm": 0.97265625, "learning_rate": 0.0001893820369267091, "loss": 0.1149, "step": 3179 }, { "epoch": 1.4842473745624272, "grad_norm": 1.109375, "learning_rate": 0.00018937545600526004, "loss": 0.2139, "step": 3180 }, { "epoch": 1.4847141190198365, "grad_norm": 1.25, "learning_rate": 0.0001893688731594582, "loss": 0.1234, "step": 3181 }, { "epoch": 1.4851808634772463, "grad_norm": 0.9765625, "learning_rate": 0.00018936228838944537, "loss": 0.1424, "step": 3182 }, { "epoch": 1.4856476079346557, "grad_norm": 0.92578125, "learning_rate": 0.00018935570169536337, "loss": 0.1479, "step": 3183 }, { "epoch": 1.4861143523920655, "grad_norm": 0.97265625, "learning_rate": 0.0001893491130773539, "loss": 0.158, "step": 3184 }, { "epoch": 1.4865810968494748, "grad_norm": 0.9140625, "learning_rate": 0.00018934252253555896, "loss": 0.1294, "step": 3185 }, { "epoch": 1.4870478413068846, "grad_norm": 0.96484375, "learning_rate": 0.00018933593007012034, "loss": 0.1346, "step": 3186 }, { "epoch": 1.487514585764294, "grad_norm": 0.95703125, "learning_rate": 0.00018932933568118005, "loss": 0.108, "step": 3187 }, { "epoch": 1.4879813302217035, "grad_norm": 0.95703125, "learning_rate": 0.00018932273936888002, "loss": 0.1251, "step": 3188 }, { "epoch": 1.488448074679113, "grad_norm": 0.92578125, "learning_rate": 0.00018931614113336232, "loss": 0.1276, "step": 3189 }, { "epoch": 1.4889148191365227, "grad_norm": 0.81640625, "learning_rate": 0.00018930954097476902, "loss": 0.0895, "step": 3190 }, { "epoch": 1.4893815635939323, "grad_norm": 1.109375, "learning_rate": 0.0001893029388932422, "loss": 0.16, "step": 3191 }, { "epoch": 1.4898483080513418, "grad_norm": 1.1328125, "learning_rate": 0.00018929633488892403, "loss": 0.1235, "step": 3192 }, { "epoch": 1.4903150525087514, "grad_norm": 1.453125, "learning_rate": 0.00018928972896195667, "loss": 0.1876, "step": 3193 }, { "epoch": 1.490781796966161, "grad_norm": 1.203125, "learning_rate": 0.0001892831211124824, "loss": 0.1804, "step": 3194 }, { "epoch": 1.4912485414235706, "grad_norm": 1.0625, "learning_rate": 0.0001892765113406435, "loss": 0.1482, "step": 3195 }, { "epoch": 1.4917152858809801, "grad_norm": 1.09375, "learning_rate": 0.00018926989964658216, "loss": 0.1183, "step": 3196 }, { "epoch": 1.4921820303383897, "grad_norm": 1.2109375, "learning_rate": 0.00018926328603044093, "loss": 0.1475, "step": 3197 }, { "epoch": 1.4926487747957993, "grad_norm": 1.1015625, "learning_rate": 0.00018925667049236208, "loss": 0.177, "step": 3198 }, { "epoch": 1.4931155192532088, "grad_norm": 0.8828125, "learning_rate": 0.00018925005303248806, "loss": 0.12, "step": 3199 }, { "epoch": 1.4935822637106184, "grad_norm": 1.1640625, "learning_rate": 0.0001892434336509614, "loss": 0.1514, "step": 3200 }, { "epoch": 1.494049008168028, "grad_norm": 0.91015625, "learning_rate": 0.0001892368123479246, "loss": 0.1076, "step": 3201 }, { "epoch": 1.4945157526254376, "grad_norm": 1.140625, "learning_rate": 0.0001892301891235202, "loss": 0.1415, "step": 3202 }, { "epoch": 1.4949824970828471, "grad_norm": 1.2265625, "learning_rate": 0.00018922356397789084, "loss": 0.1415, "step": 3203 }, { "epoch": 1.4954492415402567, "grad_norm": 0.9921875, "learning_rate": 0.00018921693691117914, "loss": 0.1238, "step": 3204 }, { "epoch": 1.4959159859976663, "grad_norm": 1.0, "learning_rate": 0.0001892103079235278, "loss": 0.1272, "step": 3205 }, { "epoch": 1.4963827304550759, "grad_norm": 0.89453125, "learning_rate": 0.00018920367701507956, "loss": 0.1685, "step": 3206 }, { "epoch": 1.4968494749124854, "grad_norm": 0.96484375, "learning_rate": 0.00018919704418597717, "loss": 0.1443, "step": 3207 }, { "epoch": 1.497316219369895, "grad_norm": 0.92578125, "learning_rate": 0.00018919040943636347, "loss": 0.108, "step": 3208 }, { "epoch": 1.4977829638273046, "grad_norm": 0.8984375, "learning_rate": 0.00018918377276638126, "loss": 0.1196, "step": 3209 }, { "epoch": 1.4982497082847142, "grad_norm": 0.8515625, "learning_rate": 0.00018917713417617352, "loss": 0.1534, "step": 3210 }, { "epoch": 1.4987164527421237, "grad_norm": 0.921875, "learning_rate": 0.0001891704936658831, "loss": 0.1454, "step": 3211 }, { "epoch": 1.4991831971995333, "grad_norm": 1.078125, "learning_rate": 0.00018916385123565304, "loss": 0.1383, "step": 3212 }, { "epoch": 1.4996499416569429, "grad_norm": 1.0546875, "learning_rate": 0.00018915720688562632, "loss": 0.1727, "step": 3213 }, { "epoch": 1.5001166861143522, "grad_norm": 1.0859375, "learning_rate": 0.00018915056061594602, "loss": 0.1441, "step": 3214 }, { "epoch": 1.500583430571762, "grad_norm": 0.97265625, "learning_rate": 0.00018914391242675523, "loss": 0.1295, "step": 3215 }, { "epoch": 1.5010501750291714, "grad_norm": 0.859375, "learning_rate": 0.00018913726231819711, "loss": 0.1015, "step": 3216 }, { "epoch": 1.5010501750291714, "eval_loss": 1.3733340501785278, "eval_runtime": 93.5915, "eval_samples_per_second": 19.275, "eval_steps_per_second": 2.415, "step": 3216 }, { "epoch": 1.5015169194865812, "grad_norm": 1.0546875, "learning_rate": 0.0001891306102904148, "loss": 0.1458, "step": 3217 }, { "epoch": 1.5019836639439905, "grad_norm": 1.171875, "learning_rate": 0.0001891239563435516, "loss": 0.1363, "step": 3218 }, { "epoch": 1.5024504084014003, "grad_norm": 1.140625, "learning_rate": 0.0001891173004777507, "loss": 0.1664, "step": 3219 }, { "epoch": 1.5029171528588097, "grad_norm": 1.0703125, "learning_rate": 0.0001891106426931555, "loss": 0.1577, "step": 3220 }, { "epoch": 1.5033838973162195, "grad_norm": 1.1796875, "learning_rate": 0.00018910398298990923, "loss": 0.154, "step": 3221 }, { "epoch": 1.5038506417736288, "grad_norm": 1.15625, "learning_rate": 0.00018909732136815536, "loss": 0.1448, "step": 3222 }, { "epoch": 1.5043173862310386, "grad_norm": 0.82421875, "learning_rate": 0.00018909065782803732, "loss": 0.1127, "step": 3223 }, { "epoch": 1.504784130688448, "grad_norm": 0.91796875, "learning_rate": 0.0001890839923696986, "loss": 0.1458, "step": 3224 }, { "epoch": 1.5052508751458578, "grad_norm": 1.2265625, "learning_rate": 0.00018907732499328264, "loss": 0.1454, "step": 3225 }, { "epoch": 1.505717619603267, "grad_norm": 0.73046875, "learning_rate": 0.00018907065569893303, "loss": 0.1018, "step": 3226 }, { "epoch": 1.506184364060677, "grad_norm": 0.765625, "learning_rate": 0.00018906398448679339, "loss": 0.0956, "step": 3227 }, { "epoch": 1.5066511085180863, "grad_norm": 0.94140625, "learning_rate": 0.00018905731135700736, "loss": 0.1371, "step": 3228 }, { "epoch": 1.507117852975496, "grad_norm": 0.7890625, "learning_rate": 0.0001890506363097186, "loss": 0.0939, "step": 3229 }, { "epoch": 1.5075845974329054, "grad_norm": 1.1015625, "learning_rate": 0.00018904395934507082, "loss": 0.1551, "step": 3230 }, { "epoch": 1.5080513418903152, "grad_norm": 0.9296875, "learning_rate": 0.0001890372804632078, "loss": 0.1197, "step": 3231 }, { "epoch": 1.5085180863477246, "grad_norm": 0.93359375, "learning_rate": 0.00018903059966427337, "loss": 0.1274, "step": 3232 }, { "epoch": 1.5089848308051343, "grad_norm": 1.109375, "learning_rate": 0.00018902391694841132, "loss": 0.1136, "step": 3233 }, { "epoch": 1.5094515752625437, "grad_norm": 1.0703125, "learning_rate": 0.0001890172323157656, "loss": 0.1279, "step": 3234 }, { "epoch": 1.5099183197199533, "grad_norm": 1.1640625, "learning_rate": 0.00018901054576648006, "loss": 0.1235, "step": 3235 }, { "epoch": 1.5103850641773628, "grad_norm": 1.0546875, "learning_rate": 0.00018900385730069873, "loss": 0.1796, "step": 3236 }, { "epoch": 1.5108518086347724, "grad_norm": 1.0859375, "learning_rate": 0.0001889971669185656, "loss": 0.1142, "step": 3237 }, { "epoch": 1.511318553092182, "grad_norm": 1.0859375, "learning_rate": 0.00018899047462022473, "loss": 0.1602, "step": 3238 }, { "epoch": 1.5117852975495916, "grad_norm": 0.96875, "learning_rate": 0.0001889837804058202, "loss": 0.1228, "step": 3239 }, { "epoch": 1.5122520420070011, "grad_norm": 1.078125, "learning_rate": 0.00018897708427549614, "loss": 0.1259, "step": 3240 }, { "epoch": 1.5127187864644107, "grad_norm": 1.2578125, "learning_rate": 0.00018897038622939675, "loss": 0.1411, "step": 3241 }, { "epoch": 1.5131855309218203, "grad_norm": 0.921875, "learning_rate": 0.0001889636862676662, "loss": 0.1498, "step": 3242 }, { "epoch": 1.5136522753792299, "grad_norm": 0.88671875, "learning_rate": 0.00018895698439044882, "loss": 0.1294, "step": 3243 }, { "epoch": 1.5141190198366394, "grad_norm": 1.09375, "learning_rate": 0.00018895028059788885, "loss": 0.1396, "step": 3244 }, { "epoch": 1.514585764294049, "grad_norm": 0.94140625, "learning_rate": 0.00018894357489013063, "loss": 0.1169, "step": 3245 }, { "epoch": 1.5150525087514586, "grad_norm": 1.0703125, "learning_rate": 0.0001889368672673186, "loss": 0.1623, "step": 3246 }, { "epoch": 1.5155192532088682, "grad_norm": 0.953125, "learning_rate": 0.0001889301577295971, "loss": 0.1028, "step": 3247 }, { "epoch": 1.5159859976662777, "grad_norm": 1.125, "learning_rate": 0.00018892344627711064, "loss": 0.1329, "step": 3248 }, { "epoch": 1.5164527421236873, "grad_norm": 1.0234375, "learning_rate": 0.0001889167329100037, "loss": 0.1264, "step": 3249 }, { "epoch": 1.5169194865810969, "grad_norm": 0.8046875, "learning_rate": 0.00018891001762842086, "loss": 0.119, "step": 3250 }, { "epoch": 1.5173862310385065, "grad_norm": 0.96875, "learning_rate": 0.0001889033004325067, "loss": 0.1207, "step": 3251 }, { "epoch": 1.517852975495916, "grad_norm": 0.93359375, "learning_rate": 0.00018889658132240585, "loss": 0.1311, "step": 3252 }, { "epoch": 1.5183197199533256, "grad_norm": 1.03125, "learning_rate": 0.000188889860298263, "loss": 0.129, "step": 3253 }, { "epoch": 1.5187864644107352, "grad_norm": 1.0859375, "learning_rate": 0.0001888831373602228, "loss": 0.1369, "step": 3254 }, { "epoch": 1.5192532088681447, "grad_norm": 1.0703125, "learning_rate": 0.00018887641250843, "loss": 0.1071, "step": 3255 }, { "epoch": 1.5197199533255543, "grad_norm": 1.0078125, "learning_rate": 0.00018886968574302948, "loss": 0.1715, "step": 3256 }, { "epoch": 1.5201866977829637, "grad_norm": 1.15625, "learning_rate": 0.00018886295706416602, "loss": 0.1573, "step": 3257 }, { "epoch": 1.5206534422403735, "grad_norm": 0.953125, "learning_rate": 0.00018885622647198447, "loss": 0.1125, "step": 3258 }, { "epoch": 1.5211201866977828, "grad_norm": 0.9765625, "learning_rate": 0.00018884949396662977, "loss": 0.1202, "step": 3259 }, { "epoch": 1.5215869311551926, "grad_norm": 0.84765625, "learning_rate": 0.00018884275954824694, "loss": 0.1054, "step": 3260 }, { "epoch": 1.522053675612602, "grad_norm": 1.0390625, "learning_rate": 0.0001888360232169809, "loss": 0.1554, "step": 3261 }, { "epoch": 1.5225204200700118, "grad_norm": 0.90234375, "learning_rate": 0.0001888292849729767, "loss": 0.1216, "step": 3262 }, { "epoch": 1.5229871645274211, "grad_norm": 0.8984375, "learning_rate": 0.00018882254481637943, "loss": 0.1349, "step": 3263 }, { "epoch": 1.523453908984831, "grad_norm": 0.890625, "learning_rate": 0.0001888158027473342, "loss": 0.0976, "step": 3264 }, { "epoch": 1.5239206534422403, "grad_norm": 1.0859375, "learning_rate": 0.00018880905876598626, "loss": 0.1516, "step": 3265 }, { "epoch": 1.52438739789965, "grad_norm": 1.0703125, "learning_rate": 0.0001888023128724807, "loss": 0.1504, "step": 3266 }, { "epoch": 1.5248541423570594, "grad_norm": 0.9375, "learning_rate": 0.00018879556506696282, "loss": 0.1137, "step": 3267 }, { "epoch": 1.5253208868144692, "grad_norm": 1.1640625, "learning_rate": 0.00018878881534957793, "loss": 0.1903, "step": 3268 }, { "epoch": 1.5257876312718786, "grad_norm": 0.98828125, "learning_rate": 0.0001887820637204713, "loss": 0.134, "step": 3269 }, { "epoch": 1.5262543757292883, "grad_norm": 0.9609375, "learning_rate": 0.00018877531017978834, "loss": 0.1233, "step": 3270 }, { "epoch": 1.5267211201866977, "grad_norm": 1.109375, "learning_rate": 0.0001887685547276745, "loss": 0.1489, "step": 3271 }, { "epoch": 1.5271878646441075, "grad_norm": 0.83203125, "learning_rate": 0.00018876179736427516, "loss": 0.1175, "step": 3272 }, { "epoch": 1.5276546091015168, "grad_norm": 0.8203125, "learning_rate": 0.00018875503808973582, "loss": 0.1036, "step": 3273 }, { "epoch": 1.5281213535589266, "grad_norm": 0.93359375, "learning_rate": 0.00018874827690420202, "loss": 0.1046, "step": 3274 }, { "epoch": 1.528588098016336, "grad_norm": 0.96875, "learning_rate": 0.0001887415138078194, "loss": 0.1361, "step": 3275 }, { "epoch": 1.5290548424737458, "grad_norm": 0.98828125, "learning_rate": 0.0001887347488007335, "loss": 0.1369, "step": 3276 }, { "epoch": 1.5295215869311551, "grad_norm": 1.078125, "learning_rate": 0.00018872798188309, "loss": 0.129, "step": 3277 }, { "epoch": 1.5299883313885647, "grad_norm": 0.83203125, "learning_rate": 0.00018872121305503462, "loss": 0.1371, "step": 3278 }, { "epoch": 1.5304550758459743, "grad_norm": 1.03125, "learning_rate": 0.00018871444231671308, "loss": 0.1104, "step": 3279 }, { "epoch": 1.5309218203033839, "grad_norm": 0.82421875, "learning_rate": 0.00018870766966827115, "loss": 0.1004, "step": 3280 }, { "epoch": 1.5313885647607934, "grad_norm": 1.2109375, "learning_rate": 0.00018870089510985468, "loss": 0.1918, "step": 3281 }, { "epoch": 1.531855309218203, "grad_norm": 0.9609375, "learning_rate": 0.00018869411864160952, "loss": 0.1191, "step": 3282 }, { "epoch": 1.5323220536756126, "grad_norm": 1.09375, "learning_rate": 0.0001886873402636816, "loss": 0.145, "step": 3283 }, { "epoch": 1.5327887981330222, "grad_norm": 1.015625, "learning_rate": 0.00018868055997621683, "loss": 0.1245, "step": 3284 }, { "epoch": 1.5332555425904317, "grad_norm": 0.94140625, "learning_rate": 0.0001886737777793612, "loss": 0.1225, "step": 3285 }, { "epoch": 1.5337222870478413, "grad_norm": 1.0625, "learning_rate": 0.00018866699367326074, "loss": 0.1479, "step": 3286 }, { "epoch": 1.5341890315052509, "grad_norm": 1.1015625, "learning_rate": 0.00018866020765806156, "loss": 0.1415, "step": 3287 }, { "epoch": 1.5346557759626605, "grad_norm": 1.0390625, "learning_rate": 0.00018865341973390974, "loss": 0.1354, "step": 3288 }, { "epoch": 1.53512252042007, "grad_norm": 0.91796875, "learning_rate": 0.0001886466299009514, "loss": 0.1438, "step": 3289 }, { "epoch": 1.5355892648774796, "grad_norm": 1.21875, "learning_rate": 0.00018863983815933279, "loss": 0.1533, "step": 3290 }, { "epoch": 1.5360560093348892, "grad_norm": 1.171875, "learning_rate": 0.00018863304450920013, "loss": 0.1384, "step": 3291 }, { "epoch": 1.5365227537922987, "grad_norm": 1.1015625, "learning_rate": 0.00018862624895069963, "loss": 0.1299, "step": 3292 }, { "epoch": 1.5369894982497083, "grad_norm": 1.0, "learning_rate": 0.00018861945148397768, "loss": 0.1345, "step": 3293 }, { "epoch": 1.537456242707118, "grad_norm": 0.89453125, "learning_rate": 0.00018861265210918062, "loss": 0.1423, "step": 3294 }, { "epoch": 1.5379229871645275, "grad_norm": 0.7421875, "learning_rate": 0.00018860585082645486, "loss": 0.1002, "step": 3295 }, { "epoch": 1.538389731621937, "grad_norm": 1.1484375, "learning_rate": 0.0001885990476359468, "loss": 0.1648, "step": 3296 }, { "epoch": 1.5388564760793466, "grad_norm": 0.9921875, "learning_rate": 0.00018859224253780292, "loss": 0.1421, "step": 3297 }, { "epoch": 1.539323220536756, "grad_norm": 1.03125, "learning_rate": 0.0001885854355321698, "loss": 0.1265, "step": 3298 }, { "epoch": 1.5397899649941658, "grad_norm": 1.171875, "learning_rate": 0.00018857862661919396, "loss": 0.2223, "step": 3299 }, { "epoch": 1.5402567094515751, "grad_norm": 0.98046875, "learning_rate": 0.00018857181579902199, "loss": 0.1319, "step": 3300 }, { "epoch": 1.540723453908985, "grad_norm": 0.90625, "learning_rate": 0.0001885650030718006, "loss": 0.1148, "step": 3301 }, { "epoch": 1.5411901983663943, "grad_norm": 1.0546875, "learning_rate": 0.0001885581884376764, "loss": 0.1427, "step": 3302 }, { "epoch": 1.541656942823804, "grad_norm": 0.7734375, "learning_rate": 0.00018855137189679612, "loss": 0.1124, "step": 3303 }, { "epoch": 1.5421236872812134, "grad_norm": 0.8984375, "learning_rate": 0.00018854455344930658, "loss": 0.1084, "step": 3304 }, { "epoch": 1.5425904317386232, "grad_norm": 0.9921875, "learning_rate": 0.00018853773309535456, "loss": 0.133, "step": 3305 }, { "epoch": 1.5430571761960326, "grad_norm": 1.015625, "learning_rate": 0.00018853091083508692, "loss": 0.1619, "step": 3306 }, { "epoch": 1.5435239206534423, "grad_norm": 0.98046875, "learning_rate": 0.00018852408666865053, "loss": 0.1362, "step": 3307 }, { "epoch": 1.5439906651108517, "grad_norm": 0.96484375, "learning_rate": 0.00018851726059619233, "loss": 0.1626, "step": 3308 }, { "epoch": 1.5444574095682615, "grad_norm": 0.84765625, "learning_rate": 0.0001885104326178593, "loss": 0.0871, "step": 3309 }, { "epoch": 1.5449241540256708, "grad_norm": 1.171875, "learning_rate": 0.00018850360273379847, "loss": 0.1492, "step": 3310 }, { "epoch": 1.5453908984830806, "grad_norm": 0.91015625, "learning_rate": 0.00018849677094415687, "loss": 0.1047, "step": 3311 }, { "epoch": 1.54585764294049, "grad_norm": 0.91796875, "learning_rate": 0.00018848993724908159, "loss": 0.1165, "step": 3312 }, { "epoch": 1.5463243873978998, "grad_norm": 1.109375, "learning_rate": 0.0001884831016487198, "loss": 0.1252, "step": 3313 }, { "epoch": 1.5467911318553091, "grad_norm": 1.0390625, "learning_rate": 0.00018847626414321864, "loss": 0.1311, "step": 3314 }, { "epoch": 1.547257876312719, "grad_norm": 0.828125, "learning_rate": 0.00018846942473272536, "loss": 0.1077, "step": 3315 }, { "epoch": 1.5477246207701283, "grad_norm": 0.76171875, "learning_rate": 0.0001884625834173872, "loss": 0.0914, "step": 3316 }, { "epoch": 1.548191365227538, "grad_norm": 0.95703125, "learning_rate": 0.00018845574019735146, "loss": 0.1385, "step": 3317 }, { "epoch": 1.5486581096849474, "grad_norm": 0.87890625, "learning_rate": 0.00018844889507276552, "loss": 0.0971, "step": 3318 }, { "epoch": 1.549124854142357, "grad_norm": 1.125, "learning_rate": 0.0001884420480437767, "loss": 0.1524, "step": 3319 }, { "epoch": 1.5495915985997666, "grad_norm": 0.796875, "learning_rate": 0.00018843519911053246, "loss": 0.1031, "step": 3320 }, { "epoch": 1.5500583430571762, "grad_norm": 1.015625, "learning_rate": 0.00018842834827318027, "loss": 0.116, "step": 3321 }, { "epoch": 1.5505250875145857, "grad_norm": 1.34375, "learning_rate": 0.00018842149553186763, "loss": 0.1601, "step": 3322 }, { "epoch": 1.5509918319719953, "grad_norm": 0.88671875, "learning_rate": 0.0001884146408867421, "loss": 0.1146, "step": 3323 }, { "epoch": 1.5514585764294049, "grad_norm": 1.2734375, "learning_rate": 0.00018840778433795122, "loss": 0.1566, "step": 3324 }, { "epoch": 1.5519253208868145, "grad_norm": 1.0546875, "learning_rate": 0.00018840092588564268, "loss": 0.1393, "step": 3325 }, { "epoch": 1.552392065344224, "grad_norm": 0.98828125, "learning_rate": 0.00018839406552996412, "loss": 0.1206, "step": 3326 }, { "epoch": 1.5528588098016336, "grad_norm": 1.0078125, "learning_rate": 0.00018838720327106324, "loss": 0.1275, "step": 3327 }, { "epoch": 1.5533255542590432, "grad_norm": 1.171875, "learning_rate": 0.00018838033910908782, "loss": 0.141, "step": 3328 }, { "epoch": 1.5537922987164527, "grad_norm": 0.8515625, "learning_rate": 0.00018837347304418564, "loss": 0.1265, "step": 3329 }, { "epoch": 1.5542590431738623, "grad_norm": 0.91796875, "learning_rate": 0.00018836660507650453, "loss": 0.118, "step": 3330 }, { "epoch": 1.554725787631272, "grad_norm": 0.83203125, "learning_rate": 0.00018835973520619237, "loss": 0.1141, "step": 3331 }, { "epoch": 1.5551925320886815, "grad_norm": 1.1875, "learning_rate": 0.00018835286343339707, "loss": 0.173, "step": 3332 }, { "epoch": 1.555659276546091, "grad_norm": 1.3203125, "learning_rate": 0.00018834598975826658, "loss": 0.1402, "step": 3333 }, { "epoch": 1.5561260210035006, "grad_norm": 1.046875, "learning_rate": 0.00018833911418094892, "loss": 0.144, "step": 3334 }, { "epoch": 1.5565927654609102, "grad_norm": 0.8984375, "learning_rate": 0.00018833223670159212, "loss": 0.1109, "step": 3335 }, { "epoch": 1.5570595099183198, "grad_norm": 0.89453125, "learning_rate": 0.00018832535732034428, "loss": 0.1168, "step": 3336 }, { "epoch": 1.5575262543757293, "grad_norm": 1.21875, "learning_rate": 0.00018831847603735345, "loss": 0.1735, "step": 3337 }, { "epoch": 1.557992998833139, "grad_norm": 0.9140625, "learning_rate": 0.00018831159285276786, "loss": 0.1464, "step": 3338 }, { "epoch": 1.5584597432905483, "grad_norm": 1.0546875, "learning_rate": 0.00018830470776673569, "loss": 0.1243, "step": 3339 }, { "epoch": 1.558926487747958, "grad_norm": 0.9375, "learning_rate": 0.00018829782077940522, "loss": 0.1271, "step": 3340 }, { "epoch": 1.5593932322053674, "grad_norm": 1.0078125, "learning_rate": 0.00018829093189092467, "loss": 0.1185, "step": 3341 }, { "epoch": 1.5598599766627772, "grad_norm": 1.0859375, "learning_rate": 0.0001882840411014424, "loss": 0.171, "step": 3342 }, { "epoch": 1.5603267211201866, "grad_norm": 1.15625, "learning_rate": 0.00018827714841110677, "loss": 0.1358, "step": 3343 }, { "epoch": 1.5607934655775964, "grad_norm": 0.75390625, "learning_rate": 0.00018827025382006621, "loss": 0.1083, "step": 3344 }, { "epoch": 1.5612602100350057, "grad_norm": 1.0859375, "learning_rate": 0.00018826335732846912, "loss": 0.1927, "step": 3345 }, { "epoch": 1.5617269544924155, "grad_norm": 0.98046875, "learning_rate": 0.000188256458936464, "loss": 0.1223, "step": 3346 }, { "epoch": 1.5621936989498248, "grad_norm": 0.8359375, "learning_rate": 0.00018824955864419943, "loss": 0.1362, "step": 3347 }, { "epoch": 1.5626604434072346, "grad_norm": 0.9609375, "learning_rate": 0.00018824265645182395, "loss": 0.1321, "step": 3348 }, { "epoch": 1.563127187864644, "grad_norm": 0.92578125, "learning_rate": 0.00018823575235948616, "loss": 0.0993, "step": 3349 }, { "epoch": 1.5635939323220538, "grad_norm": 0.84375, "learning_rate": 0.00018822884636733472, "loss": 0.1136, "step": 3350 }, { "epoch": 1.5640606767794631, "grad_norm": 0.93359375, "learning_rate": 0.00018822193847551835, "loss": 0.1354, "step": 3351 }, { "epoch": 1.564527421236873, "grad_norm": 0.94921875, "learning_rate": 0.00018821502868418574, "loss": 0.1226, "step": 3352 }, { "epoch": 1.5649941656942823, "grad_norm": 1.03125, "learning_rate": 0.00018820811699348568, "loss": 0.1318, "step": 3353 }, { "epoch": 1.565460910151692, "grad_norm": 0.953125, "learning_rate": 0.000188201203403567, "loss": 0.1255, "step": 3354 }, { "epoch": 1.5659276546091014, "grad_norm": 0.91796875, "learning_rate": 0.00018819428791457853, "loss": 0.1237, "step": 3355 }, { "epoch": 1.5663943990665112, "grad_norm": 0.83984375, "learning_rate": 0.0001881873705266692, "loss": 0.1004, "step": 3356 }, { "epoch": 1.5668611435239206, "grad_norm": 1.046875, "learning_rate": 0.00018818045123998795, "loss": 0.121, "step": 3357 }, { "epoch": 1.5673278879813304, "grad_norm": 0.92578125, "learning_rate": 0.00018817353005468374, "loss": 0.1035, "step": 3358 }, { "epoch": 1.5677946324387397, "grad_norm": 1.1015625, "learning_rate": 0.0001881666069709056, "loss": 0.1324, "step": 3359 }, { "epoch": 1.5682613768961493, "grad_norm": 1.2109375, "learning_rate": 0.0001881596819888026, "loss": 0.1788, "step": 3360 }, { "epoch": 1.5687281213535589, "grad_norm": 0.953125, "learning_rate": 0.0001881527551085238, "loss": 0.1213, "step": 3361 }, { "epoch": 1.5691948658109685, "grad_norm": 1.0703125, "learning_rate": 0.0001881458263302184, "loss": 0.1127, "step": 3362 }, { "epoch": 1.569661610268378, "grad_norm": 1.1640625, "learning_rate": 0.00018813889565403558, "loss": 0.1785, "step": 3363 }, { "epoch": 1.5701283547257876, "grad_norm": 0.95703125, "learning_rate": 0.0001881319630801245, "loss": 0.0996, "step": 3364 }, { "epoch": 1.5705950991831972, "grad_norm": 1.0, "learning_rate": 0.0001881250286086345, "loss": 0.1759, "step": 3365 }, { "epoch": 1.5710618436406067, "grad_norm": 1.0546875, "learning_rate": 0.00018811809223971484, "loss": 0.1573, "step": 3366 }, { "epoch": 1.5715285880980163, "grad_norm": 1.0234375, "learning_rate": 0.00018811115397351488, "loss": 0.1181, "step": 3367 }, { "epoch": 1.571995332555426, "grad_norm": 0.984375, "learning_rate": 0.00018810421381018402, "loss": 0.1182, "step": 3368 }, { "epoch": 1.5724620770128355, "grad_norm": 1.0703125, "learning_rate": 0.0001880972717498717, "loss": 0.1302, "step": 3369 }, { "epoch": 1.572928821470245, "grad_norm": 1.0078125, "learning_rate": 0.00018809032779272737, "loss": 0.1246, "step": 3370 }, { "epoch": 1.5733955659276546, "grad_norm": 0.80078125, "learning_rate": 0.0001880833819389005, "loss": 0.1094, "step": 3371 }, { "epoch": 1.5738623103850642, "grad_norm": 0.84765625, "learning_rate": 0.00018807643418854077, "loss": 0.0892, "step": 3372 }, { "epoch": 1.5743290548424738, "grad_norm": 0.9140625, "learning_rate": 0.00018806948454179763, "loss": 0.1304, "step": 3373 }, { "epoch": 1.5747957992998833, "grad_norm": 0.8828125, "learning_rate": 0.0001880625329988208, "loss": 0.1277, "step": 3374 }, { "epoch": 1.575262543757293, "grad_norm": 0.78515625, "learning_rate": 0.00018805557955975993, "loss": 0.0976, "step": 3375 }, { "epoch": 1.5757292882147025, "grad_norm": 0.97265625, "learning_rate": 0.0001880486242247647, "loss": 0.1364, "step": 3376 }, { "epoch": 1.576196032672112, "grad_norm": 1.0546875, "learning_rate": 0.0001880416669939849, "loss": 0.114, "step": 3377 }, { "epoch": 1.5766627771295216, "grad_norm": 0.95703125, "learning_rate": 0.00018803470786757034, "loss": 0.1136, "step": 3378 }, { "epoch": 1.5771295215869312, "grad_norm": 0.96875, "learning_rate": 0.00018802774684567087, "loss": 0.1024, "step": 3379 }, { "epoch": 1.5775962660443408, "grad_norm": 1.0859375, "learning_rate": 0.0001880207839284363, "loss": 0.1124, "step": 3380 }, { "epoch": 1.5780630105017504, "grad_norm": 0.98828125, "learning_rate": 0.0001880138191160166, "loss": 0.1393, "step": 3381 }, { "epoch": 1.5785297549591597, "grad_norm": 0.8203125, "learning_rate": 0.00018800685240856173, "loss": 0.1104, "step": 3382 }, { "epoch": 1.5789964994165695, "grad_norm": 1.203125, "learning_rate": 0.00018799988380622168, "loss": 0.1332, "step": 3383 }, { "epoch": 1.5794632438739789, "grad_norm": 0.9765625, "learning_rate": 0.00018799291330914652, "loss": 0.1461, "step": 3384 }, { "epoch": 1.5799299883313886, "grad_norm": 1.1484375, "learning_rate": 0.00018798594091748628, "loss": 0.1598, "step": 3385 }, { "epoch": 1.580396732788798, "grad_norm": 1.09375, "learning_rate": 0.0001879789666313911, "loss": 0.1022, "step": 3386 }, { "epoch": 1.5808634772462078, "grad_norm": 0.890625, "learning_rate": 0.00018797199045101116, "loss": 0.1063, "step": 3387 }, { "epoch": 1.5813302217036171, "grad_norm": 0.87890625, "learning_rate": 0.0001879650123764967, "loss": 0.1121, "step": 3388 }, { "epoch": 1.581796966161027, "grad_norm": 1.0078125, "learning_rate": 0.00018795803240799787, "loss": 0.1178, "step": 3389 }, { "epoch": 1.5822637106184363, "grad_norm": 0.87890625, "learning_rate": 0.00018795105054566501, "loss": 0.0857, "step": 3390 }, { "epoch": 1.582730455075846, "grad_norm": 1.09375, "learning_rate": 0.00018794406678964852, "loss": 0.1119, "step": 3391 }, { "epoch": 1.5831971995332554, "grad_norm": 1.15625, "learning_rate": 0.00018793708114009863, "loss": 0.137, "step": 3392 }, { "epoch": 1.5836639439906652, "grad_norm": 0.99609375, "learning_rate": 0.00018793009359716582, "loss": 0.1298, "step": 3393 }, { "epoch": 1.5841306884480746, "grad_norm": 1.0, "learning_rate": 0.00018792310416100057, "loss": 0.1173, "step": 3394 }, { "epoch": 1.5845974329054844, "grad_norm": 0.89453125, "learning_rate": 0.0001879161128317533, "loss": 0.1103, "step": 3395 }, { "epoch": 1.5850641773628937, "grad_norm": 1.03125, "learning_rate": 0.00018790911960957457, "loss": 0.1438, "step": 3396 }, { "epoch": 1.5855309218203035, "grad_norm": 1.171875, "learning_rate": 0.000187902124494615, "loss": 0.1464, "step": 3397 }, { "epoch": 1.5859976662777129, "grad_norm": 0.9140625, "learning_rate": 0.00018789512748702513, "loss": 0.1318, "step": 3398 }, { "epoch": 1.5864644107351227, "grad_norm": 1.0703125, "learning_rate": 0.00018788812858695563, "loss": 0.1228, "step": 3399 }, { "epoch": 1.586931155192532, "grad_norm": 1.09375, "learning_rate": 0.00018788112779455726, "loss": 0.1111, "step": 3400 }, { "epoch": 1.5873978996499416, "grad_norm": 0.921875, "learning_rate": 0.00018787412510998065, "loss": 0.1652, "step": 3401 }, { "epoch": 1.5878646441073512, "grad_norm": 0.90234375, "learning_rate": 0.00018786712053337664, "loss": 0.1257, "step": 3402 }, { "epoch": 1.5883313885647607, "grad_norm": 0.8828125, "learning_rate": 0.00018786011406489604, "loss": 0.1074, "step": 3403 }, { "epoch": 1.5887981330221703, "grad_norm": 0.91015625, "learning_rate": 0.00018785310570468972, "loss": 0.132, "step": 3404 }, { "epoch": 1.58926487747958, "grad_norm": 1.0234375, "learning_rate": 0.00018784609545290855, "loss": 0.1805, "step": 3405 }, { "epoch": 1.5897316219369895, "grad_norm": 0.91796875, "learning_rate": 0.00018783908330970346, "loss": 0.1233, "step": 3406 }, { "epoch": 1.590198366394399, "grad_norm": 0.9453125, "learning_rate": 0.00018783206927522548, "loss": 0.1518, "step": 3407 }, { "epoch": 1.5906651108518086, "grad_norm": 1.0234375, "learning_rate": 0.00018782505334962558, "loss": 0.1431, "step": 3408 }, { "epoch": 1.5911318553092182, "grad_norm": 0.953125, "learning_rate": 0.00018781803553305482, "loss": 0.1385, "step": 3409 }, { "epoch": 1.5915985997666278, "grad_norm": 0.9609375, "learning_rate": 0.00018781101582566434, "loss": 0.1273, "step": 3410 }, { "epoch": 1.5920653442240373, "grad_norm": 0.77734375, "learning_rate": 0.00018780399422760525, "loss": 0.1133, "step": 3411 }, { "epoch": 1.592532088681447, "grad_norm": 0.93359375, "learning_rate": 0.00018779697073902877, "loss": 0.1172, "step": 3412 }, { "epoch": 1.5929988331388565, "grad_norm": 0.69921875, "learning_rate": 0.00018778994536008609, "loss": 0.0939, "step": 3413 }, { "epoch": 1.593465577596266, "grad_norm": 0.921875, "learning_rate": 0.00018778291809092842, "loss": 0.0979, "step": 3414 }, { "epoch": 1.5939323220536756, "grad_norm": 1.296875, "learning_rate": 0.00018777588893170718, "loss": 0.1596, "step": 3415 }, { "epoch": 1.5943990665110852, "grad_norm": 1.0546875, "learning_rate": 0.00018776885788257368, "loss": 0.1494, "step": 3416 }, { "epoch": 1.5948658109684948, "grad_norm": 0.98046875, "learning_rate": 0.00018776182494367928, "loss": 0.142, "step": 3417 }, { "epoch": 1.5953325554259044, "grad_norm": 1.1484375, "learning_rate": 0.00018775479011517537, "loss": 0.1347, "step": 3418 }, { "epoch": 1.595799299883314, "grad_norm": 0.875, "learning_rate": 0.0001877477533972135, "loss": 0.1227, "step": 3419 }, { "epoch": 1.5962660443407235, "grad_norm": 1.296875, "learning_rate": 0.00018774071478994513, "loss": 0.1524, "step": 3420 }, { "epoch": 1.596732788798133, "grad_norm": 1.0625, "learning_rate": 0.00018773367429352186, "loss": 0.1503, "step": 3421 }, { "epoch": 1.5971995332555426, "grad_norm": 0.94921875, "learning_rate": 0.0001877266319080952, "loss": 0.1236, "step": 3422 }, { "epoch": 1.597666277712952, "grad_norm": 1.140625, "learning_rate": 0.00018771958763381683, "loss": 0.1544, "step": 3423 }, { "epoch": 1.5981330221703618, "grad_norm": 1.125, "learning_rate": 0.00018771254147083842, "loss": 0.1376, "step": 3424 }, { "epoch": 1.5985997666277711, "grad_norm": 0.8515625, "learning_rate": 0.00018770549341931165, "loss": 0.1075, "step": 3425 }, { "epoch": 1.599066511085181, "grad_norm": 0.9609375, "learning_rate": 0.0001876984434793883, "loss": 0.1232, "step": 3426 }, { "epoch": 1.5995332555425903, "grad_norm": 0.80078125, "learning_rate": 0.00018769139165122017, "loss": 0.1288, "step": 3427 }, { "epoch": 1.6, "grad_norm": 1.03125, "learning_rate": 0.00018768433793495907, "loss": 0.1331, "step": 3428 }, { "epoch": 1.6004667444574094, "grad_norm": 0.76171875, "learning_rate": 0.0001876772823307569, "loss": 0.0756, "step": 3429 }, { "epoch": 1.6009334889148192, "grad_norm": 1.1171875, "learning_rate": 0.00018767022483876558, "loss": 0.1273, "step": 3430 }, { "epoch": 1.6014002333722286, "grad_norm": 0.91796875, "learning_rate": 0.00018766316545913703, "loss": 0.1452, "step": 3431 }, { "epoch": 1.6018669778296384, "grad_norm": 0.9609375, "learning_rate": 0.00018765610419202325, "loss": 0.1117, "step": 3432 }, { "epoch": 1.6023337222870477, "grad_norm": 1.1171875, "learning_rate": 0.00018764904103757627, "loss": 0.1139, "step": 3433 }, { "epoch": 1.6028004667444575, "grad_norm": 1.1640625, "learning_rate": 0.00018764197599594823, "loss": 0.1248, "step": 3434 }, { "epoch": 1.6032672112018669, "grad_norm": 1.1015625, "learning_rate": 0.00018763490906729122, "loss": 0.1329, "step": 3435 }, { "epoch": 1.6037339556592767, "grad_norm": 1.078125, "learning_rate": 0.00018762784025175736, "loss": 0.1108, "step": 3436 }, { "epoch": 1.604200700116686, "grad_norm": 0.98046875, "learning_rate": 0.00018762076954949888, "loss": 0.1271, "step": 3437 }, { "epoch": 1.6046674445740958, "grad_norm": 1.03125, "learning_rate": 0.00018761369696066797, "loss": 0.09, "step": 3438 }, { "epoch": 1.6051341890315052, "grad_norm": 0.98046875, "learning_rate": 0.00018760662248541702, "loss": 0.1286, "step": 3439 }, { "epoch": 1.605600933488915, "grad_norm": 0.9921875, "learning_rate": 0.00018759954612389824, "loss": 0.1328, "step": 3440 }, { "epoch": 1.6060676779463243, "grad_norm": 1.0, "learning_rate": 0.0001875924678762641, "loss": 0.1163, "step": 3441 }, { "epoch": 1.6065344224037341, "grad_norm": 0.90625, "learning_rate": 0.00018758538774266687, "loss": 0.127, "step": 3442 }, { "epoch": 1.6070011668611435, "grad_norm": 1.0546875, "learning_rate": 0.00018757830572325909, "loss": 0.118, "step": 3443 }, { "epoch": 1.607467911318553, "grad_norm": 0.89453125, "learning_rate": 0.00018757122181819325, "loss": 0.1063, "step": 3444 }, { "epoch": 1.6079346557759626, "grad_norm": 0.9140625, "learning_rate": 0.00018756413602762182, "loss": 0.1236, "step": 3445 }, { "epoch": 1.6084014002333722, "grad_norm": 1.015625, "learning_rate": 0.00018755704835169738, "loss": 0.131, "step": 3446 }, { "epoch": 1.6088681446907818, "grad_norm": 1.0078125, "learning_rate": 0.00018754995879057253, "loss": 0.1402, "step": 3447 }, { "epoch": 1.6093348891481913, "grad_norm": 1.25, "learning_rate": 0.00018754286734439996, "loss": 0.1812, "step": 3448 }, { "epoch": 1.609801633605601, "grad_norm": 1.0703125, "learning_rate": 0.00018753577401333228, "loss": 0.1159, "step": 3449 }, { "epoch": 1.6102683780630105, "grad_norm": 0.82421875, "learning_rate": 0.00018752867879752233, "loss": 0.1026, "step": 3450 }, { "epoch": 1.61073512252042, "grad_norm": 0.9765625, "learning_rate": 0.00018752158169712274, "loss": 0.1228, "step": 3451 }, { "epoch": 1.6112018669778296, "grad_norm": 0.89453125, "learning_rate": 0.00018751448271228644, "loss": 0.118, "step": 3452 }, { "epoch": 1.6116686114352392, "grad_norm": 0.9296875, "learning_rate": 0.0001875073818431662, "loss": 0.1111, "step": 3453 }, { "epoch": 1.6121353558926488, "grad_norm": 1.0, "learning_rate": 0.00018750027908991497, "loss": 0.121, "step": 3454 }, { "epoch": 1.6126021003500584, "grad_norm": 0.890625, "learning_rate": 0.00018749317445268562, "loss": 0.0994, "step": 3455 }, { "epoch": 1.613068844807468, "grad_norm": 0.89453125, "learning_rate": 0.00018748606793163116, "loss": 0.1224, "step": 3456 }, { "epoch": 1.6135355892648775, "grad_norm": 0.9375, "learning_rate": 0.00018747895952690458, "loss": 0.089, "step": 3457 }, { "epoch": 1.614002333722287, "grad_norm": 0.9140625, "learning_rate": 0.00018747184923865895, "loss": 0.1202, "step": 3458 }, { "epoch": 1.6144690781796966, "grad_norm": 0.9140625, "learning_rate": 0.00018746473706704735, "loss": 0.1146, "step": 3459 }, { "epoch": 1.6149358226371062, "grad_norm": 1.1640625, "learning_rate": 0.00018745762301222292, "loss": 0.13, "step": 3460 }, { "epoch": 1.6154025670945158, "grad_norm": 0.875, "learning_rate": 0.00018745050707433882, "loss": 0.1004, "step": 3461 }, { "epoch": 1.6158693115519254, "grad_norm": 0.92578125, "learning_rate": 0.00018744338925354829, "loss": 0.1203, "step": 3462 }, { "epoch": 1.616336056009335, "grad_norm": 1.0390625, "learning_rate": 0.00018743626955000458, "loss": 0.1421, "step": 3463 }, { "epoch": 1.6168028004667443, "grad_norm": 1.0625, "learning_rate": 0.00018742914796386094, "loss": 0.1404, "step": 3464 }, { "epoch": 1.617269544924154, "grad_norm": 0.83984375, "learning_rate": 0.00018742202449527077, "loss": 0.103, "step": 3465 }, { "epoch": 1.6177362893815634, "grad_norm": 0.97265625, "learning_rate": 0.0001874148991443874, "loss": 0.1081, "step": 3466 }, { "epoch": 1.6182030338389732, "grad_norm": 0.91015625, "learning_rate": 0.00018740777191136426, "loss": 0.1117, "step": 3467 }, { "epoch": 1.6186697782963826, "grad_norm": 0.9765625, "learning_rate": 0.00018740064279635483, "loss": 0.1245, "step": 3468 }, { "epoch": 1.6191365227537924, "grad_norm": 1.2109375, "learning_rate": 0.00018739351179951257, "loss": 0.1549, "step": 3469 }, { "epoch": 1.6196032672112017, "grad_norm": 1.03125, "learning_rate": 0.00018738637892099104, "loss": 0.1213, "step": 3470 }, { "epoch": 1.6200700116686115, "grad_norm": 1.1171875, "learning_rate": 0.00018737924416094382, "loss": 0.1481, "step": 3471 }, { "epoch": 1.6205367561260209, "grad_norm": 1.015625, "learning_rate": 0.00018737210751952453, "loss": 0.1079, "step": 3472 }, { "epoch": 1.6210035005834307, "grad_norm": 0.7734375, "learning_rate": 0.0001873649689968868, "loss": 0.0891, "step": 3473 }, { "epoch": 1.62147024504084, "grad_norm": 0.88671875, "learning_rate": 0.00018735782859318438, "loss": 0.1318, "step": 3474 }, { "epoch": 1.6219369894982498, "grad_norm": 1.0, "learning_rate": 0.00018735068630857096, "loss": 0.13, "step": 3475 }, { "epoch": 1.6224037339556592, "grad_norm": 0.90234375, "learning_rate": 0.00018734354214320037, "loss": 0.1157, "step": 3476 }, { "epoch": 1.622870478413069, "grad_norm": 0.8671875, "learning_rate": 0.00018733639609722639, "loss": 0.1035, "step": 3477 }, { "epoch": 1.6233372228704783, "grad_norm": 0.921875, "learning_rate": 0.0001873292481708029, "loss": 0.0999, "step": 3478 }, { "epoch": 1.6238039673278881, "grad_norm": 0.8828125, "learning_rate": 0.0001873220983640838, "loss": 0.1551, "step": 3479 }, { "epoch": 1.6242707117852975, "grad_norm": 0.87890625, "learning_rate": 0.00018731494667722308, "loss": 0.1131, "step": 3480 }, { "epoch": 1.6247374562427073, "grad_norm": 0.94921875, "learning_rate": 0.00018730779311037466, "loss": 0.1192, "step": 3481 }, { "epoch": 1.6252042007001166, "grad_norm": 0.97265625, "learning_rate": 0.00018730063766369255, "loss": 0.1023, "step": 3482 }, { "epoch": 1.6256709451575264, "grad_norm": 0.90234375, "learning_rate": 0.00018729348033733086, "loss": 0.0854, "step": 3483 }, { "epoch": 1.6261376896149358, "grad_norm": 1.03125, "learning_rate": 0.00018728632113144373, "loss": 0.1187, "step": 3484 }, { "epoch": 1.6266044340723453, "grad_norm": 0.91015625, "learning_rate": 0.00018727916004618523, "loss": 0.0961, "step": 3485 }, { "epoch": 1.627071178529755, "grad_norm": 0.91015625, "learning_rate": 0.00018727199708170957, "loss": 0.093, "step": 3486 }, { "epoch": 1.6275379229871645, "grad_norm": 1.0546875, "learning_rate": 0.00018726483223817098, "loss": 0.1379, "step": 3487 }, { "epoch": 1.628004667444574, "grad_norm": 1.0390625, "learning_rate": 0.00018725766551572375, "loss": 0.0947, "step": 3488 }, { "epoch": 1.6284714119019836, "grad_norm": 0.90234375, "learning_rate": 0.00018725049691452214, "loss": 0.1178, "step": 3489 }, { "epoch": 1.6289381563593932, "grad_norm": 1.0234375, "learning_rate": 0.00018724332643472056, "loss": 0.1235, "step": 3490 }, { "epoch": 1.6294049008168028, "grad_norm": 0.9453125, "learning_rate": 0.00018723615407647334, "loss": 0.1087, "step": 3491 }, { "epoch": 1.6298716452742124, "grad_norm": 1.109375, "learning_rate": 0.00018722897983993491, "loss": 0.1111, "step": 3492 }, { "epoch": 1.630338389731622, "grad_norm": 0.7734375, "learning_rate": 0.00018722180372525982, "loss": 0.0701, "step": 3493 }, { "epoch": 1.6308051341890315, "grad_norm": 0.8984375, "learning_rate": 0.00018721462573260248, "loss": 0.1055, "step": 3494 }, { "epoch": 1.631271878646441, "grad_norm": 0.84765625, "learning_rate": 0.0001872074458621175, "loss": 0.1324, "step": 3495 }, { "epoch": 1.6317386231038506, "grad_norm": 1.2109375, "learning_rate": 0.00018720026411395946, "loss": 0.1787, "step": 3496 }, { "epoch": 1.6322053675612602, "grad_norm": 0.92578125, "learning_rate": 0.00018719308048828294, "loss": 0.099, "step": 3497 }, { "epoch": 1.6326721120186698, "grad_norm": 1.09375, "learning_rate": 0.0001871858949852427, "loss": 0.1315, "step": 3498 }, { "epoch": 1.6331388564760794, "grad_norm": 1.09375, "learning_rate": 0.00018717870760499337, "loss": 0.0942, "step": 3499 }, { "epoch": 1.633605600933489, "grad_norm": 0.8125, "learning_rate": 0.00018717151834768978, "loss": 0.1061, "step": 3500 }, { "epoch": 1.6340723453908985, "grad_norm": 1.0546875, "learning_rate": 0.00018716432721348664, "loss": 0.1113, "step": 3501 }, { "epoch": 1.634539089848308, "grad_norm": 1.0078125, "learning_rate": 0.00018715713420253884, "loss": 0.1471, "step": 3502 }, { "epoch": 1.6350058343057177, "grad_norm": 0.8671875, "learning_rate": 0.00018714993931500128, "loss": 0.1036, "step": 3503 }, { "epoch": 1.6354725787631272, "grad_norm": 0.92578125, "learning_rate": 0.00018714274255102878, "loss": 0.1013, "step": 3504 }, { "epoch": 1.6359393232205366, "grad_norm": 0.97265625, "learning_rate": 0.00018713554391077634, "loss": 0.1105, "step": 3505 }, { "epoch": 1.6364060676779464, "grad_norm": 1.2109375, "learning_rate": 0.00018712834339439896, "loss": 0.1498, "step": 3506 }, { "epoch": 1.6368728121353557, "grad_norm": 1.1640625, "learning_rate": 0.00018712114100205175, "loss": 0.1235, "step": 3507 }, { "epoch": 1.6373395565927655, "grad_norm": 0.7734375, "learning_rate": 0.00018711393673388963, "loss": 0.0877, "step": 3508 }, { "epoch": 1.6378063010501749, "grad_norm": 0.828125, "learning_rate": 0.00018710673059006783, "loss": 0.0917, "step": 3509 }, { "epoch": 1.6382730455075847, "grad_norm": 1.109375, "learning_rate": 0.00018709952257074149, "loss": 0.1262, "step": 3510 }, { "epoch": 1.638739789964994, "grad_norm": 0.95703125, "learning_rate": 0.00018709231267606576, "loss": 0.1259, "step": 3511 }, { "epoch": 1.6392065344224038, "grad_norm": 0.859375, "learning_rate": 0.00018708510090619592, "loss": 0.1114, "step": 3512 }, { "epoch": 1.6396732788798132, "grad_norm": 0.91796875, "learning_rate": 0.00018707788726128724, "loss": 0.1255, "step": 3513 }, { "epoch": 1.640140023337223, "grad_norm": 1.015625, "learning_rate": 0.00018707067174149498, "loss": 0.1133, "step": 3514 }, { "epoch": 1.6406067677946323, "grad_norm": 1.3125, "learning_rate": 0.0001870634543469746, "loss": 0.0997, "step": 3515 }, { "epoch": 1.6410735122520421, "grad_norm": 0.8828125, "learning_rate": 0.00018705623507788146, "loss": 0.1056, "step": 3516 }, { "epoch": 1.6415402567094515, "grad_norm": 0.8984375, "learning_rate": 0.00018704901393437097, "loss": 0.102, "step": 3517 }, { "epoch": 1.6420070011668613, "grad_norm": 1.046875, "learning_rate": 0.00018704179091659864, "loss": 0.126, "step": 3518 }, { "epoch": 1.6424737456242706, "grad_norm": 1.1796875, "learning_rate": 0.00018703456602471997, "loss": 0.1861, "step": 3519 }, { "epoch": 1.6429404900816804, "grad_norm": 1.1171875, "learning_rate": 0.00018702733925889052, "loss": 0.152, "step": 3520 }, { "epoch": 1.6434072345390898, "grad_norm": 1.0390625, "learning_rate": 0.0001870201106192659, "loss": 0.1251, "step": 3521 }, { "epoch": 1.6438739789964996, "grad_norm": 0.953125, "learning_rate": 0.00018701288010600177, "loss": 0.1242, "step": 3522 }, { "epoch": 1.644340723453909, "grad_norm": 0.859375, "learning_rate": 0.00018700564771925376, "loss": 0.0818, "step": 3523 }, { "epoch": 1.6448074679113187, "grad_norm": 0.8359375, "learning_rate": 0.00018699841345917764, "loss": 0.1194, "step": 3524 }, { "epoch": 1.645274212368728, "grad_norm": 0.984375, "learning_rate": 0.00018699117732592915, "loss": 0.0872, "step": 3525 }, { "epoch": 1.6457409568261376, "grad_norm": 0.97265625, "learning_rate": 0.00018698393931966407, "loss": 0.1181, "step": 3526 }, { "epoch": 1.6462077012835472, "grad_norm": 0.8203125, "learning_rate": 0.00018697669944053828, "loss": 0.1007, "step": 3527 }, { "epoch": 1.6466744457409568, "grad_norm": 0.984375, "learning_rate": 0.00018696945768870764, "loss": 0.1032, "step": 3528 }, { "epoch": 1.6471411901983664, "grad_norm": 0.984375, "learning_rate": 0.00018696221406432813, "loss": 0.107, "step": 3529 }, { "epoch": 1.647607934655776, "grad_norm": 0.95703125, "learning_rate": 0.0001869549685675556, "loss": 0.1077, "step": 3530 }, { "epoch": 1.6480746791131855, "grad_norm": 1.3359375, "learning_rate": 0.00018694772119854617, "loss": 0.1684, "step": 3531 }, { "epoch": 1.648541423570595, "grad_norm": 0.75390625, "learning_rate": 0.00018694047195745578, "loss": 0.0765, "step": 3532 }, { "epoch": 1.6490081680280047, "grad_norm": 0.7890625, "learning_rate": 0.0001869332208444406, "loss": 0.0813, "step": 3533 }, { "epoch": 1.6494749124854142, "grad_norm": 1.015625, "learning_rate": 0.00018692596785965674, "loss": 0.1289, "step": 3534 }, { "epoch": 1.6499416569428238, "grad_norm": 0.875, "learning_rate": 0.0001869187130032603, "loss": 0.0869, "step": 3535 }, { "epoch": 1.6504084014002334, "grad_norm": 1.09375, "learning_rate": 0.00018691145627540757, "loss": 0.0939, "step": 3536 }, { "epoch": 1.650875145857643, "grad_norm": 1.046875, "learning_rate": 0.0001869041976762547, "loss": 0.1406, "step": 3537 }, { "epoch": 1.6513418903150525, "grad_norm": 1.0234375, "learning_rate": 0.00018689693720595808, "loss": 0.1091, "step": 3538 }, { "epoch": 1.651808634772462, "grad_norm": 1.1328125, "learning_rate": 0.000186889674864674, "loss": 0.1147, "step": 3539 }, { "epoch": 1.6522753792298717, "grad_norm": 1.109375, "learning_rate": 0.00018688241065255878, "loss": 0.1437, "step": 3540 }, { "epoch": 1.6527421236872812, "grad_norm": 0.80859375, "learning_rate": 0.00018687514456976886, "loss": 0.0942, "step": 3541 }, { "epoch": 1.6532088681446908, "grad_norm": 0.90234375, "learning_rate": 0.00018686787661646072, "loss": 0.1087, "step": 3542 }, { "epoch": 1.6536756126021004, "grad_norm": 1.0625, "learning_rate": 0.0001868606067927908, "loss": 0.1341, "step": 3543 }, { "epoch": 1.65414235705951, "grad_norm": 0.91015625, "learning_rate": 0.00018685333509891564, "loss": 0.1126, "step": 3544 }, { "epoch": 1.6546091015169195, "grad_norm": 0.98046875, "learning_rate": 0.0001868460615349918, "loss": 0.1407, "step": 3545 }, { "epoch": 1.655075845974329, "grad_norm": 1.046875, "learning_rate": 0.00018683878610117592, "loss": 0.1398, "step": 3546 }, { "epoch": 1.6555425904317387, "grad_norm": 0.94140625, "learning_rate": 0.0001868315087976246, "loss": 0.0965, "step": 3547 }, { "epoch": 1.656009334889148, "grad_norm": 0.86328125, "learning_rate": 0.0001868242296244946, "loss": 0.0853, "step": 3548 }, { "epoch": 1.6564760793465578, "grad_norm": 0.84375, "learning_rate": 0.00018681694858194256, "loss": 0.0973, "step": 3549 }, { "epoch": 1.6569428238039672, "grad_norm": 0.84375, "learning_rate": 0.0001868096656701253, "loss": 0.0998, "step": 3550 }, { "epoch": 1.657409568261377, "grad_norm": 0.94921875, "learning_rate": 0.00018680238088919962, "loss": 0.1244, "step": 3551 }, { "epoch": 1.6578763127187863, "grad_norm": 1.2109375, "learning_rate": 0.0001867950942393224, "loss": 0.1234, "step": 3552 }, { "epoch": 1.6583430571761961, "grad_norm": 0.91796875, "learning_rate": 0.00018678780572065046, "loss": 0.1044, "step": 3553 }, { "epoch": 1.6588098016336055, "grad_norm": 0.98828125, "learning_rate": 0.00018678051533334078, "loss": 0.1419, "step": 3554 }, { "epoch": 1.6592765460910153, "grad_norm": 1.2578125, "learning_rate": 0.00018677322307755032, "loss": 0.1454, "step": 3555 }, { "epoch": 1.6597432905484246, "grad_norm": 0.9375, "learning_rate": 0.0001867659289534361, "loss": 0.0975, "step": 3556 }, { "epoch": 1.6602100350058344, "grad_norm": 0.73828125, "learning_rate": 0.00018675863296115514, "loss": 0.0928, "step": 3557 }, { "epoch": 1.6606767794632438, "grad_norm": 1.0234375, "learning_rate": 0.00018675133510086457, "loss": 0.1004, "step": 3558 }, { "epoch": 1.6611435239206536, "grad_norm": 1.0703125, "learning_rate": 0.00018674403537272152, "loss": 0.1357, "step": 3559 }, { "epoch": 1.661610268378063, "grad_norm": 0.84375, "learning_rate": 0.00018673673377688312, "loss": 0.0829, "step": 3560 }, { "epoch": 1.6620770128354727, "grad_norm": 0.890625, "learning_rate": 0.0001867294303135066, "loss": 0.1016, "step": 3561 }, { "epoch": 1.662543757292882, "grad_norm": 0.921875, "learning_rate": 0.00018672212498274923, "loss": 0.1184, "step": 3562 }, { "epoch": 1.6630105017502919, "grad_norm": 0.92578125, "learning_rate": 0.0001867148177847683, "loss": 0.0973, "step": 3563 }, { "epoch": 1.6634772462077012, "grad_norm": 0.91015625, "learning_rate": 0.00018670750871972112, "loss": 0.1038, "step": 3564 }, { "epoch": 1.663943990665111, "grad_norm": 0.8671875, "learning_rate": 0.00018670019778776507, "loss": 0.1071, "step": 3565 }, { "epoch": 1.6644107351225204, "grad_norm": 0.88671875, "learning_rate": 0.00018669288498905756, "loss": 0.0918, "step": 3566 }, { "epoch": 1.6648774795799302, "grad_norm": 0.9140625, "learning_rate": 0.00018668557032375607, "loss": 0.1077, "step": 3567 }, { "epoch": 1.6653442240373395, "grad_norm": 0.9140625, "learning_rate": 0.00018667825379201807, "loss": 0.1125, "step": 3568 }, { "epoch": 1.665810968494749, "grad_norm": 0.8203125, "learning_rate": 0.00018667093539400108, "loss": 0.0886, "step": 3569 }, { "epoch": 1.6662777129521587, "grad_norm": 1.0703125, "learning_rate": 0.00018666361512986268, "loss": 0.1095, "step": 3570 }, { "epoch": 1.6667444574095682, "grad_norm": 0.98046875, "learning_rate": 0.00018665629299976052, "loss": 0.1078, "step": 3571 }, { "epoch": 1.6672112018669778, "grad_norm": 0.8984375, "learning_rate": 0.0001866489690038522, "loss": 0.0694, "step": 3572 }, { "epoch": 1.6676779463243874, "grad_norm": 1.0, "learning_rate": 0.00018664164314229545, "loss": 0.1241, "step": 3573 }, { "epoch": 1.668144690781797, "grad_norm": 0.89453125, "learning_rate": 0.00018663431541524799, "loss": 0.0936, "step": 3574 }, { "epoch": 1.6686114352392065, "grad_norm": 1.109375, "learning_rate": 0.00018662698582286758, "loss": 0.1189, "step": 3575 }, { "epoch": 1.669078179696616, "grad_norm": 0.9140625, "learning_rate": 0.00018661965436531208, "loss": 0.0966, "step": 3576 }, { "epoch": 1.6695449241540257, "grad_norm": 0.94921875, "learning_rate": 0.0001866123210427393, "loss": 0.1019, "step": 3577 }, { "epoch": 1.6700116686114352, "grad_norm": 0.96875, "learning_rate": 0.00018660498585530716, "loss": 0.0984, "step": 3578 }, { "epoch": 1.6704784130688448, "grad_norm": 0.9921875, "learning_rate": 0.00018659764880317358, "loss": 0.0938, "step": 3579 }, { "epoch": 1.6709451575262544, "grad_norm": 0.96484375, "learning_rate": 0.00018659030988649652, "loss": 0.0961, "step": 3580 }, { "epoch": 1.671411901983664, "grad_norm": 1.0234375, "learning_rate": 0.00018658296910543405, "loss": 0.0865, "step": 3581 }, { "epoch": 1.6718786464410735, "grad_norm": 0.9765625, "learning_rate": 0.00018657562646014414, "loss": 0.0997, "step": 3582 }, { "epoch": 1.672345390898483, "grad_norm": 1.109375, "learning_rate": 0.00018656828195078498, "loss": 0.0884, "step": 3583 }, { "epoch": 1.6728121353558927, "grad_norm": 0.95703125, "learning_rate": 0.00018656093557751463, "loss": 0.135, "step": 3584 }, { "epoch": 1.6732788798133023, "grad_norm": 1.0234375, "learning_rate": 0.00018655358734049132, "loss": 0.1157, "step": 3585 }, { "epoch": 1.6737456242707118, "grad_norm": 0.96484375, "learning_rate": 0.00018654623723987325, "loss": 0.1034, "step": 3586 }, { "epoch": 1.6742123687281214, "grad_norm": 1.1171875, "learning_rate": 0.00018653888527581867, "loss": 0.1191, "step": 3587 }, { "epoch": 1.674679113185531, "grad_norm": 0.96484375, "learning_rate": 0.00018653153144848584, "loss": 0.1135, "step": 3588 }, { "epoch": 1.6751458576429403, "grad_norm": 1.2265625, "learning_rate": 0.00018652417575803316, "loss": 0.1553, "step": 3589 }, { "epoch": 1.6756126021003501, "grad_norm": 1.109375, "learning_rate": 0.00018651681820461895, "loss": 0.1416, "step": 3590 }, { "epoch": 1.6760793465577595, "grad_norm": 0.99609375, "learning_rate": 0.00018650945878840168, "loss": 0.1154, "step": 3591 }, { "epoch": 1.6765460910151693, "grad_norm": 0.84375, "learning_rate": 0.00018650209750953973, "loss": 0.0871, "step": 3592 }, { "epoch": 1.6770128354725786, "grad_norm": 0.94921875, "learning_rate": 0.00018649473436819166, "loss": 0.112, "step": 3593 }, { "epoch": 1.6774795799299884, "grad_norm": 0.9609375, "learning_rate": 0.00018648736936451604, "loss": 0.108, "step": 3594 }, { "epoch": 1.6779463243873978, "grad_norm": 0.96875, "learning_rate": 0.00018648000249867134, "loss": 0.1284, "step": 3595 }, { "epoch": 1.6784130688448076, "grad_norm": 1.015625, "learning_rate": 0.00018647263377081626, "loss": 0.1165, "step": 3596 }, { "epoch": 1.678879813302217, "grad_norm": 0.953125, "learning_rate": 0.00018646526318110947, "loss": 0.0901, "step": 3597 }, { "epoch": 1.6793465577596267, "grad_norm": 0.859375, "learning_rate": 0.00018645789072970956, "loss": 0.096, "step": 3598 }, { "epoch": 1.679813302217036, "grad_norm": 0.828125, "learning_rate": 0.00018645051641677537, "loss": 0.0874, "step": 3599 }, { "epoch": 1.6802800466744459, "grad_norm": 0.91796875, "learning_rate": 0.00018644314024246565, "loss": 0.1144, "step": 3600 }, { "epoch": 1.6807467911318552, "grad_norm": 0.89453125, "learning_rate": 0.0001864357622069392, "loss": 0.0933, "step": 3601 }, { "epoch": 1.681213535589265, "grad_norm": 1.0078125, "learning_rate": 0.00018642838231035488, "loss": 0.0965, "step": 3602 }, { "epoch": 1.6816802800466744, "grad_norm": 0.859375, "learning_rate": 0.00018642100055287161, "loss": 0.0946, "step": 3603 }, { "epoch": 1.6821470245040842, "grad_norm": 2.3125, "learning_rate": 0.0001864136169346483, "loss": 0.1412, "step": 3604 }, { "epoch": 1.6826137689614935, "grad_norm": 1.0078125, "learning_rate": 0.00018640623145584396, "loss": 0.0988, "step": 3605 }, { "epoch": 1.6830805134189033, "grad_norm": 0.921875, "learning_rate": 0.00018639884411661756, "loss": 0.1067, "step": 3606 }, { "epoch": 1.6835472578763127, "grad_norm": 0.97265625, "learning_rate": 0.0001863914549171282, "loss": 0.0917, "step": 3607 }, { "epoch": 1.6840140023337224, "grad_norm": 0.89453125, "learning_rate": 0.00018638406385753495, "loss": 0.1246, "step": 3608 }, { "epoch": 1.6844807467911318, "grad_norm": 1.015625, "learning_rate": 0.00018637667093799698, "loss": 0.124, "step": 3609 }, { "epoch": 1.6849474912485414, "grad_norm": 1.1875, "learning_rate": 0.00018636927615867343, "loss": 0.1136, "step": 3610 }, { "epoch": 1.685414235705951, "grad_norm": 1.0703125, "learning_rate": 0.00018636187951972356, "loss": 0.1301, "step": 3611 }, { "epoch": 1.6858809801633605, "grad_norm": 0.8828125, "learning_rate": 0.00018635448102130657, "loss": 0.0894, "step": 3612 }, { "epoch": 1.68634772462077, "grad_norm": 1.0546875, "learning_rate": 0.0001863470806635818, "loss": 0.1527, "step": 3613 }, { "epoch": 1.6868144690781797, "grad_norm": 0.85546875, "learning_rate": 0.0001863396784467086, "loss": 0.0736, "step": 3614 }, { "epoch": 1.6872812135355892, "grad_norm": 0.91015625, "learning_rate": 0.00018633227437084632, "loss": 0.1101, "step": 3615 }, { "epoch": 1.6877479579929988, "grad_norm": 0.98046875, "learning_rate": 0.0001863248684361544, "loss": 0.115, "step": 3616 }, { "epoch": 1.6882147024504084, "grad_norm": 0.93359375, "learning_rate": 0.00018631746064279225, "loss": 0.0881, "step": 3617 }, { "epoch": 1.688681446907818, "grad_norm": 0.99609375, "learning_rate": 0.00018631005099091944, "loss": 0.1193, "step": 3618 }, { "epoch": 1.6891481913652275, "grad_norm": 1.1953125, "learning_rate": 0.00018630263948069543, "loss": 0.152, "step": 3619 }, { "epoch": 1.689614935822637, "grad_norm": 1.0703125, "learning_rate": 0.00018629522611227986, "loss": 0.1064, "step": 3620 }, { "epoch": 1.6900816802800467, "grad_norm": 0.8125, "learning_rate": 0.00018628781088583232, "loss": 0.0914, "step": 3621 }, { "epoch": 1.6905484247374563, "grad_norm": 0.80078125, "learning_rate": 0.0001862803938015125, "loss": 0.1054, "step": 3622 }, { "epoch": 1.6910151691948658, "grad_norm": 0.921875, "learning_rate": 0.00018627297485948, "loss": 0.0978, "step": 3623 }, { "epoch": 1.6914819136522754, "grad_norm": 0.8984375, "learning_rate": 0.00018626555405989466, "loss": 0.1098, "step": 3624 }, { "epoch": 1.691948658109685, "grad_norm": 0.71875, "learning_rate": 0.0001862581314029163, "loss": 0.0979, "step": 3625 }, { "epoch": 1.6924154025670946, "grad_norm": 0.765625, "learning_rate": 0.00018625070688870458, "loss": 0.0754, "step": 3626 }, { "epoch": 1.6928821470245041, "grad_norm": 0.8671875, "learning_rate": 0.00018624328051741946, "loss": 0.0875, "step": 3627 }, { "epoch": 1.6933488914819137, "grad_norm": 0.953125, "learning_rate": 0.00018623585228922084, "loss": 0.112, "step": 3628 }, { "epoch": 1.6938156359393233, "grad_norm": 1.2421875, "learning_rate": 0.00018622842220426865, "loss": 0.1529, "step": 3629 }, { "epoch": 1.6942823803967326, "grad_norm": 0.81640625, "learning_rate": 0.00018622099026272283, "loss": 0.084, "step": 3630 }, { "epoch": 1.6947491248541424, "grad_norm": 0.984375, "learning_rate": 0.00018621355646474347, "loss": 0.1358, "step": 3631 }, { "epoch": 1.6952158693115518, "grad_norm": 1.0703125, "learning_rate": 0.0001862061208104905, "loss": 0.1399, "step": 3632 }, { "epoch": 1.6956826137689616, "grad_norm": 0.92578125, "learning_rate": 0.0001861986833001242, "loss": 0.0766, "step": 3633 }, { "epoch": 1.696149358226371, "grad_norm": 1.1015625, "learning_rate": 0.00018619124393380455, "loss": 0.1123, "step": 3634 }, { "epoch": 1.6966161026837807, "grad_norm": 1.0390625, "learning_rate": 0.00018618380271169177, "loss": 0.1094, "step": 3635 }, { "epoch": 1.69708284714119, "grad_norm": 0.85546875, "learning_rate": 0.00018617635963394613, "loss": 0.068, "step": 3636 }, { "epoch": 1.6975495915985999, "grad_norm": 1.2109375, "learning_rate": 0.00018616891470072785, "loss": 0.1002, "step": 3637 }, { "epoch": 1.6980163360560092, "grad_norm": 1.234375, "learning_rate": 0.00018616146791219725, "loss": 0.1179, "step": 3638 }, { "epoch": 1.698483080513419, "grad_norm": 0.9296875, "learning_rate": 0.00018615401926851463, "loss": 0.1017, "step": 3639 }, { "epoch": 1.6989498249708284, "grad_norm": 0.8515625, "learning_rate": 0.00018614656876984035, "loss": 0.1034, "step": 3640 }, { "epoch": 1.6994165694282382, "grad_norm": 1.0625, "learning_rate": 0.0001861391164163349, "loss": 0.1159, "step": 3641 }, { "epoch": 1.6998833138856475, "grad_norm": 0.95703125, "learning_rate": 0.00018613166220815872, "loss": 0.1267, "step": 3642 }, { "epoch": 1.7003500583430573, "grad_norm": 1.015625, "learning_rate": 0.00018612420614547227, "loss": 0.0969, "step": 3643 }, { "epoch": 1.7008168028004667, "grad_norm": 1.3671875, "learning_rate": 0.00018611674822843608, "loss": 0.1118, "step": 3644 }, { "epoch": 1.7012835472578764, "grad_norm": 0.82421875, "learning_rate": 0.00018610928845721078, "loss": 0.0793, "step": 3645 }, { "epoch": 1.7017502917152858, "grad_norm": 0.8359375, "learning_rate": 0.00018610182683195695, "loss": 0.114, "step": 3646 }, { "epoch": 1.7022170361726956, "grad_norm": 0.98046875, "learning_rate": 0.00018609436335283524, "loss": 0.1234, "step": 3647 }, { "epoch": 1.702683780630105, "grad_norm": 0.9140625, "learning_rate": 0.00018608689802000639, "loss": 0.086, "step": 3648 }, { "epoch": 1.7031505250875147, "grad_norm": 1.0078125, "learning_rate": 0.0001860794308336311, "loss": 0.1074, "step": 3649 }, { "epoch": 1.703617269544924, "grad_norm": 0.8984375, "learning_rate": 0.00018607196179387016, "loss": 0.1074, "step": 3650 }, { "epoch": 1.7040840140023337, "grad_norm": 1.2109375, "learning_rate": 0.0001860644909008844, "loss": 0.1649, "step": 3651 }, { "epoch": 1.7045507584597432, "grad_norm": 0.98046875, "learning_rate": 0.00018605701815483462, "loss": 0.1171, "step": 3652 }, { "epoch": 1.7050175029171528, "grad_norm": 1.1015625, "learning_rate": 0.0001860495435558818, "loss": 0.1151, "step": 3653 }, { "epoch": 1.7054842473745624, "grad_norm": 1.1328125, "learning_rate": 0.00018604206710418682, "loss": 0.1371, "step": 3654 }, { "epoch": 1.705950991831972, "grad_norm": 1.1484375, "learning_rate": 0.00018603458879991064, "loss": 0.1308, "step": 3655 }, { "epoch": 1.7064177362893815, "grad_norm": 0.98046875, "learning_rate": 0.00018602710864321434, "loss": 0.1181, "step": 3656 }, { "epoch": 1.706884480746791, "grad_norm": 0.890625, "learning_rate": 0.00018601962663425894, "loss": 0.0837, "step": 3657 }, { "epoch": 1.7073512252042007, "grad_norm": 1.2109375, "learning_rate": 0.00018601214277320552, "loss": 0.1602, "step": 3658 }, { "epoch": 1.7078179696616103, "grad_norm": 1.0, "learning_rate": 0.00018600465706021524, "loss": 0.1126, "step": 3659 }, { "epoch": 1.7082847141190198, "grad_norm": 1.0625, "learning_rate": 0.0001859971694954493, "loss": 0.1395, "step": 3660 }, { "epoch": 1.7087514585764294, "grad_norm": 0.87109375, "learning_rate": 0.00018598968007906886, "loss": 0.0905, "step": 3661 }, { "epoch": 1.709218203033839, "grad_norm": 0.87109375, "learning_rate": 0.00018598218881123518, "loss": 0.1079, "step": 3662 }, { "epoch": 1.7096849474912486, "grad_norm": 0.7890625, "learning_rate": 0.00018597469569210962, "loss": 0.1126, "step": 3663 }, { "epoch": 1.7101516919486581, "grad_norm": 0.93359375, "learning_rate": 0.00018596720072185346, "loss": 0.1175, "step": 3664 }, { "epoch": 1.7106184364060677, "grad_norm": 0.96875, "learning_rate": 0.00018595970390062805, "loss": 0.0989, "step": 3665 }, { "epoch": 1.7110851808634773, "grad_norm": 0.82421875, "learning_rate": 0.00018595220522859486, "loss": 0.1003, "step": 3666 }, { "epoch": 1.7115519253208868, "grad_norm": 1.109375, "learning_rate": 0.00018594470470591535, "loss": 0.1117, "step": 3667 }, { "epoch": 1.7120186697782964, "grad_norm": 1.2578125, "learning_rate": 0.00018593720233275096, "loss": 0.1645, "step": 3668 }, { "epoch": 1.712485414235706, "grad_norm": 0.97265625, "learning_rate": 0.00018592969810926326, "loss": 0.1166, "step": 3669 }, { "epoch": 1.7129521586931156, "grad_norm": 0.83984375, "learning_rate": 0.00018592219203561382, "loss": 0.0914, "step": 3670 }, { "epoch": 1.7134189031505251, "grad_norm": 1.09375, "learning_rate": 0.00018591468411196426, "loss": 0.1366, "step": 3671 }, { "epoch": 1.7138856476079347, "grad_norm": 1.0546875, "learning_rate": 0.00018590717433847624, "loss": 0.1388, "step": 3672 }, { "epoch": 1.714352392065344, "grad_norm": 0.7265625, "learning_rate": 0.00018589966271531142, "loss": 0.0812, "step": 3673 }, { "epoch": 1.7148191365227539, "grad_norm": 1.0390625, "learning_rate": 0.00018589214924263157, "loss": 0.1273, "step": 3674 }, { "epoch": 1.7152858809801632, "grad_norm": 1.2109375, "learning_rate": 0.00018588463392059843, "loss": 0.1298, "step": 3675 }, { "epoch": 1.715752625437573, "grad_norm": 1.1015625, "learning_rate": 0.00018587711674937385, "loss": 0.1253, "step": 3676 }, { "epoch": 1.7162193698949824, "grad_norm": 0.94921875, "learning_rate": 0.00018586959772911964, "loss": 0.0957, "step": 3677 }, { "epoch": 1.7166861143523922, "grad_norm": 1.0703125, "learning_rate": 0.0001858620768599977, "loss": 0.0978, "step": 3678 }, { "epoch": 1.7171528588098015, "grad_norm": 0.83203125, "learning_rate": 0.00018585455414217003, "loss": 0.0942, "step": 3679 }, { "epoch": 1.7176196032672113, "grad_norm": 0.9375, "learning_rate": 0.00018584702957579854, "loss": 0.101, "step": 3680 }, { "epoch": 1.7180863477246207, "grad_norm": 1.1953125, "learning_rate": 0.00018583950316104523, "loss": 0.1299, "step": 3681 }, { "epoch": 1.7185530921820305, "grad_norm": 0.859375, "learning_rate": 0.00018583197489807223, "loss": 0.087, "step": 3682 }, { "epoch": 1.7190198366394398, "grad_norm": 1.21875, "learning_rate": 0.00018582444478704152, "loss": 0.0805, "step": 3683 }, { "epoch": 1.7194865810968496, "grad_norm": 0.98046875, "learning_rate": 0.0001858169128281153, "loss": 0.0869, "step": 3684 }, { "epoch": 1.719953325554259, "grad_norm": 0.9296875, "learning_rate": 0.00018580937902145576, "loss": 0.0941, "step": 3685 }, { "epoch": 1.7204200700116687, "grad_norm": 0.94140625, "learning_rate": 0.00018580184336722505, "loss": 0.0925, "step": 3686 }, { "epoch": 1.720886814469078, "grad_norm": 1.1328125, "learning_rate": 0.0001857943058655855, "loss": 0.0949, "step": 3687 }, { "epoch": 1.721353558926488, "grad_norm": 0.9921875, "learning_rate": 0.0001857867665166993, "loss": 0.1011, "step": 3688 }, { "epoch": 1.7218203033838972, "grad_norm": 1.1171875, "learning_rate": 0.00018577922532072886, "loss": 0.0884, "step": 3689 }, { "epoch": 1.722287047841307, "grad_norm": 1.15625, "learning_rate": 0.00018577168227783654, "loss": 0.1166, "step": 3690 }, { "epoch": 1.7227537922987164, "grad_norm": 1.0078125, "learning_rate": 0.0001857641373881847, "loss": 0.0932, "step": 3691 }, { "epoch": 1.7232205367561262, "grad_norm": 1.203125, "learning_rate": 0.00018575659065193584, "loss": 0.0996, "step": 3692 }, { "epoch": 1.7236872812135355, "grad_norm": 0.9375, "learning_rate": 0.00018574904206925243, "loss": 0.1152, "step": 3693 }, { "epoch": 1.7241540256709451, "grad_norm": 1.0, "learning_rate": 0.00018574149164029703, "loss": 0.0994, "step": 3694 }, { "epoch": 1.7246207701283547, "grad_norm": 1.1953125, "learning_rate": 0.00018573393936523216, "loss": 0.1292, "step": 3695 }, { "epoch": 1.7250875145857643, "grad_norm": 0.984375, "learning_rate": 0.00018572638524422047, "loss": 0.1135, "step": 3696 }, { "epoch": 1.7255542590431738, "grad_norm": 1.0234375, "learning_rate": 0.00018571882927742456, "loss": 0.1447, "step": 3697 }, { "epoch": 1.7260210035005834, "grad_norm": 1.0234375, "learning_rate": 0.00018571127146500718, "loss": 0.1479, "step": 3698 }, { "epoch": 1.726487747957993, "grad_norm": 0.84765625, "learning_rate": 0.000185703711807131, "loss": 0.102, "step": 3699 }, { "epoch": 1.7269544924154026, "grad_norm": 1.0, "learning_rate": 0.00018569615030395885, "loss": 0.1398, "step": 3700 }, { "epoch": 1.7274212368728121, "grad_norm": 0.75, "learning_rate": 0.00018568858695565345, "loss": 0.0915, "step": 3701 }, { "epoch": 1.7278879813302217, "grad_norm": 0.95703125, "learning_rate": 0.00018568102176237775, "loss": 0.1029, "step": 3702 }, { "epoch": 1.7283547257876313, "grad_norm": 0.99609375, "learning_rate": 0.00018567345472429456, "loss": 0.1234, "step": 3703 }, { "epoch": 1.7288214702450408, "grad_norm": 1.09375, "learning_rate": 0.00018566588584156683, "loss": 0.157, "step": 3704 }, { "epoch": 1.7292882147024504, "grad_norm": 0.9296875, "learning_rate": 0.00018565831511435757, "loss": 0.1155, "step": 3705 }, { "epoch": 1.72975495915986, "grad_norm": 0.96484375, "learning_rate": 0.0001856507425428297, "loss": 0.0976, "step": 3706 }, { "epoch": 1.7302217036172696, "grad_norm": 0.95703125, "learning_rate": 0.00018564316812714636, "loss": 0.0977, "step": 3707 }, { "epoch": 1.7306884480746791, "grad_norm": 0.87109375, "learning_rate": 0.00018563559186747056, "loss": 0.1068, "step": 3708 }, { "epoch": 1.7311551925320887, "grad_norm": 1.0078125, "learning_rate": 0.00018562801376396544, "loss": 0.0898, "step": 3709 }, { "epoch": 1.7316219369894983, "grad_norm": 0.859375, "learning_rate": 0.00018562043381679418, "loss": 0.0868, "step": 3710 }, { "epoch": 1.7320886814469079, "grad_norm": 1.1484375, "learning_rate": 0.00018561285202611997, "loss": 0.1146, "step": 3711 }, { "epoch": 1.7325554259043174, "grad_norm": 1.171875, "learning_rate": 0.00018560526839210608, "loss": 0.1063, "step": 3712 }, { "epoch": 1.733022170361727, "grad_norm": 0.76953125, "learning_rate": 0.0001855976829149158, "loss": 0.0681, "step": 3713 }, { "epoch": 1.7334889148191364, "grad_norm": 0.96875, "learning_rate": 0.00018559009559471242, "loss": 0.1153, "step": 3714 }, { "epoch": 1.7339556592765462, "grad_norm": 1.03125, "learning_rate": 0.00018558250643165933, "loss": 0.1109, "step": 3715 }, { "epoch": 1.7344224037339555, "grad_norm": 0.8359375, "learning_rate": 0.0001855749154259199, "loss": 0.0992, "step": 3716 }, { "epoch": 1.7348891481913653, "grad_norm": 1.0, "learning_rate": 0.0001855673225776576, "loss": 0.1373, "step": 3717 }, { "epoch": 1.7353558926487747, "grad_norm": 1.1171875, "learning_rate": 0.00018555972788703593, "loss": 0.1075, "step": 3718 }, { "epoch": 1.7358226371061845, "grad_norm": 0.93359375, "learning_rate": 0.00018555213135421834, "loss": 0.0706, "step": 3719 }, { "epoch": 1.7362893815635938, "grad_norm": 0.73828125, "learning_rate": 0.00018554453297936848, "loss": 0.0837, "step": 3720 }, { "epoch": 1.7367561260210036, "grad_norm": 1.078125, "learning_rate": 0.0001855369327626499, "loss": 0.1385, "step": 3721 }, { "epoch": 1.737222870478413, "grad_norm": 1.0703125, "learning_rate": 0.00018552933070422624, "loss": 0.0956, "step": 3722 }, { "epoch": 1.7376896149358227, "grad_norm": 0.984375, "learning_rate": 0.00018552172680426124, "loss": 0.1038, "step": 3723 }, { "epoch": 1.738156359393232, "grad_norm": 1.171875, "learning_rate": 0.00018551412106291852, "loss": 0.1031, "step": 3724 }, { "epoch": 1.738623103850642, "grad_norm": 0.9140625, "learning_rate": 0.00018550651348036193, "loss": 0.087, "step": 3725 }, { "epoch": 1.7390898483080512, "grad_norm": 0.76171875, "learning_rate": 0.00018549890405675522, "loss": 0.0707, "step": 3726 }, { "epoch": 1.739556592765461, "grad_norm": 1.078125, "learning_rate": 0.00018549129279226223, "loss": 0.1369, "step": 3727 }, { "epoch": 1.7400233372228704, "grad_norm": 1.1796875, "learning_rate": 0.00018548367968704687, "loss": 0.1269, "step": 3728 }, { "epoch": 1.7404900816802802, "grad_norm": 0.83203125, "learning_rate": 0.00018547606474127304, "loss": 0.0739, "step": 3729 }, { "epoch": 1.7409568261376895, "grad_norm": 0.93359375, "learning_rate": 0.00018546844795510467, "loss": 0.1033, "step": 3730 }, { "epoch": 1.7414235705950993, "grad_norm": 1.0, "learning_rate": 0.0001854608293287058, "loss": 0.1084, "step": 3731 }, { "epoch": 1.7418903150525087, "grad_norm": 0.95703125, "learning_rate": 0.0001854532088622405, "loss": 0.1093, "step": 3732 }, { "epoch": 1.7423570595099185, "grad_norm": 0.8671875, "learning_rate": 0.00018544558655587276, "loss": 0.1117, "step": 3733 }, { "epoch": 1.7428238039673278, "grad_norm": 1.09375, "learning_rate": 0.00018543796240976673, "loss": 0.1261, "step": 3734 }, { "epoch": 1.7432905484247374, "grad_norm": 1.0234375, "learning_rate": 0.00018543033642408658, "loss": 0.1143, "step": 3735 }, { "epoch": 1.743757292882147, "grad_norm": 0.84375, "learning_rate": 0.00018542270859899654, "loss": 0.0934, "step": 3736 }, { "epoch": 1.7442240373395566, "grad_norm": 0.82421875, "learning_rate": 0.00018541507893466075, "loss": 0.0842, "step": 3737 }, { "epoch": 1.7446907817969661, "grad_norm": 1.0234375, "learning_rate": 0.00018540744743124357, "loss": 0.1123, "step": 3738 }, { "epoch": 1.7451575262543757, "grad_norm": 1.0859375, "learning_rate": 0.00018539981408890929, "loss": 0.1094, "step": 3739 }, { "epoch": 1.7456242707117853, "grad_norm": 0.92578125, "learning_rate": 0.00018539217890782222, "loss": 0.1185, "step": 3740 }, { "epoch": 1.7460910151691948, "grad_norm": 0.73828125, "learning_rate": 0.00018538454188814685, "loss": 0.0744, "step": 3741 }, { "epoch": 1.7465577596266044, "grad_norm": 1.1015625, "learning_rate": 0.0001853769030300475, "loss": 0.0983, "step": 3742 }, { "epoch": 1.747024504084014, "grad_norm": 0.90625, "learning_rate": 0.00018536926233368874, "loss": 0.0922, "step": 3743 }, { "epoch": 1.7474912485414236, "grad_norm": 0.875, "learning_rate": 0.00018536161979923504, "loss": 0.0842, "step": 3744 }, { "epoch": 1.7479579929988331, "grad_norm": 0.953125, "learning_rate": 0.00018535397542685094, "loss": 0.0974, "step": 3745 }, { "epoch": 1.7484247374562427, "grad_norm": 1.0703125, "learning_rate": 0.00018534632921670107, "loss": 0.0907, "step": 3746 }, { "epoch": 1.7488914819136523, "grad_norm": 1.203125, "learning_rate": 0.00018533868116895, "loss": 0.1031, "step": 3747 }, { "epoch": 1.7493582263710619, "grad_norm": 1.125, "learning_rate": 0.00018533103128376248, "loss": 0.1227, "step": 3748 }, { "epoch": 1.7498249708284714, "grad_norm": 0.8984375, "learning_rate": 0.0001853233795613032, "loss": 0.0832, "step": 3749 }, { "epoch": 1.750291715285881, "grad_norm": 1.015625, "learning_rate": 0.0001853157260017368, "loss": 0.0964, "step": 3750 }, { "epoch": 1.7507584597432906, "grad_norm": 0.91015625, "learning_rate": 0.00018530807060522823, "loss": 0.0779, "step": 3751 }, { "epoch": 1.7512252042007002, "grad_norm": 0.9453125, "learning_rate": 0.00018530041337194226, "loss": 0.1441, "step": 3752 }, { "epoch": 1.7512252042007002, "eval_loss": 1.46182119846344, "eval_runtime": 94.8522, "eval_samples_per_second": 19.019, "eval_steps_per_second": 2.383, "step": 3752 }, { "epoch": 1.7516919486581097, "grad_norm": 1.21875, "learning_rate": 0.0001852927543020437, "loss": 0.1097, "step": 3753 }, { "epoch": 1.7521586931155193, "grad_norm": 1.25, "learning_rate": 0.00018528509339569755, "loss": 0.1364, "step": 3754 }, { "epoch": 1.7526254375729287, "grad_norm": 1.046875, "learning_rate": 0.00018527743065306867, "loss": 0.1132, "step": 3755 }, { "epoch": 1.7530921820303385, "grad_norm": 1.09375, "learning_rate": 0.00018526976607432212, "loss": 0.1266, "step": 3756 }, { "epoch": 1.7535589264877478, "grad_norm": 0.9609375, "learning_rate": 0.0001852620996596229, "loss": 0.1122, "step": 3757 }, { "epoch": 1.7540256709451576, "grad_norm": 0.8984375, "learning_rate": 0.00018525443140913605, "loss": 0.1017, "step": 3758 }, { "epoch": 1.754492415402567, "grad_norm": 0.86328125, "learning_rate": 0.00018524676132302672, "loss": 0.0835, "step": 3759 }, { "epoch": 1.7549591598599767, "grad_norm": 0.83984375, "learning_rate": 0.00018523908940146005, "loss": 0.1003, "step": 3760 }, { "epoch": 1.755425904317386, "grad_norm": 1.0546875, "learning_rate": 0.00018523141564460115, "loss": 0.1446, "step": 3761 }, { "epoch": 1.755892648774796, "grad_norm": 1.078125, "learning_rate": 0.0001852237400526154, "loss": 0.1253, "step": 3762 }, { "epoch": 1.7563593932322052, "grad_norm": 0.90234375, "learning_rate": 0.00018521606262566787, "loss": 0.1101, "step": 3763 }, { "epoch": 1.756826137689615, "grad_norm": 0.8828125, "learning_rate": 0.00018520838336392404, "loss": 0.0999, "step": 3764 }, { "epoch": 1.7572928821470244, "grad_norm": 0.99609375, "learning_rate": 0.00018520070226754913, "loss": 0.0991, "step": 3765 }, { "epoch": 1.7577596266044342, "grad_norm": 0.97265625, "learning_rate": 0.00018519301933670859, "loss": 0.1192, "step": 3766 }, { "epoch": 1.7582263710618435, "grad_norm": 1.0234375, "learning_rate": 0.0001851853345715678, "loss": 0.1076, "step": 3767 }, { "epoch": 1.7586931155192533, "grad_norm": 0.9609375, "learning_rate": 0.00018517764797229224, "loss": 0.0971, "step": 3768 }, { "epoch": 1.7591598599766627, "grad_norm": 0.86328125, "learning_rate": 0.00018516995953904742, "loss": 0.0882, "step": 3769 }, { "epoch": 1.7596266044340725, "grad_norm": 1.046875, "learning_rate": 0.0001851622692719989, "loss": 0.1058, "step": 3770 }, { "epoch": 1.7600933488914818, "grad_norm": 0.9453125, "learning_rate": 0.00018515457717131222, "loss": 0.1063, "step": 3771 }, { "epoch": 1.7605600933488916, "grad_norm": 0.98828125, "learning_rate": 0.000185146883237153, "loss": 0.0974, "step": 3772 }, { "epoch": 1.761026837806301, "grad_norm": 0.84375, "learning_rate": 0.00018513918746968693, "loss": 0.0916, "step": 3773 }, { "epoch": 1.7614935822637108, "grad_norm": 1.2578125, "learning_rate": 0.00018513148986907968, "loss": 0.1313, "step": 3774 }, { "epoch": 1.7619603267211201, "grad_norm": 0.91796875, "learning_rate": 0.00018512379043549699, "loss": 0.0916, "step": 3775 }, { "epoch": 1.7624270711785297, "grad_norm": 0.89453125, "learning_rate": 0.00018511608916910464, "loss": 0.0721, "step": 3776 }, { "epoch": 1.7628938156359393, "grad_norm": 0.83984375, "learning_rate": 0.00018510838607006848, "loss": 0.0969, "step": 3777 }, { "epoch": 1.7633605600933488, "grad_norm": 1.1953125, "learning_rate": 0.00018510068113855433, "loss": 0.0896, "step": 3778 }, { "epoch": 1.7638273045507584, "grad_norm": 1.21875, "learning_rate": 0.0001850929743747281, "loss": 0.1078, "step": 3779 }, { "epoch": 1.764294049008168, "grad_norm": 1.109375, "learning_rate": 0.0001850852657787557, "loss": 0.1194, "step": 3780 }, { "epoch": 1.7647607934655776, "grad_norm": 1.0078125, "learning_rate": 0.00018507755535080312, "loss": 0.114, "step": 3781 }, { "epoch": 1.7652275379229871, "grad_norm": 1.03125, "learning_rate": 0.00018506984309103638, "loss": 0.0993, "step": 3782 }, { "epoch": 1.7656942823803967, "grad_norm": 1.046875, "learning_rate": 0.00018506212899962156, "loss": 0.1162, "step": 3783 }, { "epoch": 1.7661610268378063, "grad_norm": 0.83984375, "learning_rate": 0.00018505441307672472, "loss": 0.0636, "step": 3784 }, { "epoch": 1.7666277712952159, "grad_norm": 0.734375, "learning_rate": 0.000185046695322512, "loss": 0.0706, "step": 3785 }, { "epoch": 1.7670945157526254, "grad_norm": 0.94921875, "learning_rate": 0.00018503897573714952, "loss": 0.104, "step": 3786 }, { "epoch": 1.767561260210035, "grad_norm": 0.85546875, "learning_rate": 0.00018503125432080356, "loss": 0.0986, "step": 3787 }, { "epoch": 1.7680280046674446, "grad_norm": 0.9453125, "learning_rate": 0.0001850235310736404, "loss": 0.0874, "step": 3788 }, { "epoch": 1.7684947491248542, "grad_norm": 0.87109375, "learning_rate": 0.00018501580599582622, "loss": 0.0729, "step": 3789 }, { "epoch": 1.7689614935822637, "grad_norm": 0.83203125, "learning_rate": 0.00018500807908752745, "loss": 0.0752, "step": 3790 }, { "epoch": 1.7694282380396733, "grad_norm": 1.03125, "learning_rate": 0.0001850003503489104, "loss": 0.1197, "step": 3791 }, { "epoch": 1.7698949824970829, "grad_norm": 1.0, "learning_rate": 0.0001849926197801415, "loss": 0.0857, "step": 3792 }, { "epoch": 1.7703617269544925, "grad_norm": 0.91796875, "learning_rate": 0.0001849848873813872, "loss": 0.0802, "step": 3793 }, { "epoch": 1.770828471411902, "grad_norm": 1.3671875, "learning_rate": 0.00018497715315281396, "loss": 0.1092, "step": 3794 }, { "epoch": 1.7712952158693116, "grad_norm": 1.1953125, "learning_rate": 0.00018496941709458836, "loss": 0.0813, "step": 3795 }, { "epoch": 1.7717619603267212, "grad_norm": 0.9921875, "learning_rate": 0.0001849616792068769, "loss": 0.0848, "step": 3796 }, { "epoch": 1.7722287047841307, "grad_norm": 1.0078125, "learning_rate": 0.00018495393948984626, "loss": 0.1235, "step": 3797 }, { "epoch": 1.77269544924154, "grad_norm": 1.0703125, "learning_rate": 0.00018494619794366305, "loss": 0.083, "step": 3798 }, { "epoch": 1.77316219369895, "grad_norm": 0.92578125, "learning_rate": 0.00018493845456849387, "loss": 0.0859, "step": 3799 }, { "epoch": 1.7736289381563592, "grad_norm": 1.03125, "learning_rate": 0.00018493070936450562, "loss": 0.1075, "step": 3800 }, { "epoch": 1.774095682613769, "grad_norm": 0.9453125, "learning_rate": 0.0001849229623318649, "loss": 0.1053, "step": 3801 }, { "epoch": 1.7745624270711784, "grad_norm": 1.046875, "learning_rate": 0.00018491521347073858, "loss": 0.0763, "step": 3802 }, { "epoch": 1.7750291715285882, "grad_norm": 1.015625, "learning_rate": 0.00018490746278129353, "loss": 0.0979, "step": 3803 }, { "epoch": 1.7754959159859975, "grad_norm": 0.90234375, "learning_rate": 0.0001848997102636966, "loss": 0.0945, "step": 3804 }, { "epoch": 1.7759626604434073, "grad_norm": 1.1171875, "learning_rate": 0.0001848919559181147, "loss": 0.0933, "step": 3805 }, { "epoch": 1.7764294049008167, "grad_norm": 1.0625, "learning_rate": 0.00018488419974471481, "loss": 0.1016, "step": 3806 }, { "epoch": 1.7768961493582265, "grad_norm": 0.99609375, "learning_rate": 0.0001848764417436639, "loss": 0.1144, "step": 3807 }, { "epoch": 1.7773628938156358, "grad_norm": 1.1484375, "learning_rate": 0.00018486868191512902, "loss": 0.107, "step": 3808 }, { "epoch": 1.7778296382730456, "grad_norm": 0.84765625, "learning_rate": 0.00018486092025927727, "loss": 0.0887, "step": 3809 }, { "epoch": 1.778296382730455, "grad_norm": 1.015625, "learning_rate": 0.00018485315677627576, "loss": 0.1196, "step": 3810 }, { "epoch": 1.7787631271878648, "grad_norm": 1.15625, "learning_rate": 0.00018484539146629162, "loss": 0.1247, "step": 3811 }, { "epoch": 1.7792298716452741, "grad_norm": 0.8828125, "learning_rate": 0.00018483762432949208, "loss": 0.0837, "step": 3812 }, { "epoch": 1.779696616102684, "grad_norm": 0.8671875, "learning_rate": 0.00018482985536604437, "loss": 0.1015, "step": 3813 }, { "epoch": 1.7801633605600933, "grad_norm": 1.15625, "learning_rate": 0.0001848220845761157, "loss": 0.1258, "step": 3814 }, { "epoch": 1.780630105017503, "grad_norm": 0.88671875, "learning_rate": 0.00018481431195987348, "loss": 0.0834, "step": 3815 }, { "epoch": 1.7810968494749124, "grad_norm": 0.9921875, "learning_rate": 0.00018480653751748504, "loss": 0.093, "step": 3816 }, { "epoch": 1.781563593932322, "grad_norm": 1.0, "learning_rate": 0.0001847987612491177, "loss": 0.1321, "step": 3817 }, { "epoch": 1.7820303383897316, "grad_norm": 1.015625, "learning_rate": 0.000184790983154939, "loss": 0.098, "step": 3818 }, { "epoch": 1.7824970828471411, "grad_norm": 0.72265625, "learning_rate": 0.00018478320323511634, "loss": 0.0821, "step": 3819 }, { "epoch": 1.7829638273045507, "grad_norm": 1.078125, "learning_rate": 0.00018477542148981723, "loss": 0.1082, "step": 3820 }, { "epoch": 1.7834305717619603, "grad_norm": 0.83984375, "learning_rate": 0.00018476763791920922, "loss": 0.0917, "step": 3821 }, { "epoch": 1.7838973162193699, "grad_norm": 1.078125, "learning_rate": 0.00018475985252345995, "loss": 0.0959, "step": 3822 }, { "epoch": 1.7843640606767794, "grad_norm": 1.109375, "learning_rate": 0.00018475206530273698, "loss": 0.1518, "step": 3823 }, { "epoch": 1.784830805134189, "grad_norm": 0.8359375, "learning_rate": 0.00018474427625720803, "loss": 0.0786, "step": 3824 }, { "epoch": 1.7852975495915986, "grad_norm": 1.109375, "learning_rate": 0.00018473648538704077, "loss": 0.0987, "step": 3825 }, { "epoch": 1.7857642940490082, "grad_norm": 1.1484375, "learning_rate": 0.000184728692692403, "loss": 0.1259, "step": 3826 }, { "epoch": 1.7862310385064177, "grad_norm": 1.046875, "learning_rate": 0.00018472089817346242, "loss": 0.1071, "step": 3827 }, { "epoch": 1.7866977829638273, "grad_norm": 0.9296875, "learning_rate": 0.00018471310183038696, "loss": 0.1308, "step": 3828 }, { "epoch": 1.7871645274212369, "grad_norm": 0.98828125, "learning_rate": 0.00018470530366334438, "loss": 0.1065, "step": 3829 }, { "epoch": 1.7876312718786465, "grad_norm": 0.92578125, "learning_rate": 0.00018469750367250263, "loss": 0.1253, "step": 3830 }, { "epoch": 1.788098016336056, "grad_norm": 1.0, "learning_rate": 0.00018468970185802968, "loss": 0.1057, "step": 3831 }, { "epoch": 1.7885647607934656, "grad_norm": 0.68359375, "learning_rate": 0.00018468189822009348, "loss": 0.0777, "step": 3832 }, { "epoch": 1.7890315052508752, "grad_norm": 0.96875, "learning_rate": 0.00018467409275886202, "loss": 0.1072, "step": 3833 }, { "epoch": 1.7894982497082847, "grad_norm": 1.1015625, "learning_rate": 0.0001846662854745034, "loss": 0.1175, "step": 3834 }, { "epoch": 1.7899649941656943, "grad_norm": 1.03125, "learning_rate": 0.00018465847636718575, "loss": 0.1134, "step": 3835 }, { "epoch": 1.790431738623104, "grad_norm": 0.60546875, "learning_rate": 0.00018465066543707717, "loss": 0.0662, "step": 3836 }, { "epoch": 1.7908984830805135, "grad_norm": 0.8984375, "learning_rate": 0.00018464285268434582, "loss": 0.0944, "step": 3837 }, { "epoch": 1.791365227537923, "grad_norm": 0.80859375, "learning_rate": 0.00018463503810915995, "loss": 0.0891, "step": 3838 }, { "epoch": 1.7918319719953324, "grad_norm": 1.109375, "learning_rate": 0.00018462722171168783, "loss": 0.1148, "step": 3839 }, { "epoch": 1.7922987164527422, "grad_norm": 0.79296875, "learning_rate": 0.0001846194034920977, "loss": 0.0893, "step": 3840 }, { "epoch": 1.7927654609101515, "grad_norm": 1.03125, "learning_rate": 0.0001846115834505579, "loss": 0.1159, "step": 3841 }, { "epoch": 1.7932322053675613, "grad_norm": 0.99609375, "learning_rate": 0.00018460376158723689, "loss": 0.0933, "step": 3842 }, { "epoch": 1.7936989498249707, "grad_norm": 0.94921875, "learning_rate": 0.00018459593790230299, "loss": 0.0755, "step": 3843 }, { "epoch": 1.7941656942823805, "grad_norm": 0.7734375, "learning_rate": 0.0001845881123959247, "loss": 0.0827, "step": 3844 }, { "epoch": 1.7946324387397898, "grad_norm": 0.8046875, "learning_rate": 0.00018458028506827047, "loss": 0.0745, "step": 3845 }, { "epoch": 1.7950991831971996, "grad_norm": 0.84375, "learning_rate": 0.00018457245591950886, "loss": 0.0844, "step": 3846 }, { "epoch": 1.795565927654609, "grad_norm": 0.84765625, "learning_rate": 0.00018456462494980847, "loss": 0.0886, "step": 3847 }, { "epoch": 1.7960326721120188, "grad_norm": 0.9375, "learning_rate": 0.00018455679215933788, "loss": 0.0838, "step": 3848 }, { "epoch": 1.7964994165694281, "grad_norm": 0.921875, "learning_rate": 0.00018454895754826573, "loss": 0.073, "step": 3849 }, { "epoch": 1.796966161026838, "grad_norm": 0.94921875, "learning_rate": 0.0001845411211167607, "loss": 0.0955, "step": 3850 }, { "epoch": 1.7974329054842473, "grad_norm": 1.0859375, "learning_rate": 0.00018453328286499154, "loss": 0.0768, "step": 3851 }, { "epoch": 1.797899649941657, "grad_norm": 0.78515625, "learning_rate": 0.00018452544279312703, "loss": 0.0859, "step": 3852 }, { "epoch": 1.7983663943990664, "grad_norm": 0.8515625, "learning_rate": 0.00018451760090133591, "loss": 0.0676, "step": 3853 }, { "epoch": 1.7988331388564762, "grad_norm": 1.1875, "learning_rate": 0.0001845097571897871, "loss": 0.1309, "step": 3854 }, { "epoch": 1.7992998833138856, "grad_norm": 1.140625, "learning_rate": 0.00018450191165864945, "loss": 0.0865, "step": 3855 }, { "epoch": 1.7997666277712954, "grad_norm": 0.94140625, "learning_rate": 0.00018449406430809186, "loss": 0.0926, "step": 3856 }, { "epoch": 1.8002333722287047, "grad_norm": 1.09375, "learning_rate": 0.00018448621513828334, "loss": 0.0857, "step": 3857 }, { "epoch": 1.8007001166861145, "grad_norm": 0.90234375, "learning_rate": 0.00018447836414939287, "loss": 0.0829, "step": 3858 }, { "epoch": 1.8011668611435239, "grad_norm": 0.8203125, "learning_rate": 0.0001844705113415895, "loss": 0.0753, "step": 3859 }, { "epoch": 1.8016336056009334, "grad_norm": 1.078125, "learning_rate": 0.00018446265671504227, "loss": 0.0852, "step": 3860 }, { "epoch": 1.802100350058343, "grad_norm": 0.8984375, "learning_rate": 0.00018445480026992037, "loss": 0.0915, "step": 3861 }, { "epoch": 1.8025670945157526, "grad_norm": 1.0, "learning_rate": 0.00018444694200639287, "loss": 0.0848, "step": 3862 }, { "epoch": 1.8030338389731622, "grad_norm": 1.0234375, "learning_rate": 0.00018443908192462905, "loss": 0.1146, "step": 3863 }, { "epoch": 1.8035005834305717, "grad_norm": 0.9375, "learning_rate": 0.0001844312200247981, "loss": 0.087, "step": 3864 }, { "epoch": 1.8039673278879813, "grad_norm": 0.9765625, "learning_rate": 0.0001844233563070693, "loss": 0.1719, "step": 3865 }, { "epoch": 1.8044340723453909, "grad_norm": 0.96875, "learning_rate": 0.00018441549077161194, "loss": 0.0931, "step": 3866 }, { "epoch": 1.8049008168028005, "grad_norm": 0.77734375, "learning_rate": 0.00018440762341859543, "loss": 0.077, "step": 3867 }, { "epoch": 1.80536756126021, "grad_norm": 1.03125, "learning_rate": 0.00018439975424818913, "loss": 0.1099, "step": 3868 }, { "epoch": 1.8058343057176196, "grad_norm": 1.0078125, "learning_rate": 0.00018439188326056249, "loss": 0.1242, "step": 3869 }, { "epoch": 1.8063010501750292, "grad_norm": 0.94921875, "learning_rate": 0.00018438401045588497, "loss": 0.1104, "step": 3870 }, { "epoch": 1.8067677946324388, "grad_norm": 0.94921875, "learning_rate": 0.00018437613583432606, "loss": 0.0989, "step": 3871 }, { "epoch": 1.8072345390898483, "grad_norm": 0.953125, "learning_rate": 0.00018436825939605536, "loss": 0.1336, "step": 3872 }, { "epoch": 1.807701283547258, "grad_norm": 0.9453125, "learning_rate": 0.00018436038114124238, "loss": 0.0844, "step": 3873 }, { "epoch": 1.8081680280046675, "grad_norm": 0.91796875, "learning_rate": 0.00018435250107005682, "loss": 0.1055, "step": 3874 }, { "epoch": 1.808634772462077, "grad_norm": 1.0625, "learning_rate": 0.00018434461918266832, "loss": 0.1219, "step": 3875 }, { "epoch": 1.8091015169194866, "grad_norm": 0.95703125, "learning_rate": 0.0001843367354792466, "loss": 0.108, "step": 3876 }, { "epoch": 1.8095682613768962, "grad_norm": 0.93359375, "learning_rate": 0.0001843288499599614, "loss": 0.0936, "step": 3877 }, { "epoch": 1.8100350058343058, "grad_norm": 0.953125, "learning_rate": 0.0001843209626249825, "loss": 0.0775, "step": 3878 }, { "epoch": 1.8105017502917153, "grad_norm": 1.15625, "learning_rate": 0.00018431307347447967, "loss": 0.1005, "step": 3879 }, { "epoch": 1.8109684947491247, "grad_norm": 1.1171875, "learning_rate": 0.00018430518250862283, "loss": 0.1253, "step": 3880 }, { "epoch": 1.8114352392065345, "grad_norm": 0.88671875, "learning_rate": 0.00018429728972758192, "loss": 0.086, "step": 3881 }, { "epoch": 1.8119019836639438, "grad_norm": 1.0546875, "learning_rate": 0.00018428939513152678, "loss": 0.1013, "step": 3882 }, { "epoch": 1.8123687281213536, "grad_norm": 0.890625, "learning_rate": 0.00018428149872062746, "loss": 0.0956, "step": 3883 }, { "epoch": 1.812835472578763, "grad_norm": 1.0625, "learning_rate": 0.00018427360049505398, "loss": 0.1251, "step": 3884 }, { "epoch": 1.8133022170361728, "grad_norm": 0.8984375, "learning_rate": 0.00018426570045497638, "loss": 0.095, "step": 3885 }, { "epoch": 1.8137689614935821, "grad_norm": 0.8984375, "learning_rate": 0.00018425779860056473, "loss": 0.0983, "step": 3886 }, { "epoch": 1.814235705950992, "grad_norm": 1.2109375, "learning_rate": 0.0001842498949319892, "loss": 0.1059, "step": 3887 }, { "epoch": 1.8147024504084013, "grad_norm": 0.7890625, "learning_rate": 0.00018424198944941994, "loss": 0.0814, "step": 3888 }, { "epoch": 1.815169194865811, "grad_norm": 1.0078125, "learning_rate": 0.0001842340821530272, "loss": 0.1074, "step": 3889 }, { "epoch": 1.8156359393232204, "grad_norm": 0.86328125, "learning_rate": 0.0001842261730429812, "loss": 0.0931, "step": 3890 }, { "epoch": 1.8161026837806302, "grad_norm": 0.96875, "learning_rate": 0.00018421826211945225, "loss": 0.1113, "step": 3891 }, { "epoch": 1.8165694282380396, "grad_norm": 1.0078125, "learning_rate": 0.00018421034938261068, "loss": 0.0825, "step": 3892 }, { "epoch": 1.8170361726954494, "grad_norm": 1.1015625, "learning_rate": 0.00018420243483262684, "loss": 0.1021, "step": 3893 }, { "epoch": 1.8175029171528587, "grad_norm": 1.109375, "learning_rate": 0.00018419451846967119, "loss": 0.1404, "step": 3894 }, { "epoch": 1.8179696616102685, "grad_norm": 0.80859375, "learning_rate": 0.0001841866002939141, "loss": 0.095, "step": 3895 }, { "epoch": 1.8184364060676779, "grad_norm": 1.21875, "learning_rate": 0.0001841786803055261, "loss": 0.0921, "step": 3896 }, { "epoch": 1.8189031505250877, "grad_norm": 0.87109375, "learning_rate": 0.00018417075850467773, "loss": 0.0765, "step": 3897 }, { "epoch": 1.819369894982497, "grad_norm": 1.0390625, "learning_rate": 0.00018416283489153953, "loss": 0.0953, "step": 3898 }, { "epoch": 1.8198366394399068, "grad_norm": 1.015625, "learning_rate": 0.00018415490946628214, "loss": 0.0896, "step": 3899 }, { "epoch": 1.8203033838973162, "grad_norm": 1.03125, "learning_rate": 0.00018414698222907617, "loss": 0.0912, "step": 3900 }, { "epoch": 1.8207701283547257, "grad_norm": 1.0, "learning_rate": 0.0001841390531800923, "loss": 0.1125, "step": 3901 }, { "epoch": 1.8212368728121353, "grad_norm": 1.0625, "learning_rate": 0.00018413112231950124, "loss": 0.115, "step": 3902 }, { "epoch": 1.8217036172695449, "grad_norm": 0.7578125, "learning_rate": 0.00018412318964747382, "loss": 0.0856, "step": 3903 }, { "epoch": 1.8221703617269545, "grad_norm": 1.0078125, "learning_rate": 0.00018411525516418077, "loss": 0.1201, "step": 3904 }, { "epoch": 1.822637106184364, "grad_norm": 1.234375, "learning_rate": 0.0001841073188697929, "loss": 0.1108, "step": 3905 }, { "epoch": 1.8231038506417736, "grad_norm": 0.93359375, "learning_rate": 0.0001840993807644812, "loss": 0.0961, "step": 3906 }, { "epoch": 1.8235705950991832, "grad_norm": 0.8359375, "learning_rate": 0.0001840914408484165, "loss": 0.0753, "step": 3907 }, { "epoch": 1.8240373395565928, "grad_norm": 1.09375, "learning_rate": 0.0001840834991217698, "loss": 0.1569, "step": 3908 }, { "epoch": 1.8245040840140023, "grad_norm": 0.91796875, "learning_rate": 0.00018407555558471204, "loss": 0.1007, "step": 3909 }, { "epoch": 1.824970828471412, "grad_norm": 1.015625, "learning_rate": 0.00018406761023741428, "loss": 0.0774, "step": 3910 }, { "epoch": 1.8254375729288215, "grad_norm": 0.9921875, "learning_rate": 0.00018405966308004764, "loss": 0.101, "step": 3911 }, { "epoch": 1.825904317386231, "grad_norm": 0.875, "learning_rate": 0.00018405171411278314, "loss": 0.0845, "step": 3912 }, { "epoch": 1.8263710618436406, "grad_norm": 0.921875, "learning_rate": 0.000184043763335792, "loss": 0.0975, "step": 3913 }, { "epoch": 1.8268378063010502, "grad_norm": 0.9375, "learning_rate": 0.0001840358107492454, "loss": 0.085, "step": 3914 }, { "epoch": 1.8273045507584598, "grad_norm": 0.72265625, "learning_rate": 0.00018402785635331453, "loss": 0.0631, "step": 3915 }, { "epoch": 1.8277712952158693, "grad_norm": 1.1015625, "learning_rate": 0.0001840199001481707, "loss": 0.1053, "step": 3916 }, { "epoch": 1.828238039673279, "grad_norm": 1.0, "learning_rate": 0.00018401194213398518, "loss": 0.117, "step": 3917 }, { "epoch": 1.8287047841306885, "grad_norm": 0.9375, "learning_rate": 0.00018400398231092934, "loss": 0.0769, "step": 3918 }, { "epoch": 1.829171528588098, "grad_norm": 0.8828125, "learning_rate": 0.00018399602067917456, "loss": 0.0819, "step": 3919 }, { "epoch": 1.8296382730455076, "grad_norm": 1.015625, "learning_rate": 0.00018398805723889228, "loss": 0.0954, "step": 3920 }, { "epoch": 1.830105017502917, "grad_norm": 0.81640625, "learning_rate": 0.0001839800919902539, "loss": 0.0844, "step": 3921 }, { "epoch": 1.8305717619603268, "grad_norm": 1.0, "learning_rate": 0.000183972124933431, "loss": 0.0982, "step": 3922 }, { "epoch": 1.8310385064177361, "grad_norm": 1.0625, "learning_rate": 0.00018396415606859506, "loss": 0.0809, "step": 3923 }, { "epoch": 1.831505250875146, "grad_norm": 0.96484375, "learning_rate": 0.0001839561853959177, "loss": 0.1076, "step": 3924 }, { "epoch": 1.8319719953325553, "grad_norm": 0.98828125, "learning_rate": 0.0001839482129155705, "loss": 0.08, "step": 3925 }, { "epoch": 1.832438739789965, "grad_norm": 1.2578125, "learning_rate": 0.00018394023862772514, "loss": 0.1377, "step": 3926 }, { "epoch": 1.8329054842473744, "grad_norm": 0.9140625, "learning_rate": 0.0001839322625325533, "loss": 0.0973, "step": 3927 }, { "epoch": 1.8333722287047842, "grad_norm": 0.83984375, "learning_rate": 0.00018392428463022672, "loss": 0.0952, "step": 3928 }, { "epoch": 1.8338389731621936, "grad_norm": 1.1328125, "learning_rate": 0.0001839163049209172, "loss": 0.0782, "step": 3929 }, { "epoch": 1.8343057176196034, "grad_norm": 0.9453125, "learning_rate": 0.0001839083234047965, "loss": 0.0784, "step": 3930 }, { "epoch": 1.8347724620770127, "grad_norm": 1.03125, "learning_rate": 0.0001839003400820365, "loss": 0.1112, "step": 3931 }, { "epoch": 1.8352392065344225, "grad_norm": 0.78125, "learning_rate": 0.0001838923549528091, "loss": 0.0735, "step": 3932 }, { "epoch": 1.8357059509918319, "grad_norm": 1.21875, "learning_rate": 0.00018388436801728623, "loss": 0.0975, "step": 3933 }, { "epoch": 1.8361726954492417, "grad_norm": 1.2109375, "learning_rate": 0.00018387637927563984, "loss": 0.0765, "step": 3934 }, { "epoch": 1.836639439906651, "grad_norm": 1.015625, "learning_rate": 0.00018386838872804195, "loss": 0.1003, "step": 3935 }, { "epoch": 1.8371061843640608, "grad_norm": 1.109375, "learning_rate": 0.00018386039637466455, "loss": 0.1046, "step": 3936 }, { "epoch": 1.8375729288214702, "grad_norm": 1.0625, "learning_rate": 0.00018385240221567984, "loss": 0.1162, "step": 3937 }, { "epoch": 1.83803967327888, "grad_norm": 1.046875, "learning_rate": 0.00018384440625125983, "loss": 0.0915, "step": 3938 }, { "epoch": 1.8385064177362893, "grad_norm": 1.046875, "learning_rate": 0.00018383640848157674, "loss": 0.0986, "step": 3939 }, { "epoch": 1.838973162193699, "grad_norm": 0.83984375, "learning_rate": 0.00018382840890680276, "loss": 0.0927, "step": 3940 }, { "epoch": 1.8394399066511085, "grad_norm": 1.03125, "learning_rate": 0.0001838204075271101, "loss": 0.1065, "step": 3941 }, { "epoch": 1.839906651108518, "grad_norm": 0.87890625, "learning_rate": 0.00018381240434267107, "loss": 0.0728, "step": 3942 }, { "epoch": 1.8403733955659276, "grad_norm": 1.109375, "learning_rate": 0.000183804399353658, "loss": 0.1295, "step": 3943 }, { "epoch": 1.8408401400233372, "grad_norm": 0.9765625, "learning_rate": 0.00018379639256024322, "loss": 0.1083, "step": 3944 }, { "epoch": 1.8413068844807468, "grad_norm": 0.9453125, "learning_rate": 0.00018378838396259915, "loss": 0.0928, "step": 3945 }, { "epoch": 1.8417736289381563, "grad_norm": 1.078125, "learning_rate": 0.00018378037356089823, "loss": 0.12, "step": 3946 }, { "epoch": 1.842240373395566, "grad_norm": 0.98828125, "learning_rate": 0.00018377236135531288, "loss": 0.1009, "step": 3947 }, { "epoch": 1.8427071178529755, "grad_norm": 0.9921875, "learning_rate": 0.00018376434734601564, "loss": 0.0881, "step": 3948 }, { "epoch": 1.843173862310385, "grad_norm": 0.9296875, "learning_rate": 0.0001837563315331791, "loss": 0.0904, "step": 3949 }, { "epoch": 1.8436406067677946, "grad_norm": 0.96484375, "learning_rate": 0.00018374831391697574, "loss": 0.1047, "step": 3950 }, { "epoch": 1.8441073512252042, "grad_norm": 1.0390625, "learning_rate": 0.00018374029449757831, "loss": 0.0852, "step": 3951 }, { "epoch": 1.8445740956826138, "grad_norm": 1.15625, "learning_rate": 0.0001837322732751594, "loss": 0.1025, "step": 3952 }, { "epoch": 1.8450408401400233, "grad_norm": 1.0234375, "learning_rate": 0.0001837242502498918, "loss": 0.089, "step": 3953 }, { "epoch": 1.845507584597433, "grad_norm": 0.99609375, "learning_rate": 0.00018371622542194814, "loss": 0.088, "step": 3954 }, { "epoch": 1.8459743290548425, "grad_norm": 0.83203125, "learning_rate": 0.00018370819879150128, "loss": 0.0758, "step": 3955 }, { "epoch": 1.846441073512252, "grad_norm": 1.0, "learning_rate": 0.00018370017035872406, "loss": 0.1093, "step": 3956 }, { "epoch": 1.8469078179696616, "grad_norm": 0.8203125, "learning_rate": 0.00018369214012378926, "loss": 0.0886, "step": 3957 }, { "epoch": 1.8473745624270712, "grad_norm": 0.95703125, "learning_rate": 0.00018368410808686982, "loss": 0.0771, "step": 3958 }, { "epoch": 1.8478413068844808, "grad_norm": 0.9453125, "learning_rate": 0.00018367607424813872, "loss": 0.1009, "step": 3959 }, { "epoch": 1.8483080513418904, "grad_norm": 0.89453125, "learning_rate": 0.0001836680386077689, "loss": 0.0932, "step": 3960 }, { "epoch": 1.8487747957993, "grad_norm": 1.0234375, "learning_rate": 0.00018366000116593333, "loss": 0.1112, "step": 3961 }, { "epoch": 1.8492415402567095, "grad_norm": 0.96484375, "learning_rate": 0.00018365196192280517, "loss": 0.0976, "step": 3962 }, { "epoch": 1.849708284714119, "grad_norm": 0.85546875, "learning_rate": 0.00018364392087855743, "loss": 0.0749, "step": 3963 }, { "epoch": 1.8501750291715284, "grad_norm": 0.87890625, "learning_rate": 0.00018363587803336327, "loss": 0.0939, "step": 3964 }, { "epoch": 1.8506417736289382, "grad_norm": 0.8984375, "learning_rate": 0.00018362783338739583, "loss": 0.0762, "step": 3965 }, { "epoch": 1.8511085180863476, "grad_norm": 1.1171875, "learning_rate": 0.0001836197869408284, "loss": 0.0985, "step": 3966 }, { "epoch": 1.8515752625437574, "grad_norm": 1.0703125, "learning_rate": 0.00018361173869383416, "loss": 0.089, "step": 3967 }, { "epoch": 1.8520420070011667, "grad_norm": 0.96484375, "learning_rate": 0.0001836036886465864, "loss": 0.0871, "step": 3968 }, { "epoch": 1.8525087514585765, "grad_norm": 1.0625, "learning_rate": 0.00018359563679925847, "loss": 0.1358, "step": 3969 }, { "epoch": 1.8529754959159859, "grad_norm": 0.90234375, "learning_rate": 0.00018358758315202377, "loss": 0.1076, "step": 3970 }, { "epoch": 1.8534422403733957, "grad_norm": 0.8203125, "learning_rate": 0.00018357952770505564, "loss": 0.0901, "step": 3971 }, { "epoch": 1.853908984830805, "grad_norm": 1.0390625, "learning_rate": 0.00018357147045852754, "loss": 0.1258, "step": 3972 }, { "epoch": 1.8543757292882148, "grad_norm": 0.94140625, "learning_rate": 0.00018356341141261294, "loss": 0.0738, "step": 3973 }, { "epoch": 1.8548424737456242, "grad_norm": 0.7890625, "learning_rate": 0.0001835553505674854, "loss": 0.0652, "step": 3974 }, { "epoch": 1.855309218203034, "grad_norm": 0.9765625, "learning_rate": 0.00018354728792331843, "loss": 0.0766, "step": 3975 }, { "epoch": 1.8557759626604433, "grad_norm": 0.93359375, "learning_rate": 0.0001835392234802857, "loss": 0.1044, "step": 3976 }, { "epoch": 1.856242707117853, "grad_norm": 0.80859375, "learning_rate": 0.00018353115723856073, "loss": 0.091, "step": 3977 }, { "epoch": 1.8567094515752625, "grad_norm": 0.8515625, "learning_rate": 0.00018352308919831732, "loss": 0.0725, "step": 3978 }, { "epoch": 1.8571761960326723, "grad_norm": 0.9609375, "learning_rate": 0.00018351501935972913, "loss": 0.0873, "step": 3979 }, { "epoch": 1.8576429404900816, "grad_norm": 1.0546875, "learning_rate": 0.00018350694772296987, "loss": 0.0922, "step": 3980 }, { "epoch": 1.8581096849474914, "grad_norm": 0.94140625, "learning_rate": 0.00018349887428821342, "loss": 0.0926, "step": 3981 }, { "epoch": 1.8585764294049008, "grad_norm": 1.03125, "learning_rate": 0.00018349079905563356, "loss": 0.0881, "step": 3982 }, { "epoch": 1.8590431738623105, "grad_norm": 1.1015625, "learning_rate": 0.00018348272202540412, "loss": 0.0834, "step": 3983 }, { "epoch": 1.85950991831972, "grad_norm": 0.94921875, "learning_rate": 0.00018347464319769908, "loss": 0.0707, "step": 3984 }, { "epoch": 1.8599766627771295, "grad_norm": 1.015625, "learning_rate": 0.00018346656257269234, "loss": 0.1147, "step": 3985 }, { "epoch": 1.860443407234539, "grad_norm": 0.890625, "learning_rate": 0.00018345848015055794, "loss": 0.096, "step": 3986 }, { "epoch": 1.8609101516919486, "grad_norm": 0.77734375, "learning_rate": 0.00018345039593146983, "loss": 0.0786, "step": 3987 }, { "epoch": 1.8613768961493582, "grad_norm": 0.98828125, "learning_rate": 0.0001834423099156021, "loss": 0.0858, "step": 3988 }, { "epoch": 1.8618436406067678, "grad_norm": 1.0078125, "learning_rate": 0.00018343422210312884, "loss": 0.11, "step": 3989 }, { "epoch": 1.8623103850641773, "grad_norm": 0.8515625, "learning_rate": 0.00018342613249422424, "loss": 0.0772, "step": 3990 }, { "epoch": 1.862777129521587, "grad_norm": 1.2734375, "learning_rate": 0.00018341804108906243, "loss": 0.1147, "step": 3991 }, { "epoch": 1.8632438739789965, "grad_norm": 1.0859375, "learning_rate": 0.00018340994788781768, "loss": 0.0997, "step": 3992 }, { "epoch": 1.863710618436406, "grad_norm": 0.9375, "learning_rate": 0.00018340185289066416, "loss": 0.1001, "step": 3993 }, { "epoch": 1.8641773628938156, "grad_norm": 0.86328125, "learning_rate": 0.00018339375609777622, "loss": 0.0802, "step": 3994 }, { "epoch": 1.8646441073512252, "grad_norm": 1.0625, "learning_rate": 0.0001833856575093282, "loss": 0.1102, "step": 3995 }, { "epoch": 1.8651108518086348, "grad_norm": 0.98828125, "learning_rate": 0.00018337755712549445, "loss": 0.0937, "step": 3996 }, { "epoch": 1.8655775962660444, "grad_norm": 0.93359375, "learning_rate": 0.00018336945494644937, "loss": 0.0858, "step": 3997 }, { "epoch": 1.866044340723454, "grad_norm": 0.953125, "learning_rate": 0.00018336135097236743, "loss": 0.0986, "step": 3998 }, { "epoch": 1.8665110851808635, "grad_norm": 0.9921875, "learning_rate": 0.00018335324520342314, "loss": 0.0963, "step": 3999 }, { "epoch": 1.866977829638273, "grad_norm": 1.0625, "learning_rate": 0.00018334513763979096, "loss": 0.0862, "step": 4000 }, { "epoch": 1.8674445740956827, "grad_norm": 1.015625, "learning_rate": 0.00018333702828164552, "loss": 0.0949, "step": 4001 }, { "epoch": 1.8679113185530922, "grad_norm": 0.94921875, "learning_rate": 0.0001833289171291614, "loss": 0.0908, "step": 4002 }, { "epoch": 1.8683780630105018, "grad_norm": 0.8359375, "learning_rate": 0.00018332080418251322, "loss": 0.0792, "step": 4003 }, { "epoch": 1.8688448074679114, "grad_norm": 0.87890625, "learning_rate": 0.00018331268944187567, "loss": 0.0812, "step": 4004 }, { "epoch": 1.8693115519253207, "grad_norm": 0.83984375, "learning_rate": 0.0001833045729074235, "loss": 0.0725, "step": 4005 }, { "epoch": 1.8697782963827305, "grad_norm": 1.078125, "learning_rate": 0.00018329645457933145, "loss": 0.1034, "step": 4006 }, { "epoch": 1.8702450408401399, "grad_norm": 0.96875, "learning_rate": 0.0001832883344577743, "loss": 0.0946, "step": 4007 }, { "epoch": 1.8707117852975497, "grad_norm": 0.875, "learning_rate": 0.00018328021254292694, "loss": 0.0705, "step": 4008 }, { "epoch": 1.871178529754959, "grad_norm": 0.90625, "learning_rate": 0.00018327208883496415, "loss": 0.0667, "step": 4009 }, { "epoch": 1.8716452742123688, "grad_norm": 0.8203125, "learning_rate": 0.00018326396333406093, "loss": 0.0912, "step": 4010 }, { "epoch": 1.8721120186697782, "grad_norm": 0.97265625, "learning_rate": 0.00018325583604039217, "loss": 0.0882, "step": 4011 }, { "epoch": 1.872578763127188, "grad_norm": 1.0390625, "learning_rate": 0.0001832477069541329, "loss": 0.086, "step": 4012 }, { "epoch": 1.8730455075845973, "grad_norm": 1.2734375, "learning_rate": 0.00018323957607545811, "loss": 0.1507, "step": 4013 }, { "epoch": 1.873512252042007, "grad_norm": 1.0390625, "learning_rate": 0.00018323144340454294, "loss": 0.1024, "step": 4014 }, { "epoch": 1.8739789964994165, "grad_norm": 1.015625, "learning_rate": 0.00018322330894156243, "loss": 0.0976, "step": 4015 }, { "epoch": 1.8744457409568263, "grad_norm": 1.0078125, "learning_rate": 0.00018321517268669171, "loss": 0.0949, "step": 4016 }, { "epoch": 1.8749124854142356, "grad_norm": 0.92578125, "learning_rate": 0.00018320703464010603, "loss": 0.0932, "step": 4017 }, { "epoch": 1.8753792298716454, "grad_norm": 0.97265625, "learning_rate": 0.00018319889480198053, "loss": 0.0949, "step": 4018 }, { "epoch": 1.8758459743290548, "grad_norm": 0.91796875, "learning_rate": 0.00018319075317249056, "loss": 0.0962, "step": 4019 }, { "epoch": 1.8763127187864646, "grad_norm": 1.1171875, "learning_rate": 0.00018318260975181138, "loss": 0.1067, "step": 4020 }, { "epoch": 1.876779463243874, "grad_norm": 0.9453125, "learning_rate": 0.0001831744645401183, "loss": 0.0781, "step": 4021 }, { "epoch": 1.8772462077012837, "grad_norm": 0.89453125, "learning_rate": 0.00018316631753758668, "loss": 0.1055, "step": 4022 }, { "epoch": 1.877712952158693, "grad_norm": 1.0234375, "learning_rate": 0.000183158168744392, "loss": 0.0984, "step": 4023 }, { "epoch": 1.8781796966161028, "grad_norm": 0.90625, "learning_rate": 0.00018315001816070968, "loss": 0.0858, "step": 4024 }, { "epoch": 1.8786464410735122, "grad_norm": 1.03125, "learning_rate": 0.00018314186578671522, "loss": 0.102, "step": 4025 }, { "epoch": 1.8791131855309218, "grad_norm": 1.0234375, "learning_rate": 0.0001831337116225841, "loss": 0.1047, "step": 4026 }, { "epoch": 1.8795799299883313, "grad_norm": 0.890625, "learning_rate": 0.00018312555566849198, "loss": 0.0963, "step": 4027 }, { "epoch": 1.880046674445741, "grad_norm": 1.203125, "learning_rate": 0.00018311739792461443, "loss": 0.0886, "step": 4028 }, { "epoch": 1.8805134189031505, "grad_norm": 1.03125, "learning_rate": 0.00018310923839112703, "loss": 0.1028, "step": 4029 }, { "epoch": 1.88098016336056, "grad_norm": 0.94921875, "learning_rate": 0.00018310107706820554, "loss": 0.0868, "step": 4030 }, { "epoch": 1.8814469078179696, "grad_norm": 0.9375, "learning_rate": 0.00018309291395602565, "loss": 0.0849, "step": 4031 }, { "epoch": 1.8819136522753792, "grad_norm": 1.109375, "learning_rate": 0.00018308474905476317, "loss": 0.1204, "step": 4032 }, { "epoch": 1.8823803967327888, "grad_norm": 0.8203125, "learning_rate": 0.0001830765823645938, "loss": 0.0652, "step": 4033 }, { "epoch": 1.8828471411901984, "grad_norm": 0.96484375, "learning_rate": 0.00018306841388569348, "loss": 0.0834, "step": 4034 }, { "epoch": 1.883313885647608, "grad_norm": 1.03125, "learning_rate": 0.00018306024361823805, "loss": 0.1016, "step": 4035 }, { "epoch": 1.8837806301050175, "grad_norm": 1.015625, "learning_rate": 0.0001830520715624034, "loss": 0.0864, "step": 4036 }, { "epoch": 1.884247374562427, "grad_norm": 1.03125, "learning_rate": 0.00018304389771836544, "loss": 0.1059, "step": 4037 }, { "epoch": 1.8847141190198367, "grad_norm": 0.91015625, "learning_rate": 0.0001830357220863003, "loss": 0.0983, "step": 4038 }, { "epoch": 1.8851808634772462, "grad_norm": 1.125, "learning_rate": 0.00018302754466638393, "loss": 0.0979, "step": 4039 }, { "epoch": 1.8856476079346558, "grad_norm": 0.953125, "learning_rate": 0.00018301936545879236, "loss": 0.0833, "step": 4040 }, { "epoch": 1.8861143523920654, "grad_norm": 0.90234375, "learning_rate": 0.0001830111844637018, "loss": 0.0704, "step": 4041 }, { "epoch": 1.886581096849475, "grad_norm": 1.0390625, "learning_rate": 0.00018300300168128827, "loss": 0.0793, "step": 4042 }, { "epoch": 1.8870478413068845, "grad_norm": 1.03125, "learning_rate": 0.00018299481711172807, "loss": 0.0891, "step": 4043 }, { "epoch": 1.887514585764294, "grad_norm": 1.0, "learning_rate": 0.00018298663075519734, "loss": 0.098, "step": 4044 }, { "epoch": 1.8879813302217037, "grad_norm": 0.91796875, "learning_rate": 0.0001829784426118724, "loss": 0.0844, "step": 4045 }, { "epoch": 1.888448074679113, "grad_norm": 0.6484375, "learning_rate": 0.00018297025268192952, "loss": 0.0646, "step": 4046 }, { "epoch": 1.8889148191365228, "grad_norm": 0.92578125, "learning_rate": 0.00018296206096554503, "loss": 0.0711, "step": 4047 }, { "epoch": 1.8893815635939322, "grad_norm": 1.0546875, "learning_rate": 0.00018295386746289532, "loss": 0.0719, "step": 4048 }, { "epoch": 1.889848308051342, "grad_norm": 0.90625, "learning_rate": 0.0001829456721741568, "loss": 0.0847, "step": 4049 }, { "epoch": 1.8903150525087513, "grad_norm": 1.0234375, "learning_rate": 0.00018293747509950597, "loss": 0.1078, "step": 4050 }, { "epoch": 1.890781796966161, "grad_norm": 0.875, "learning_rate": 0.00018292927623911922, "loss": 0.0889, "step": 4051 }, { "epoch": 1.8912485414235705, "grad_norm": 0.98828125, "learning_rate": 0.00018292107559317317, "loss": 0.0975, "step": 4052 }, { "epoch": 1.8917152858809803, "grad_norm": 1.21875, "learning_rate": 0.00018291287316184437, "loss": 0.107, "step": 4053 }, { "epoch": 1.8921820303383896, "grad_norm": 0.9609375, "learning_rate": 0.00018290466894530942, "loss": 0.0885, "step": 4054 }, { "epoch": 1.8926487747957994, "grad_norm": 0.87109375, "learning_rate": 0.00018289646294374494, "loss": 0.0757, "step": 4055 }, { "epoch": 1.8931155192532088, "grad_norm": 1.0859375, "learning_rate": 0.00018288825515732767, "loss": 0.1002, "step": 4056 }, { "epoch": 1.8935822637106186, "grad_norm": 0.86328125, "learning_rate": 0.00018288004558623426, "loss": 0.068, "step": 4057 }, { "epoch": 1.894049008168028, "grad_norm": 0.90625, "learning_rate": 0.00018287183423064155, "loss": 0.0816, "step": 4058 }, { "epoch": 1.8945157526254377, "grad_norm": 0.91015625, "learning_rate": 0.00018286362109072628, "loss": 0.0808, "step": 4059 }, { "epoch": 1.894982497082847, "grad_norm": 1.0859375, "learning_rate": 0.0001828554061666653, "loss": 0.0911, "step": 4060 }, { "epoch": 1.8954492415402568, "grad_norm": 0.953125, "learning_rate": 0.0001828471894586355, "loss": 0.0783, "step": 4061 }, { "epoch": 1.8959159859976662, "grad_norm": 1.140625, "learning_rate": 0.00018283897096681381, "loss": 0.0902, "step": 4062 }, { "epoch": 1.896382730455076, "grad_norm": 1.015625, "learning_rate": 0.00018283075069137715, "loss": 0.1027, "step": 4063 }, { "epoch": 1.8968494749124853, "grad_norm": 0.91796875, "learning_rate": 0.00018282252863250254, "loss": 0.0606, "step": 4064 }, { "epoch": 1.8973162193698951, "grad_norm": 1.0703125, "learning_rate": 0.00018281430479036694, "loss": 0.0868, "step": 4065 }, { "epoch": 1.8977829638273045, "grad_norm": 0.859375, "learning_rate": 0.00018280607916514754, "loss": 0.0761, "step": 4066 }, { "epoch": 1.898249708284714, "grad_norm": 1.0234375, "learning_rate": 0.00018279785175702136, "loss": 0.0858, "step": 4067 }, { "epoch": 1.8987164527421236, "grad_norm": 0.91796875, "learning_rate": 0.00018278962256616557, "loss": 0.0881, "step": 4068 }, { "epoch": 1.8991831971995332, "grad_norm": 0.8515625, "learning_rate": 0.00018278139159275732, "loss": 0.0749, "step": 4069 }, { "epoch": 1.8996499416569428, "grad_norm": 1.109375, "learning_rate": 0.00018277315883697388, "loss": 0.1185, "step": 4070 }, { "epoch": 1.9001166861143524, "grad_norm": 0.9453125, "learning_rate": 0.00018276492429899251, "loss": 0.1023, "step": 4071 }, { "epoch": 1.900583430571762, "grad_norm": 0.90625, "learning_rate": 0.00018275668797899046, "loss": 0.1018, "step": 4072 }, { "epoch": 1.9010501750291715, "grad_norm": 1.140625, "learning_rate": 0.0001827484498771451, "loss": 0.1336, "step": 4073 }, { "epoch": 1.901516919486581, "grad_norm": 1.109375, "learning_rate": 0.0001827402099936338, "loss": 0.1202, "step": 4074 }, { "epoch": 1.9019836639439907, "grad_norm": 0.91796875, "learning_rate": 0.00018273196832863402, "loss": 0.084, "step": 4075 }, { "epoch": 1.9024504084014002, "grad_norm": 0.796875, "learning_rate": 0.00018272372488232314, "loss": 0.0699, "step": 4076 }, { "epoch": 1.9029171528588098, "grad_norm": 0.9140625, "learning_rate": 0.00018271547965487868, "loss": 0.0838, "step": 4077 }, { "epoch": 1.9033838973162194, "grad_norm": 0.98046875, "learning_rate": 0.00018270723264647817, "loss": 0.0821, "step": 4078 }, { "epoch": 1.903850641773629, "grad_norm": 0.77734375, "learning_rate": 0.00018269898385729917, "loss": 0.1036, "step": 4079 }, { "epoch": 1.9043173862310385, "grad_norm": 1.234375, "learning_rate": 0.0001826907332875193, "loss": 0.1143, "step": 4080 }, { "epoch": 1.904784130688448, "grad_norm": 0.9375, "learning_rate": 0.00018268248093731621, "loss": 0.0667, "step": 4081 }, { "epoch": 1.9052508751458577, "grad_norm": 0.9140625, "learning_rate": 0.00018267422680686755, "loss": 0.0753, "step": 4082 }, { "epoch": 1.9057176196032672, "grad_norm": 1.0078125, "learning_rate": 0.00018266597089635104, "loss": 0.1102, "step": 4083 }, { "epoch": 1.9061843640606768, "grad_norm": 0.8125, "learning_rate": 0.0001826577132059445, "loss": 0.078, "step": 4084 }, { "epoch": 1.9066511085180864, "grad_norm": 0.82421875, "learning_rate": 0.00018264945373582564, "loss": 0.0794, "step": 4085 }, { "epoch": 1.907117852975496, "grad_norm": 0.8828125, "learning_rate": 0.00018264119248617236, "loss": 0.0902, "step": 4086 }, { "epoch": 1.9075845974329055, "grad_norm": 0.89453125, "learning_rate": 0.00018263292945716253, "loss": 0.089, "step": 4087 }, { "epoch": 1.9080513418903151, "grad_norm": 0.94140625, "learning_rate": 0.00018262466464897403, "loss": 0.0848, "step": 4088 }, { "epoch": 1.9085180863477245, "grad_norm": 1.0234375, "learning_rate": 0.00018261639806178482, "loss": 0.1133, "step": 4089 }, { "epoch": 1.9089848308051343, "grad_norm": 0.953125, "learning_rate": 0.0001826081296957729, "loss": 0.103, "step": 4090 }, { "epoch": 1.9094515752625436, "grad_norm": 0.87890625, "learning_rate": 0.0001825998595511163, "loss": 0.0935, "step": 4091 }, { "epoch": 1.9099183197199534, "grad_norm": 0.99609375, "learning_rate": 0.00018259158762799307, "loss": 0.1049, "step": 4092 }, { "epoch": 1.9103850641773628, "grad_norm": 0.96484375, "learning_rate": 0.00018258331392658131, "loss": 0.0908, "step": 4093 }, { "epoch": 1.9108518086347726, "grad_norm": 0.94921875, "learning_rate": 0.0001825750384470592, "loss": 0.0798, "step": 4094 }, { "epoch": 1.911318553092182, "grad_norm": 1.0234375, "learning_rate": 0.00018256676118960486, "loss": 0.0939, "step": 4095 }, { "epoch": 1.9117852975495917, "grad_norm": 0.89453125, "learning_rate": 0.0001825584821543966, "loss": 0.0742, "step": 4096 }, { "epoch": 1.912252042007001, "grad_norm": 0.8359375, "learning_rate": 0.00018255020134161256, "loss": 0.0862, "step": 4097 }, { "epoch": 1.9127187864644108, "grad_norm": 1.09375, "learning_rate": 0.00018254191875143112, "loss": 0.1023, "step": 4098 }, { "epoch": 1.9131855309218202, "grad_norm": 1.09375, "learning_rate": 0.00018253363438403057, "loss": 0.1377, "step": 4099 }, { "epoch": 1.91365227537923, "grad_norm": 0.71484375, "learning_rate": 0.00018252534823958932, "loss": 0.0719, "step": 4100 }, { "epoch": 1.9141190198366393, "grad_norm": 0.734375, "learning_rate": 0.00018251706031828575, "loss": 0.0723, "step": 4101 }, { "epoch": 1.9145857642940491, "grad_norm": 0.91015625, "learning_rate": 0.00018250877062029832, "loss": 0.0995, "step": 4102 }, { "epoch": 1.9150525087514585, "grad_norm": 1.0703125, "learning_rate": 0.00018250047914580552, "loss": 0.0839, "step": 4103 }, { "epoch": 1.9155192532088683, "grad_norm": 1.09375, "learning_rate": 0.00018249218589498587, "loss": 0.1177, "step": 4104 }, { "epoch": 1.9159859976662776, "grad_norm": 0.88671875, "learning_rate": 0.00018248389086801789, "loss": 0.0751, "step": 4105 }, { "epoch": 1.9164527421236874, "grad_norm": 0.86328125, "learning_rate": 0.00018247559406508028, "loss": 0.1018, "step": 4106 }, { "epoch": 1.9169194865810968, "grad_norm": 0.64453125, "learning_rate": 0.0001824672954863516, "loss": 0.0562, "step": 4107 }, { "epoch": 1.9173862310385066, "grad_norm": 0.8125, "learning_rate": 0.00018245899513201052, "loss": 0.0697, "step": 4108 }, { "epoch": 1.917852975495916, "grad_norm": 0.859375, "learning_rate": 0.00018245069300223583, "loss": 0.0764, "step": 4109 }, { "epoch": 1.9183197199533255, "grad_norm": 0.81640625, "learning_rate": 0.0001824423890972062, "loss": 0.0572, "step": 4110 }, { "epoch": 1.918786464410735, "grad_norm": 1.125, "learning_rate": 0.0001824340834171005, "loss": 0.104, "step": 4111 }, { "epoch": 1.9192532088681447, "grad_norm": 0.9375, "learning_rate": 0.00018242577596209748, "loss": 0.0671, "step": 4112 }, { "epoch": 1.9197199533255542, "grad_norm": 1.03125, "learning_rate": 0.00018241746673237607, "loss": 0.0794, "step": 4113 }, { "epoch": 1.9201866977829638, "grad_norm": 0.92578125, "learning_rate": 0.00018240915572811516, "loss": 0.0773, "step": 4114 }, { "epoch": 1.9206534422403734, "grad_norm": 0.8203125, "learning_rate": 0.00018240084294949372, "loss": 0.0691, "step": 4115 }, { "epoch": 1.921120186697783, "grad_norm": 1.15625, "learning_rate": 0.00018239252839669066, "loss": 0.1084, "step": 4116 }, { "epoch": 1.9215869311551925, "grad_norm": 1.046875, "learning_rate": 0.00018238421206988504, "loss": 0.0819, "step": 4117 }, { "epoch": 1.922053675612602, "grad_norm": 0.86328125, "learning_rate": 0.00018237589396925596, "loss": 0.0693, "step": 4118 }, { "epoch": 1.9225204200700117, "grad_norm": 0.90234375, "learning_rate": 0.00018236757409498247, "loss": 0.0651, "step": 4119 }, { "epoch": 1.9229871645274212, "grad_norm": 1.0546875, "learning_rate": 0.00018235925244724374, "loss": 0.1047, "step": 4120 }, { "epoch": 1.9234539089848308, "grad_norm": 1.1328125, "learning_rate": 0.0001823509290262189, "loss": 0.0896, "step": 4121 }, { "epoch": 1.9239206534422404, "grad_norm": 1.0703125, "learning_rate": 0.00018234260383208722, "loss": 0.1091, "step": 4122 }, { "epoch": 1.92438739789965, "grad_norm": 1.0, "learning_rate": 0.0001823342768650279, "loss": 0.1292, "step": 4123 }, { "epoch": 1.9248541423570595, "grad_norm": 0.7890625, "learning_rate": 0.00018232594812522026, "loss": 0.0618, "step": 4124 }, { "epoch": 1.9253208868144691, "grad_norm": 0.89453125, "learning_rate": 0.0001823176176128436, "loss": 0.0719, "step": 4125 }, { "epoch": 1.9257876312718787, "grad_norm": 0.88671875, "learning_rate": 0.00018230928532807732, "loss": 0.0695, "step": 4126 }, { "epoch": 1.9262543757292883, "grad_norm": 0.90234375, "learning_rate": 0.00018230095127110078, "loss": 0.0776, "step": 4127 }, { "epoch": 1.9267211201866978, "grad_norm": 1.046875, "learning_rate": 0.00018229261544209344, "loss": 0.0894, "step": 4128 }, { "epoch": 1.9271878646441074, "grad_norm": 1.1640625, "learning_rate": 0.0001822842778412348, "loss": 0.0983, "step": 4129 }, { "epoch": 1.9276546091015168, "grad_norm": 0.9453125, "learning_rate": 0.00018227593846870436, "loss": 0.0768, "step": 4130 }, { "epoch": 1.9281213535589266, "grad_norm": 0.8515625, "learning_rate": 0.00018226759732468167, "loss": 0.0697, "step": 4131 }, { "epoch": 1.928588098016336, "grad_norm": 0.87109375, "learning_rate": 0.00018225925440934634, "loss": 0.0761, "step": 4132 }, { "epoch": 1.9290548424737457, "grad_norm": 0.8671875, "learning_rate": 0.000182250909722878, "loss": 0.0693, "step": 4133 }, { "epoch": 1.929521586931155, "grad_norm": 0.7890625, "learning_rate": 0.0001822425632654563, "loss": 0.072, "step": 4134 }, { "epoch": 1.9299883313885648, "grad_norm": 0.94140625, "learning_rate": 0.00018223421503726095, "loss": 0.0908, "step": 4135 }, { "epoch": 1.9304550758459742, "grad_norm": 0.90234375, "learning_rate": 0.00018222586503847173, "loss": 0.0725, "step": 4136 }, { "epoch": 1.930921820303384, "grad_norm": 0.9140625, "learning_rate": 0.0001822175132692684, "loss": 0.0838, "step": 4137 }, { "epoch": 1.9313885647607933, "grad_norm": 1.0234375, "learning_rate": 0.0001822091597298308, "loss": 0.0739, "step": 4138 }, { "epoch": 1.9318553092182031, "grad_norm": 0.93359375, "learning_rate": 0.00018220080442033878, "loss": 0.0837, "step": 4139 }, { "epoch": 1.9323220536756125, "grad_norm": 0.984375, "learning_rate": 0.00018219244734097223, "loss": 0.0847, "step": 4140 }, { "epoch": 1.9327887981330223, "grad_norm": 0.84765625, "learning_rate": 0.0001821840884919111, "loss": 0.0651, "step": 4141 }, { "epoch": 1.9332555425904316, "grad_norm": 0.80859375, "learning_rate": 0.00018217572787333533, "loss": 0.0774, "step": 4142 }, { "epoch": 1.9337222870478414, "grad_norm": 1.0078125, "learning_rate": 0.00018216736548542503, "loss": 0.0731, "step": 4143 }, { "epoch": 1.9341890315052508, "grad_norm": 1.046875, "learning_rate": 0.0001821590013283601, "loss": 0.0934, "step": 4144 }, { "epoch": 1.9346557759626606, "grad_norm": 0.84375, "learning_rate": 0.00018215063540232075, "loss": 0.0751, "step": 4145 }, { "epoch": 1.93512252042007, "grad_norm": 1.1015625, "learning_rate": 0.0001821422677074871, "loss": 0.0859, "step": 4146 }, { "epoch": 1.9355892648774797, "grad_norm": 0.9609375, "learning_rate": 0.00018213389824403925, "loss": 0.0798, "step": 4147 }, { "epoch": 1.936056009334889, "grad_norm": 0.8046875, "learning_rate": 0.00018212552701215746, "loss": 0.0547, "step": 4148 }, { "epoch": 1.9365227537922989, "grad_norm": 1.078125, "learning_rate": 0.00018211715401202194, "loss": 0.1104, "step": 4149 }, { "epoch": 1.9369894982497082, "grad_norm": 0.71484375, "learning_rate": 0.00018210877924381303, "loss": 0.0684, "step": 4150 }, { "epoch": 1.9374562427071178, "grad_norm": 0.8984375, "learning_rate": 0.00018210040270771094, "loss": 0.1049, "step": 4151 }, { "epoch": 1.9379229871645274, "grad_norm": 1.171875, "learning_rate": 0.00018209202440389613, "loss": 0.1035, "step": 4152 }, { "epoch": 1.938389731621937, "grad_norm": 0.87890625, "learning_rate": 0.00018208364433254892, "loss": 0.0814, "step": 4153 }, { "epoch": 1.9388564760793465, "grad_norm": 1.015625, "learning_rate": 0.00018207526249384978, "loss": 0.1017, "step": 4154 }, { "epoch": 1.939323220536756, "grad_norm": 0.9609375, "learning_rate": 0.0001820668788879792, "loss": 0.0965, "step": 4155 }, { "epoch": 1.9397899649941657, "grad_norm": 0.99609375, "learning_rate": 0.00018205849351511763, "loss": 0.0881, "step": 4156 }, { "epoch": 1.9402567094515752, "grad_norm": 1.0625, "learning_rate": 0.00018205010637544565, "loss": 0.091, "step": 4157 }, { "epoch": 1.9407234539089848, "grad_norm": 0.83984375, "learning_rate": 0.00018204171746914386, "loss": 0.0787, "step": 4158 }, { "epoch": 1.9411901983663944, "grad_norm": 0.83203125, "learning_rate": 0.00018203332679639285, "loss": 0.0678, "step": 4159 }, { "epoch": 1.941656942823804, "grad_norm": 1.03125, "learning_rate": 0.00018202493435737335, "loss": 0.0941, "step": 4160 }, { "epoch": 1.9421236872812135, "grad_norm": 1.2734375, "learning_rate": 0.00018201654015226595, "loss": 0.0945, "step": 4161 }, { "epoch": 1.9425904317386231, "grad_norm": 0.92578125, "learning_rate": 0.00018200814418125146, "loss": 0.0889, "step": 4162 }, { "epoch": 1.9430571761960327, "grad_norm": 0.87890625, "learning_rate": 0.00018199974644451066, "loss": 0.0574, "step": 4163 }, { "epoch": 1.9435239206534423, "grad_norm": 0.98828125, "learning_rate": 0.0001819913469422243, "loss": 0.0929, "step": 4164 }, { "epoch": 1.9439906651108518, "grad_norm": 1.0390625, "learning_rate": 0.00018198294567457328, "loss": 0.0886, "step": 4165 }, { "epoch": 1.9444574095682614, "grad_norm": 1.125, "learning_rate": 0.00018197454264173849, "loss": 0.1094, "step": 4166 }, { "epoch": 1.944924154025671, "grad_norm": 0.8046875, "learning_rate": 0.00018196613784390083, "loss": 0.079, "step": 4167 }, { "epoch": 1.9453908984830806, "grad_norm": 1.203125, "learning_rate": 0.00018195773128124127, "loss": 0.1143, "step": 4168 }, { "epoch": 1.9458576429404901, "grad_norm": 0.83984375, "learning_rate": 0.00018194932295394084, "loss": 0.0636, "step": 4169 }, { "epoch": 1.9463243873978997, "grad_norm": 1.015625, "learning_rate": 0.00018194091286218055, "loss": 0.0794, "step": 4170 }, { "epoch": 1.946791131855309, "grad_norm": 0.81640625, "learning_rate": 0.00018193250100614149, "loss": 0.0788, "step": 4171 }, { "epoch": 1.9472578763127188, "grad_norm": 1.1640625, "learning_rate": 0.00018192408738600482, "loss": 0.0931, "step": 4172 }, { "epoch": 1.9477246207701282, "grad_norm": 0.81640625, "learning_rate": 0.0001819156720019516, "loss": 0.0968, "step": 4173 }, { "epoch": 1.948191365227538, "grad_norm": 1.3828125, "learning_rate": 0.00018190725485416311, "loss": 0.101, "step": 4174 }, { "epoch": 1.9486581096849473, "grad_norm": 1.015625, "learning_rate": 0.0001818988359428205, "loss": 0.1104, "step": 4175 }, { "epoch": 1.9491248541423571, "grad_norm": 0.9921875, "learning_rate": 0.00018189041526810512, "loss": 0.0716, "step": 4176 }, { "epoch": 1.9495915985997665, "grad_norm": 0.93359375, "learning_rate": 0.00018188199283019824, "loss": 0.0836, "step": 4177 }, { "epoch": 1.9500583430571763, "grad_norm": 0.92578125, "learning_rate": 0.00018187356862928123, "loss": 0.0889, "step": 4178 }, { "epoch": 1.9505250875145856, "grad_norm": 0.91796875, "learning_rate": 0.00018186514266553539, "loss": 0.0851, "step": 4179 }, { "epoch": 1.9509918319719954, "grad_norm": 1.1796875, "learning_rate": 0.00018185671493914222, "loss": 0.1038, "step": 4180 }, { "epoch": 1.9514585764294048, "grad_norm": 1.1015625, "learning_rate": 0.00018184828545028317, "loss": 0.0971, "step": 4181 }, { "epoch": 1.9519253208868146, "grad_norm": 0.78515625, "learning_rate": 0.0001818398541991397, "loss": 0.0587, "step": 4182 }, { "epoch": 1.952392065344224, "grad_norm": 0.96875, "learning_rate": 0.0001818314211858934, "loss": 0.0897, "step": 4183 }, { "epoch": 1.9528588098016337, "grad_norm": 1.1328125, "learning_rate": 0.00018182298641072577, "loss": 0.1065, "step": 4184 }, { "epoch": 1.953325554259043, "grad_norm": 1.1484375, "learning_rate": 0.00018181454987381847, "loss": 0.09, "step": 4185 }, { "epoch": 1.9537922987164529, "grad_norm": 0.84765625, "learning_rate": 0.00018180611157535314, "loss": 0.0958, "step": 4186 }, { "epoch": 1.9542590431738622, "grad_norm": 0.953125, "learning_rate": 0.00018179767151551147, "loss": 0.0759, "step": 4187 }, { "epoch": 1.954725787631272, "grad_norm": 1.1484375, "learning_rate": 0.00018178922969447515, "loss": 0.095, "step": 4188 }, { "epoch": 1.9551925320886814, "grad_norm": 1.0859375, "learning_rate": 0.000181780786112426, "loss": 0.0901, "step": 4189 }, { "epoch": 1.9556592765460912, "grad_norm": 0.875, "learning_rate": 0.0001817723407695458, "loss": 0.0602, "step": 4190 }, { "epoch": 1.9561260210035005, "grad_norm": 0.80078125, "learning_rate": 0.00018176389366601634, "loss": 0.0799, "step": 4191 }, { "epoch": 1.95659276546091, "grad_norm": 1.0234375, "learning_rate": 0.00018175544480201951, "loss": 0.093, "step": 4192 }, { "epoch": 1.9570595099183197, "grad_norm": 1.0625, "learning_rate": 0.00018174699417773728, "loss": 0.1169, "step": 4193 }, { "epoch": 1.9575262543757292, "grad_norm": 0.77734375, "learning_rate": 0.00018173854179335156, "loss": 0.0505, "step": 4194 }, { "epoch": 1.9579929988331388, "grad_norm": 0.90625, "learning_rate": 0.00018173008764904436, "loss": 0.0881, "step": 4195 }, { "epoch": 1.9584597432905484, "grad_norm": 1.0078125, "learning_rate": 0.00018172163174499768, "loss": 0.0902, "step": 4196 }, { "epoch": 1.958926487747958, "grad_norm": 0.98828125, "learning_rate": 0.0001817131740813936, "loss": 0.0953, "step": 4197 }, { "epoch": 1.9593932322053675, "grad_norm": 0.98828125, "learning_rate": 0.0001817047146584142, "loss": 0.0762, "step": 4198 }, { "epoch": 1.9598599766627771, "grad_norm": 1.0546875, "learning_rate": 0.00018169625347624167, "loss": 0.0983, "step": 4199 }, { "epoch": 1.9603267211201867, "grad_norm": 0.8359375, "learning_rate": 0.0001816877905350581, "loss": 0.0838, "step": 4200 }, { "epoch": 1.9607934655775963, "grad_norm": 0.94921875, "learning_rate": 0.00018167932583504584, "loss": 0.092, "step": 4201 }, { "epoch": 1.9612602100350058, "grad_norm": 0.890625, "learning_rate": 0.00018167085937638704, "loss": 0.0689, "step": 4202 }, { "epoch": 1.9617269544924154, "grad_norm": 1.046875, "learning_rate": 0.000181662391159264, "loss": 0.0765, "step": 4203 }, { "epoch": 1.962193698949825, "grad_norm": 0.921875, "learning_rate": 0.0001816539211838591, "loss": 0.0768, "step": 4204 }, { "epoch": 1.9626604434072346, "grad_norm": 0.8671875, "learning_rate": 0.0001816454494503547, "loss": 0.0654, "step": 4205 }, { "epoch": 1.9631271878646441, "grad_norm": 0.93359375, "learning_rate": 0.00018163697595893313, "loss": 0.0901, "step": 4206 }, { "epoch": 1.9635939323220537, "grad_norm": 0.7734375, "learning_rate": 0.00018162850070977692, "loss": 0.0733, "step": 4207 }, { "epoch": 1.9640606767794633, "grad_norm": 1.0078125, "learning_rate": 0.00018162002370306852, "loss": 0.0774, "step": 4208 }, { "epoch": 1.9645274212368729, "grad_norm": 1.1328125, "learning_rate": 0.00018161154493899045, "loss": 0.1083, "step": 4209 }, { "epoch": 1.9649941656942824, "grad_norm": 1.140625, "learning_rate": 0.00018160306441772527, "loss": 0.1082, "step": 4210 }, { "epoch": 1.965460910151692, "grad_norm": 0.72265625, "learning_rate": 0.00018159458213945557, "loss": 0.0535, "step": 4211 }, { "epoch": 1.9659276546091016, "grad_norm": 1.234375, "learning_rate": 0.000181586098104364, "loss": 0.1158, "step": 4212 }, { "epoch": 1.9663943990665111, "grad_norm": 0.91015625, "learning_rate": 0.0001815776123126332, "loss": 0.0836, "step": 4213 }, { "epoch": 1.9668611435239205, "grad_norm": 1.09375, "learning_rate": 0.0001815691247644459, "loss": 0.0827, "step": 4214 }, { "epoch": 1.9673278879813303, "grad_norm": 0.828125, "learning_rate": 0.00018156063545998486, "loss": 0.0723, "step": 4215 }, { "epoch": 1.9677946324387396, "grad_norm": 0.9609375, "learning_rate": 0.0001815521443994328, "loss": 0.0845, "step": 4216 }, { "epoch": 1.9682613768961494, "grad_norm": 1.015625, "learning_rate": 0.00018154365158297263, "loss": 0.0961, "step": 4217 }, { "epoch": 1.9687281213535588, "grad_norm": 0.98046875, "learning_rate": 0.00018153515701078713, "loss": 0.09, "step": 4218 }, { "epoch": 1.9691948658109686, "grad_norm": 0.89453125, "learning_rate": 0.00018152666068305926, "loss": 0.0853, "step": 4219 }, { "epoch": 1.969661610268378, "grad_norm": 0.80859375, "learning_rate": 0.00018151816259997193, "loss": 0.0621, "step": 4220 }, { "epoch": 1.9701283547257877, "grad_norm": 0.7734375, "learning_rate": 0.00018150966276170812, "loss": 0.0702, "step": 4221 }, { "epoch": 1.970595099183197, "grad_norm": 0.734375, "learning_rate": 0.00018150116116845083, "loss": 0.0612, "step": 4222 }, { "epoch": 1.9710618436406069, "grad_norm": 1.28125, "learning_rate": 0.0001814926578203831, "loss": 0.0979, "step": 4223 }, { "epoch": 1.9715285880980162, "grad_norm": 0.9609375, "learning_rate": 0.000181484152717688, "loss": 0.0918, "step": 4224 }, { "epoch": 1.971995332555426, "grad_norm": 0.90234375, "learning_rate": 0.00018147564586054875, "loss": 0.0999, "step": 4225 }, { "epoch": 1.9724620770128354, "grad_norm": 0.84375, "learning_rate": 0.00018146713724914842, "loss": 0.0872, "step": 4226 }, { "epoch": 1.9729288214702452, "grad_norm": 0.86328125, "learning_rate": 0.0001814586268836702, "loss": 0.0709, "step": 4227 }, { "epoch": 1.9733955659276545, "grad_norm": 1.0625, "learning_rate": 0.0001814501147642974, "loss": 0.082, "step": 4228 }, { "epoch": 1.9738623103850643, "grad_norm": 1.2734375, "learning_rate": 0.00018144160089121324, "loss": 0.0824, "step": 4229 }, { "epoch": 1.9743290548424737, "grad_norm": 0.87109375, "learning_rate": 0.00018143308526460107, "loss": 0.0707, "step": 4230 }, { "epoch": 1.9747957992998835, "grad_norm": 1.1328125, "learning_rate": 0.00018142456788464425, "loss": 0.1082, "step": 4231 }, { "epoch": 1.9752625437572928, "grad_norm": 1.0703125, "learning_rate": 0.0001814160487515261, "loss": 0.0805, "step": 4232 }, { "epoch": 1.9757292882147024, "grad_norm": 0.9921875, "learning_rate": 0.0001814075278654301, "loss": 0.0834, "step": 4233 }, { "epoch": 1.976196032672112, "grad_norm": 0.8515625, "learning_rate": 0.0001813990052265397, "loss": 0.0764, "step": 4234 }, { "epoch": 1.9766627771295215, "grad_norm": 0.875, "learning_rate": 0.0001813904808350384, "loss": 0.0634, "step": 4235 }, { "epoch": 1.9771295215869311, "grad_norm": 0.9765625, "learning_rate": 0.00018138195469110974, "loss": 0.0815, "step": 4236 }, { "epoch": 1.9775962660443407, "grad_norm": 0.97265625, "learning_rate": 0.0001813734267949373, "loss": 0.1021, "step": 4237 }, { "epoch": 1.9780630105017503, "grad_norm": 0.94921875, "learning_rate": 0.0001813648971467047, "loss": 0.075, "step": 4238 }, { "epoch": 1.9785297549591598, "grad_norm": 0.78515625, "learning_rate": 0.00018135636574659558, "loss": 0.0619, "step": 4239 }, { "epoch": 1.9789964994165694, "grad_norm": 1.09375, "learning_rate": 0.00018134783259479367, "loss": 0.1008, "step": 4240 }, { "epoch": 1.979463243873979, "grad_norm": 1.1484375, "learning_rate": 0.00018133929769148264, "loss": 0.0957, "step": 4241 }, { "epoch": 1.9799299883313886, "grad_norm": 0.9765625, "learning_rate": 0.00018133076103684628, "loss": 0.0961, "step": 4242 }, { "epoch": 1.9803967327887981, "grad_norm": 1.0234375, "learning_rate": 0.00018132222263106843, "loss": 0.077, "step": 4243 }, { "epoch": 1.9808634772462077, "grad_norm": 1.0078125, "learning_rate": 0.00018131368247433286, "loss": 0.0892, "step": 4244 }, { "epoch": 1.9813302217036173, "grad_norm": 1.109375, "learning_rate": 0.0001813051405668235, "loss": 0.0996, "step": 4245 }, { "epoch": 1.9817969661610269, "grad_norm": 0.953125, "learning_rate": 0.00018129659690872425, "loss": 0.1232, "step": 4246 }, { "epoch": 1.9822637106184364, "grad_norm": 1.0390625, "learning_rate": 0.00018128805150021906, "loss": 0.088, "step": 4247 }, { "epoch": 1.982730455075846, "grad_norm": 0.91015625, "learning_rate": 0.00018127950434149197, "loss": 0.0817, "step": 4248 }, { "epoch": 1.9831971995332556, "grad_norm": 0.89453125, "learning_rate": 0.00018127095543272694, "loss": 0.0796, "step": 4249 }, { "epoch": 1.9836639439906651, "grad_norm": 0.9375, "learning_rate": 0.00018126240477410804, "loss": 0.0885, "step": 4250 }, { "epoch": 1.9841306884480747, "grad_norm": 0.86328125, "learning_rate": 0.00018125385236581945, "loss": 0.0828, "step": 4251 }, { "epoch": 1.9845974329054843, "grad_norm": 1.0390625, "learning_rate": 0.00018124529820804524, "loss": 0.087, "step": 4252 }, { "epoch": 1.9850641773628939, "grad_norm": 0.82421875, "learning_rate": 0.00018123674230096964, "loss": 0.0706, "step": 4253 }, { "epoch": 1.9855309218203034, "grad_norm": 0.98828125, "learning_rate": 0.00018122818464477684, "loss": 0.0871, "step": 4254 }, { "epoch": 1.9859976662777128, "grad_norm": 1.109375, "learning_rate": 0.0001812196252396511, "loss": 0.0876, "step": 4255 }, { "epoch": 1.9864644107351226, "grad_norm": 1.078125, "learning_rate": 0.0001812110640857767, "loss": 0.0693, "step": 4256 }, { "epoch": 1.986931155192532, "grad_norm": 0.74609375, "learning_rate": 0.00018120250118333799, "loss": 0.0652, "step": 4257 }, { "epoch": 1.9873978996499417, "grad_norm": 1.0546875, "learning_rate": 0.00018119393653251935, "loss": 0.0922, "step": 4258 }, { "epoch": 1.987864644107351, "grad_norm": 0.87890625, "learning_rate": 0.00018118537013350517, "loss": 0.1167, "step": 4259 }, { "epoch": 1.9883313885647609, "grad_norm": 1.0, "learning_rate": 0.00018117680198647987, "loss": 0.0897, "step": 4260 }, { "epoch": 1.9887981330221702, "grad_norm": 0.9453125, "learning_rate": 0.00018116823209162798, "loss": 0.0802, "step": 4261 }, { "epoch": 1.98926487747958, "grad_norm": 1.0078125, "learning_rate": 0.00018115966044913396, "loss": 0.0863, "step": 4262 }, { "epoch": 1.9897316219369894, "grad_norm": 0.859375, "learning_rate": 0.00018115108705918243, "loss": 0.0687, "step": 4263 }, { "epoch": 1.9901983663943992, "grad_norm": 0.890625, "learning_rate": 0.00018114251192195794, "loss": 0.0796, "step": 4264 }, { "epoch": 1.9906651108518085, "grad_norm": 0.984375, "learning_rate": 0.00018113393503764512, "loss": 0.0867, "step": 4265 }, { "epoch": 1.9911318553092183, "grad_norm": 0.765625, "learning_rate": 0.00018112535640642873, "loss": 0.0629, "step": 4266 }, { "epoch": 1.9915985997666277, "grad_norm": 1.25, "learning_rate": 0.00018111677602849336, "loss": 0.1159, "step": 4267 }, { "epoch": 1.9920653442240375, "grad_norm": 0.9453125, "learning_rate": 0.00018110819390402378, "loss": 0.0746, "step": 4268 }, { "epoch": 1.9925320886814468, "grad_norm": 0.9609375, "learning_rate": 0.00018109961003320483, "loss": 0.0988, "step": 4269 }, { "epoch": 1.9929988331388566, "grad_norm": 0.87890625, "learning_rate": 0.0001810910244162213, "loss": 0.0916, "step": 4270 }, { "epoch": 1.993465577596266, "grad_norm": 1.015625, "learning_rate": 0.000181082437053258, "loss": 0.1245, "step": 4271 }, { "epoch": 1.9939323220536758, "grad_norm": 0.87109375, "learning_rate": 0.0001810738479444999, "loss": 0.0735, "step": 4272 }, { "epoch": 1.9943990665110851, "grad_norm": 0.8515625, "learning_rate": 0.0001810652570901319, "loss": 0.0764, "step": 4273 }, { "epoch": 1.994865810968495, "grad_norm": 0.8359375, "learning_rate": 0.00018105666449033897, "loss": 0.0993, "step": 4274 }, { "epoch": 1.9953325554259043, "grad_norm": 1.09375, "learning_rate": 0.00018104807014530613, "loss": 0.07, "step": 4275 }, { "epoch": 1.9957992998833138, "grad_norm": 1.046875, "learning_rate": 0.0001810394740552184, "loss": 0.084, "step": 4276 }, { "epoch": 1.9962660443407234, "grad_norm": 1.1875, "learning_rate": 0.00018103087622026086, "loss": 0.086, "step": 4277 }, { "epoch": 1.996732788798133, "grad_norm": 1.0703125, "learning_rate": 0.00018102227664061867, "loss": 0.1095, "step": 4278 }, { "epoch": 1.9971995332555426, "grad_norm": 1.0234375, "learning_rate": 0.00018101367531647697, "loss": 0.0952, "step": 4279 }, { "epoch": 1.9976662777129521, "grad_norm": 0.98046875, "learning_rate": 0.00018100507224802097, "loss": 0.0988, "step": 4280 }, { "epoch": 1.9981330221703617, "grad_norm": 0.63671875, "learning_rate": 0.00018099646743543584, "loss": 0.0571, "step": 4281 }, { "epoch": 1.9985997666277713, "grad_norm": 0.79296875, "learning_rate": 0.00018098786087890697, "loss": 0.0668, "step": 4282 }, { "epoch": 1.9990665110851809, "grad_norm": 1.0703125, "learning_rate": 0.00018097925257861954, "loss": 0.0848, "step": 4283 }, { "epoch": 1.9995332555425904, "grad_norm": 0.82421875, "learning_rate": 0.00018097064253475898, "loss": 0.059, "step": 4284 }, { "epoch": 2.0, "grad_norm": 0.8125, "learning_rate": 0.00018096203074751066, "loss": 0.0623, "step": 4285 }, { "epoch": 2.0004667444574094, "grad_norm": 0.6640625, "learning_rate": 0.00018095341721705995, "loss": 0.052, "step": 4286 }, { "epoch": 2.000933488914819, "grad_norm": 0.8515625, "learning_rate": 0.0001809448019435924, "loss": 0.0554, "step": 4287 }, { "epoch": 2.0014002333722285, "grad_norm": 0.81640625, "learning_rate": 0.00018093618492729337, "loss": 0.0604, "step": 4288 }, { "epoch": 2.0014002333722285, "eval_loss": 1.522891879081726, "eval_runtime": 94.7932, "eval_samples_per_second": 19.031, "eval_steps_per_second": 2.384, "step": 4288 }, { "epoch": 2.0018669778296383, "grad_norm": 0.99609375, "learning_rate": 0.00018092756616834853, "loss": 0.0776, "step": 4289 }, { "epoch": 2.0023337222870476, "grad_norm": 0.8671875, "learning_rate": 0.0001809189456669434, "loss": 0.0731, "step": 4290 }, { "epoch": 2.0028004667444574, "grad_norm": 1.03125, "learning_rate": 0.0001809103234232636, "loss": 0.0816, "step": 4291 }, { "epoch": 2.003267211201867, "grad_norm": 1.0859375, "learning_rate": 0.00018090169943749476, "loss": 0.0853, "step": 4292 }, { "epoch": 2.0037339556592766, "grad_norm": 0.85546875, "learning_rate": 0.00018089307370982254, "loss": 0.0745, "step": 4293 }, { "epoch": 2.004200700116686, "grad_norm": 0.7734375, "learning_rate": 0.00018088444624043273, "loss": 0.0552, "step": 4294 }, { "epoch": 2.0046674445740957, "grad_norm": 0.87109375, "learning_rate": 0.00018087581702951101, "loss": 0.0632, "step": 4295 }, { "epoch": 2.005134189031505, "grad_norm": 0.89453125, "learning_rate": 0.00018086718607724324, "loss": 0.0516, "step": 4296 }, { "epoch": 2.005600933488915, "grad_norm": 0.87109375, "learning_rate": 0.00018085855338381522, "loss": 0.0699, "step": 4297 }, { "epoch": 2.0060676779463242, "grad_norm": 0.640625, "learning_rate": 0.00018084991894941286, "loss": 0.0563, "step": 4298 }, { "epoch": 2.006534422403734, "grad_norm": 0.7109375, "learning_rate": 0.00018084128277422203, "loss": 0.0566, "step": 4299 }, { "epoch": 2.0070011668611434, "grad_norm": 0.9609375, "learning_rate": 0.0001808326448584287, "loss": 0.0758, "step": 4300 }, { "epoch": 2.007467911318553, "grad_norm": 1.03125, "learning_rate": 0.00018082400520221878, "loss": 0.0844, "step": 4301 }, { "epoch": 2.0079346557759625, "grad_norm": 0.90625, "learning_rate": 0.00018081536380577839, "loss": 0.084, "step": 4302 }, { "epoch": 2.0084014002333723, "grad_norm": 0.81640625, "learning_rate": 0.00018080672066929357, "loss": 0.0732, "step": 4303 }, { "epoch": 2.0088681446907817, "grad_norm": 0.8203125, "learning_rate": 0.00018079807579295037, "loss": 0.0606, "step": 4304 }, { "epoch": 2.0093348891481915, "grad_norm": 0.96484375, "learning_rate": 0.00018078942917693495, "loss": 0.0716, "step": 4305 }, { "epoch": 2.009801633605601, "grad_norm": 0.8359375, "learning_rate": 0.0001807807808214335, "loss": 0.063, "step": 4306 }, { "epoch": 2.0102683780630106, "grad_norm": 0.94140625, "learning_rate": 0.0001807721307266322, "loss": 0.0634, "step": 4307 }, { "epoch": 2.01073512252042, "grad_norm": 0.97265625, "learning_rate": 0.00018076347889271733, "loss": 0.0626, "step": 4308 }, { "epoch": 2.0112018669778298, "grad_norm": 1.1875, "learning_rate": 0.00018075482531987512, "loss": 0.0827, "step": 4309 }, { "epoch": 2.011668611435239, "grad_norm": 1.09375, "learning_rate": 0.00018074617000829192, "loss": 0.086, "step": 4310 }, { "epoch": 2.012135355892649, "grad_norm": 0.8984375, "learning_rate": 0.0001807375129581541, "loss": 0.0842, "step": 4311 }, { "epoch": 2.0126021003500583, "grad_norm": 0.9375, "learning_rate": 0.00018072885416964805, "loss": 0.0728, "step": 4312 }, { "epoch": 2.013068844807468, "grad_norm": 0.74609375, "learning_rate": 0.0001807201936429602, "loss": 0.059, "step": 4313 }, { "epoch": 2.0135355892648774, "grad_norm": 0.90234375, "learning_rate": 0.000180711531378277, "loss": 0.0724, "step": 4314 }, { "epoch": 2.014002333722287, "grad_norm": 0.87890625, "learning_rate": 0.00018070286737578503, "loss": 0.071, "step": 4315 }, { "epoch": 2.0144690781796966, "grad_norm": 0.890625, "learning_rate": 0.00018069420163567074, "loss": 0.0494, "step": 4316 }, { "epoch": 2.0149358226371064, "grad_norm": 0.8828125, "learning_rate": 0.00018068553415812077, "loss": 0.0641, "step": 4317 }, { "epoch": 2.0154025670945157, "grad_norm": 1.1171875, "learning_rate": 0.00018067686494332175, "loss": 0.08, "step": 4318 }, { "epoch": 2.0158693115519255, "grad_norm": 0.93359375, "learning_rate": 0.00018066819399146028, "loss": 0.0682, "step": 4319 }, { "epoch": 2.016336056009335, "grad_norm": 1.125, "learning_rate": 0.00018065952130272308, "loss": 0.0832, "step": 4320 }, { "epoch": 2.0168028004667446, "grad_norm": 1.03125, "learning_rate": 0.00018065084687729694, "loss": 0.0583, "step": 4321 }, { "epoch": 2.017269544924154, "grad_norm": 0.85546875, "learning_rate": 0.00018064217071536853, "loss": 0.0675, "step": 4322 }, { "epoch": 2.017736289381564, "grad_norm": 0.8359375, "learning_rate": 0.00018063349281712476, "loss": 0.0584, "step": 4323 }, { "epoch": 2.018203033838973, "grad_norm": 1.0234375, "learning_rate": 0.0001806248131827524, "loss": 0.0732, "step": 4324 }, { "epoch": 2.018669778296383, "grad_norm": 0.82421875, "learning_rate": 0.00018061613181243836, "loss": 0.0533, "step": 4325 }, { "epoch": 2.0191365227537923, "grad_norm": 0.9453125, "learning_rate": 0.00018060744870636956, "loss": 0.0696, "step": 4326 }, { "epoch": 2.0196032672112016, "grad_norm": 1.0390625, "learning_rate": 0.00018059876386473294, "loss": 0.068, "step": 4327 }, { "epoch": 2.0200700116686114, "grad_norm": 1.0703125, "learning_rate": 0.00018059007728771554, "loss": 0.0796, "step": 4328 }, { "epoch": 2.020536756126021, "grad_norm": 0.75, "learning_rate": 0.0001805813889755043, "loss": 0.0561, "step": 4329 }, { "epoch": 2.0210035005834306, "grad_norm": 0.89453125, "learning_rate": 0.0001805726989282864, "loss": 0.0654, "step": 4330 }, { "epoch": 2.02147024504084, "grad_norm": 0.80078125, "learning_rate": 0.0001805640071462489, "loss": 0.0584, "step": 4331 }, { "epoch": 2.0219369894982497, "grad_norm": 1.09375, "learning_rate": 0.00018055531362957893, "loss": 0.0767, "step": 4332 }, { "epoch": 2.022403733955659, "grad_norm": 1.0859375, "learning_rate": 0.00018054661837846366, "loss": 0.1072, "step": 4333 }, { "epoch": 2.022870478413069, "grad_norm": 0.78515625, "learning_rate": 0.00018053792139309036, "loss": 0.0618, "step": 4334 }, { "epoch": 2.0233372228704782, "grad_norm": 0.87890625, "learning_rate": 0.00018052922267364622, "loss": 0.0615, "step": 4335 }, { "epoch": 2.023803967327888, "grad_norm": 0.96484375, "learning_rate": 0.0001805205222203186, "loss": 0.0671, "step": 4336 }, { "epoch": 2.0242707117852974, "grad_norm": 0.76171875, "learning_rate": 0.00018051182003329477, "loss": 0.0593, "step": 4337 }, { "epoch": 2.024737456242707, "grad_norm": 0.93359375, "learning_rate": 0.0001805031161127621, "loss": 0.0884, "step": 4338 }, { "epoch": 2.0252042007001165, "grad_norm": 0.921875, "learning_rate": 0.00018049441045890808, "loss": 0.0682, "step": 4339 }, { "epoch": 2.0256709451575263, "grad_norm": 1.1640625, "learning_rate": 0.00018048570307192008, "loss": 0.1023, "step": 4340 }, { "epoch": 2.0261376896149357, "grad_norm": 0.875, "learning_rate": 0.00018047699395198558, "loss": 0.0566, "step": 4341 }, { "epoch": 2.0266044340723455, "grad_norm": 0.90234375, "learning_rate": 0.0001804682830992921, "loss": 0.0865, "step": 4342 }, { "epoch": 2.027071178529755, "grad_norm": 1.0625, "learning_rate": 0.0001804595705140272, "loss": 0.0753, "step": 4343 }, { "epoch": 2.0275379229871646, "grad_norm": 0.83984375, "learning_rate": 0.00018045085619637852, "loss": 0.0643, "step": 4344 }, { "epoch": 2.028004667444574, "grad_norm": 0.82421875, "learning_rate": 0.0001804421401465336, "loss": 0.0732, "step": 4345 }, { "epoch": 2.0284714119019838, "grad_norm": 0.80859375, "learning_rate": 0.00018043342236468015, "loss": 0.0574, "step": 4346 }, { "epoch": 2.028938156359393, "grad_norm": 0.93359375, "learning_rate": 0.0001804247028510059, "loss": 0.0836, "step": 4347 }, { "epoch": 2.029404900816803, "grad_norm": 0.74609375, "learning_rate": 0.00018041598160569854, "loss": 0.056, "step": 4348 }, { "epoch": 2.0298716452742123, "grad_norm": 0.859375, "learning_rate": 0.0001804072586289459, "loss": 0.0625, "step": 4349 }, { "epoch": 2.030338389731622, "grad_norm": 0.92578125, "learning_rate": 0.00018039853392093575, "loss": 0.0649, "step": 4350 }, { "epoch": 2.0308051341890314, "grad_norm": 0.83203125, "learning_rate": 0.00018038980748185597, "loss": 0.0631, "step": 4351 }, { "epoch": 2.031271878646441, "grad_norm": 1.109375, "learning_rate": 0.00018038107931189442, "loss": 0.0648, "step": 4352 }, { "epoch": 2.0317386231038506, "grad_norm": 0.9453125, "learning_rate": 0.00018037234941123908, "loss": 0.0654, "step": 4353 }, { "epoch": 2.0322053675612604, "grad_norm": 1.09375, "learning_rate": 0.00018036361778007786, "loss": 0.0791, "step": 4354 }, { "epoch": 2.0326721120186697, "grad_norm": 0.9140625, "learning_rate": 0.0001803548844185988, "loss": 0.0736, "step": 4355 }, { "epoch": 2.0331388564760795, "grad_norm": 1.015625, "learning_rate": 0.0001803461493269899, "loss": 0.0625, "step": 4356 }, { "epoch": 2.033605600933489, "grad_norm": 0.9375, "learning_rate": 0.00018033741250543926, "loss": 0.0681, "step": 4357 }, { "epoch": 2.0340723453908987, "grad_norm": 0.9140625, "learning_rate": 0.000180328673954135, "loss": 0.0689, "step": 4358 }, { "epoch": 2.034539089848308, "grad_norm": 0.76171875, "learning_rate": 0.0001803199336732653, "loss": 0.0518, "step": 4359 }, { "epoch": 2.035005834305718, "grad_norm": 0.93359375, "learning_rate": 0.00018031119166301828, "loss": 0.0589, "step": 4360 }, { "epoch": 2.035472578763127, "grad_norm": 1.0234375, "learning_rate": 0.0001803024479235822, "loss": 0.0689, "step": 4361 }, { "epoch": 2.035939323220537, "grad_norm": 0.890625, "learning_rate": 0.00018029370245514528, "loss": 0.0615, "step": 4362 }, { "epoch": 2.0364060676779463, "grad_norm": 0.96484375, "learning_rate": 0.00018028495525789593, "loss": 0.0612, "step": 4363 }, { "epoch": 2.036872812135356, "grad_norm": 1.15625, "learning_rate": 0.00018027620633202237, "loss": 0.0575, "step": 4364 }, { "epoch": 2.0373395565927654, "grad_norm": 1.0234375, "learning_rate": 0.00018026745567771302, "loss": 0.0756, "step": 4365 }, { "epoch": 2.0378063010501752, "grad_norm": 0.99609375, "learning_rate": 0.00018025870329515633, "loss": 0.0783, "step": 4366 }, { "epoch": 2.0382730455075846, "grad_norm": 0.93359375, "learning_rate": 0.00018024994918454067, "loss": 0.0733, "step": 4367 }, { "epoch": 2.0387397899649944, "grad_norm": 0.83984375, "learning_rate": 0.00018024119334605456, "loss": 0.0764, "step": 4368 }, { "epoch": 2.0392065344224037, "grad_norm": 0.7734375, "learning_rate": 0.00018023243577988655, "loss": 0.0627, "step": 4369 }, { "epoch": 2.039673278879813, "grad_norm": 0.87109375, "learning_rate": 0.0001802236764862252, "loss": 0.0738, "step": 4370 }, { "epoch": 2.040140023337223, "grad_norm": 0.99609375, "learning_rate": 0.00018021491546525904, "loss": 0.0753, "step": 4371 }, { "epoch": 2.0406067677946322, "grad_norm": 0.69921875, "learning_rate": 0.00018020615271717677, "loss": 0.0584, "step": 4372 }, { "epoch": 2.041073512252042, "grad_norm": 0.83984375, "learning_rate": 0.00018019738824216704, "loss": 0.0709, "step": 4373 }, { "epoch": 2.0415402567094514, "grad_norm": 0.9296875, "learning_rate": 0.00018018862204041854, "loss": 0.0764, "step": 4374 }, { "epoch": 2.042007001166861, "grad_norm": 0.89453125, "learning_rate": 0.00018017985411212007, "loss": 0.071, "step": 4375 }, { "epoch": 2.0424737456242705, "grad_norm": 1.0625, "learning_rate": 0.00018017108445746036, "loss": 0.0855, "step": 4376 }, { "epoch": 2.0429404900816803, "grad_norm": 0.86328125, "learning_rate": 0.00018016231307662828, "loss": 0.0788, "step": 4377 }, { "epoch": 2.0434072345390897, "grad_norm": 0.91796875, "learning_rate": 0.00018015353996981263, "loss": 0.0695, "step": 4378 }, { "epoch": 2.0438739789964995, "grad_norm": 0.73828125, "learning_rate": 0.00018014476513720232, "loss": 0.0579, "step": 4379 }, { "epoch": 2.044340723453909, "grad_norm": 0.90625, "learning_rate": 0.00018013598857898633, "loss": 0.0821, "step": 4380 }, { "epoch": 2.0448074679113186, "grad_norm": 0.94921875, "learning_rate": 0.00018012721029535356, "loss": 0.0692, "step": 4381 }, { "epoch": 2.045274212368728, "grad_norm": 1.0703125, "learning_rate": 0.00018011843028649307, "loss": 0.095, "step": 4382 }, { "epoch": 2.0457409568261378, "grad_norm": 0.70703125, "learning_rate": 0.0001801096485525939, "loss": 0.0649, "step": 4383 }, { "epoch": 2.046207701283547, "grad_norm": 1.078125, "learning_rate": 0.00018010086509384507, "loss": 0.1107, "step": 4384 }, { "epoch": 2.046674445740957, "grad_norm": 1.203125, "learning_rate": 0.00018009207991043577, "loss": 0.0925, "step": 4385 }, { "epoch": 2.0471411901983663, "grad_norm": 0.921875, "learning_rate": 0.0001800832930025551, "loss": 0.0677, "step": 4386 }, { "epoch": 2.047607934655776, "grad_norm": 1.0625, "learning_rate": 0.00018007450437039233, "loss": 0.0864, "step": 4387 }, { "epoch": 2.0480746791131854, "grad_norm": 1.0390625, "learning_rate": 0.00018006571401413658, "loss": 0.0746, "step": 4388 }, { "epoch": 2.048541423570595, "grad_norm": 0.95703125, "learning_rate": 0.0001800569219339772, "loss": 0.0929, "step": 4389 }, { "epoch": 2.0490081680280046, "grad_norm": 0.8828125, "learning_rate": 0.00018004812813010346, "loss": 0.0677, "step": 4390 }, { "epoch": 2.0494749124854144, "grad_norm": 0.625, "learning_rate": 0.00018003933260270474, "loss": 0.0574, "step": 4391 }, { "epoch": 2.0499416569428237, "grad_norm": 0.6328125, "learning_rate": 0.00018003053535197032, "loss": 0.0456, "step": 4392 }, { "epoch": 2.0504084014002335, "grad_norm": 0.921875, "learning_rate": 0.00018002173637808973, "loss": 0.0551, "step": 4393 }, { "epoch": 2.050875145857643, "grad_norm": 0.87109375, "learning_rate": 0.00018001293568125232, "loss": 0.0578, "step": 4394 }, { "epoch": 2.0513418903150527, "grad_norm": 0.95703125, "learning_rate": 0.0001800041332616477, "loss": 0.0738, "step": 4395 }, { "epoch": 2.051808634772462, "grad_norm": 0.890625, "learning_rate": 0.00017999532911946528, "loss": 0.076, "step": 4396 }, { "epoch": 2.052275379229872, "grad_norm": 0.87890625, "learning_rate": 0.00017998652325489463, "loss": 0.0621, "step": 4397 }, { "epoch": 2.052742123687281, "grad_norm": 1.234375, "learning_rate": 0.00017997771566812546, "loss": 0.1016, "step": 4398 }, { "epoch": 2.053208868144691, "grad_norm": 1.0625, "learning_rate": 0.00017996890635934732, "loss": 0.0671, "step": 4399 }, { "epoch": 2.0536756126021003, "grad_norm": 0.94921875, "learning_rate": 0.00017996009532874986, "loss": 0.0559, "step": 4400 }, { "epoch": 2.05414235705951, "grad_norm": 1.03125, "learning_rate": 0.00017995128257652286, "loss": 0.0744, "step": 4401 }, { "epoch": 2.0546091015169194, "grad_norm": 0.80859375, "learning_rate": 0.00017994246810285603, "loss": 0.078, "step": 4402 }, { "epoch": 2.0550758459743292, "grad_norm": 0.73828125, "learning_rate": 0.00017993365190793918, "loss": 0.0713, "step": 4403 }, { "epoch": 2.0555425904317386, "grad_norm": 0.6640625, "learning_rate": 0.00017992483399196209, "loss": 0.0557, "step": 4404 }, { "epoch": 2.0560093348891484, "grad_norm": 0.87890625, "learning_rate": 0.00017991601435511466, "loss": 0.0583, "step": 4405 }, { "epoch": 2.0564760793465577, "grad_norm": 0.94921875, "learning_rate": 0.00017990719299758678, "loss": 0.0711, "step": 4406 }, { "epoch": 2.0569428238039675, "grad_norm": 0.796875, "learning_rate": 0.00017989836991956837, "loss": 0.0694, "step": 4407 }, { "epoch": 2.057409568261377, "grad_norm": 1.0625, "learning_rate": 0.0001798895451212494, "loss": 0.0755, "step": 4408 }, { "epoch": 2.0578763127187867, "grad_norm": 0.84765625, "learning_rate": 0.0001798807186028199, "loss": 0.0723, "step": 4409 }, { "epoch": 2.058343057176196, "grad_norm": 0.83203125, "learning_rate": 0.00017987189036446988, "loss": 0.0637, "step": 4410 }, { "epoch": 2.0588098016336054, "grad_norm": 0.73828125, "learning_rate": 0.00017986306040638944, "loss": 0.0504, "step": 4411 }, { "epoch": 2.059276546091015, "grad_norm": 0.96875, "learning_rate": 0.00017985422872876872, "loss": 0.0584, "step": 4412 }, { "epoch": 2.0597432905484245, "grad_norm": 0.75390625, "learning_rate": 0.00017984539533179785, "loss": 0.0513, "step": 4413 }, { "epoch": 2.0602100350058343, "grad_norm": 0.84765625, "learning_rate": 0.00017983656021566705, "loss": 0.0639, "step": 4414 }, { "epoch": 2.0606767794632437, "grad_norm": 1.25, "learning_rate": 0.0001798277233805665, "loss": 0.0955, "step": 4415 }, { "epoch": 2.0611435239206535, "grad_norm": 0.91015625, "learning_rate": 0.00017981888482668647, "loss": 0.0705, "step": 4416 }, { "epoch": 2.061610268378063, "grad_norm": 0.81640625, "learning_rate": 0.00017981004455421733, "loss": 0.0599, "step": 4417 }, { "epoch": 2.0620770128354726, "grad_norm": 0.9140625, "learning_rate": 0.00017980120256334936, "loss": 0.0549, "step": 4418 }, { "epoch": 2.062543757292882, "grad_norm": 0.78515625, "learning_rate": 0.00017979235885427297, "loss": 0.0569, "step": 4419 }, { "epoch": 2.0630105017502918, "grad_norm": 0.86328125, "learning_rate": 0.00017978351342717855, "loss": 0.0763, "step": 4420 }, { "epoch": 2.063477246207701, "grad_norm": 0.9609375, "learning_rate": 0.00017977466628225658, "loss": 0.0591, "step": 4421 }, { "epoch": 2.063943990665111, "grad_norm": 0.921875, "learning_rate": 0.00017976581741969752, "loss": 0.0544, "step": 4422 }, { "epoch": 2.0644107351225203, "grad_norm": 0.9609375, "learning_rate": 0.0001797569668396919, "loss": 0.0568, "step": 4423 }, { "epoch": 2.06487747957993, "grad_norm": 1.03125, "learning_rate": 0.0001797481145424303, "loss": 0.0705, "step": 4424 }, { "epoch": 2.0653442240373394, "grad_norm": 0.7421875, "learning_rate": 0.00017973926052810332, "loss": 0.0501, "step": 4425 }, { "epoch": 2.065810968494749, "grad_norm": 0.80859375, "learning_rate": 0.0001797304047969016, "loss": 0.0526, "step": 4426 }, { "epoch": 2.0662777129521586, "grad_norm": 0.96875, "learning_rate": 0.00017972154734901578, "loss": 0.0778, "step": 4427 }, { "epoch": 2.0667444574095684, "grad_norm": 0.859375, "learning_rate": 0.0001797126881846366, "loss": 0.0461, "step": 4428 }, { "epoch": 2.0672112018669777, "grad_norm": 1.03125, "learning_rate": 0.0001797038273039548, "loss": 0.0759, "step": 4429 }, { "epoch": 2.0676779463243875, "grad_norm": 0.99609375, "learning_rate": 0.00017969496470716118, "loss": 0.0768, "step": 4430 }, { "epoch": 2.068144690781797, "grad_norm": 0.96875, "learning_rate": 0.00017968610039444655, "loss": 0.0707, "step": 4431 }, { "epoch": 2.0686114352392067, "grad_norm": 0.8984375, "learning_rate": 0.00017967723436600175, "loss": 0.0611, "step": 4432 }, { "epoch": 2.069078179696616, "grad_norm": 0.84375, "learning_rate": 0.00017966836662201767, "loss": 0.0644, "step": 4433 }, { "epoch": 2.069544924154026, "grad_norm": 0.90625, "learning_rate": 0.0001796594971626853, "loss": 0.0685, "step": 4434 }, { "epoch": 2.070011668611435, "grad_norm": 0.77734375, "learning_rate": 0.00017965062598819555, "loss": 0.0478, "step": 4435 }, { "epoch": 2.070478413068845, "grad_norm": 0.7734375, "learning_rate": 0.00017964175309873945, "loss": 0.031, "step": 4436 }, { "epoch": 2.0709451575262543, "grad_norm": 0.80078125, "learning_rate": 0.00017963287849450806, "loss": 0.0548, "step": 4437 }, { "epoch": 2.071411901983664, "grad_norm": 1.1015625, "learning_rate": 0.00017962400217569243, "loss": 0.0704, "step": 4438 }, { "epoch": 2.0718786464410734, "grad_norm": 1.0859375, "learning_rate": 0.00017961512414248367, "loss": 0.0901, "step": 4439 }, { "epoch": 2.0723453908984832, "grad_norm": 1.046875, "learning_rate": 0.000179606244395073, "loss": 0.0903, "step": 4440 }, { "epoch": 2.0728121353558926, "grad_norm": 0.80859375, "learning_rate": 0.0001795973629336515, "loss": 0.05, "step": 4441 }, { "epoch": 2.0732788798133024, "grad_norm": 0.8515625, "learning_rate": 0.00017958847975841052, "loss": 0.0608, "step": 4442 }, { "epoch": 2.0737456242707117, "grad_norm": 0.93359375, "learning_rate": 0.00017957959486954124, "loss": 0.0677, "step": 4443 }, { "epoch": 2.0742123687281215, "grad_norm": 1.125, "learning_rate": 0.00017957070826723497, "loss": 0.0845, "step": 4444 }, { "epoch": 2.074679113185531, "grad_norm": 1.0234375, "learning_rate": 0.00017956181995168309, "loss": 0.0949, "step": 4445 }, { "epoch": 2.0751458576429407, "grad_norm": 0.91796875, "learning_rate": 0.00017955292992307694, "loss": 0.0797, "step": 4446 }, { "epoch": 2.07561260210035, "grad_norm": 1.0078125, "learning_rate": 0.00017954403818160795, "loss": 0.0853, "step": 4447 }, { "epoch": 2.07607934655776, "grad_norm": 0.828125, "learning_rate": 0.00017953514472746757, "loss": 0.0513, "step": 4448 }, { "epoch": 2.076546091015169, "grad_norm": 0.94921875, "learning_rate": 0.00017952624956084728, "loss": 0.0724, "step": 4449 }, { "epoch": 2.077012835472579, "grad_norm": 0.90234375, "learning_rate": 0.00017951735268193855, "loss": 0.0639, "step": 4450 }, { "epoch": 2.0774795799299883, "grad_norm": 0.94921875, "learning_rate": 0.00017950845409093303, "loss": 0.0689, "step": 4451 }, { "epoch": 2.0779463243873977, "grad_norm": 0.890625, "learning_rate": 0.0001794995537880223, "loss": 0.0832, "step": 4452 }, { "epoch": 2.0784130688448075, "grad_norm": 0.86328125, "learning_rate": 0.00017949065177339795, "loss": 0.0693, "step": 4453 }, { "epoch": 2.078879813302217, "grad_norm": 0.875, "learning_rate": 0.00017948174804725166, "loss": 0.085, "step": 4454 }, { "epoch": 2.0793465577596266, "grad_norm": 0.88671875, "learning_rate": 0.00017947284260977516, "loss": 0.068, "step": 4455 }, { "epoch": 2.079813302217036, "grad_norm": 0.7890625, "learning_rate": 0.0001794639354611602, "loss": 0.0594, "step": 4456 }, { "epoch": 2.0802800466744458, "grad_norm": 1.0, "learning_rate": 0.0001794550266015985, "loss": 0.0765, "step": 4457 }, { "epoch": 2.080746791131855, "grad_norm": 0.859375, "learning_rate": 0.00017944611603128196, "loss": 0.0755, "step": 4458 }, { "epoch": 2.081213535589265, "grad_norm": 0.94921875, "learning_rate": 0.00017943720375040236, "loss": 0.0592, "step": 4459 }, { "epoch": 2.0816802800466743, "grad_norm": 0.9765625, "learning_rate": 0.00017942828975915168, "loss": 0.0869, "step": 4460 }, { "epoch": 2.082147024504084, "grad_norm": 0.8203125, "learning_rate": 0.00017941937405772173, "loss": 0.0612, "step": 4461 }, { "epoch": 2.0826137689614934, "grad_norm": 0.8828125, "learning_rate": 0.00017941045664630458, "loss": 0.0683, "step": 4462 }, { "epoch": 2.083080513418903, "grad_norm": 0.82421875, "learning_rate": 0.00017940153752509218, "loss": 0.0675, "step": 4463 }, { "epoch": 2.0835472578763126, "grad_norm": 1.0625, "learning_rate": 0.00017939261669427657, "loss": 0.0892, "step": 4464 }, { "epoch": 2.0840140023337224, "grad_norm": 1.015625, "learning_rate": 0.00017938369415404983, "loss": 0.0719, "step": 4465 }, { "epoch": 2.0844807467911317, "grad_norm": 0.890625, "learning_rate": 0.0001793747699046041, "loss": 0.0644, "step": 4466 }, { "epoch": 2.0849474912485415, "grad_norm": 1.109375, "learning_rate": 0.0001793658439461315, "loss": 0.1054, "step": 4467 }, { "epoch": 2.085414235705951, "grad_norm": 0.9921875, "learning_rate": 0.00017935691627882423, "loss": 0.0778, "step": 4468 }, { "epoch": 2.0858809801633607, "grad_norm": 0.75, "learning_rate": 0.00017934798690287447, "loss": 0.0525, "step": 4469 }, { "epoch": 2.08634772462077, "grad_norm": 0.71484375, "learning_rate": 0.00017933905581847453, "loss": 0.0557, "step": 4470 }, { "epoch": 2.08681446907818, "grad_norm": 1.171875, "learning_rate": 0.0001793301230258167, "loss": 0.0679, "step": 4471 }, { "epoch": 2.087281213535589, "grad_norm": 1.15625, "learning_rate": 0.0001793211885250933, "loss": 0.0613, "step": 4472 }, { "epoch": 2.087747957992999, "grad_norm": 0.76953125, "learning_rate": 0.0001793122523164967, "loss": 0.057, "step": 4473 }, { "epoch": 2.0882147024504083, "grad_norm": 0.90234375, "learning_rate": 0.00017930331440021933, "loss": 0.0802, "step": 4474 }, { "epoch": 2.088681446907818, "grad_norm": 0.80859375, "learning_rate": 0.00017929437477645359, "loss": 0.0512, "step": 4475 }, { "epoch": 2.0891481913652274, "grad_norm": 1.296875, "learning_rate": 0.000179285433445392, "loss": 0.0852, "step": 4476 }, { "epoch": 2.0896149358226372, "grad_norm": 1.0703125, "learning_rate": 0.00017927649040722703, "loss": 0.071, "step": 4477 }, { "epoch": 2.0900816802800466, "grad_norm": 0.9453125, "learning_rate": 0.00017926754566215128, "loss": 0.0868, "step": 4478 }, { "epoch": 2.0905484247374564, "grad_norm": 0.92578125, "learning_rate": 0.00017925859921035731, "loss": 0.0749, "step": 4479 }, { "epoch": 2.0910151691948657, "grad_norm": 0.8828125, "learning_rate": 0.00017924965105203778, "loss": 0.1042, "step": 4480 }, { "epoch": 2.0914819136522755, "grad_norm": 0.84375, "learning_rate": 0.0001792407011873853, "loss": 0.0717, "step": 4481 }, { "epoch": 2.091948658109685, "grad_norm": 0.82421875, "learning_rate": 0.00017923174961659264, "loss": 0.0601, "step": 4482 }, { "epoch": 2.0924154025670947, "grad_norm": 1.1328125, "learning_rate": 0.0001792227963398525, "loss": 0.0658, "step": 4483 }, { "epoch": 2.092882147024504, "grad_norm": 0.8203125, "learning_rate": 0.00017921384135735762, "loss": 0.063, "step": 4484 }, { "epoch": 2.093348891481914, "grad_norm": 0.80859375, "learning_rate": 0.0001792048846693009, "loss": 0.0619, "step": 4485 }, { "epoch": 2.093815635939323, "grad_norm": 0.72265625, "learning_rate": 0.00017919592627587512, "loss": 0.0707, "step": 4486 }, { "epoch": 2.094282380396733, "grad_norm": 0.87890625, "learning_rate": 0.00017918696617727316, "loss": 0.0667, "step": 4487 }, { "epoch": 2.0947491248541423, "grad_norm": 0.77734375, "learning_rate": 0.00017917800437368793, "loss": 0.0597, "step": 4488 }, { "epoch": 2.095215869311552, "grad_norm": 0.875, "learning_rate": 0.00017916904086531248, "loss": 0.0694, "step": 4489 }, { "epoch": 2.0956826137689615, "grad_norm": 0.8046875, "learning_rate": 0.0001791600756523397, "loss": 0.0564, "step": 4490 }, { "epoch": 2.0961493582263713, "grad_norm": 0.97265625, "learning_rate": 0.00017915110873496266, "loss": 0.0927, "step": 4491 }, { "epoch": 2.0966161026837806, "grad_norm": 0.8671875, "learning_rate": 0.00017914214011337443, "loss": 0.078, "step": 4492 }, { "epoch": 2.0970828471411904, "grad_norm": 1.0, "learning_rate": 0.00017913316978776815, "loss": 0.0675, "step": 4493 }, { "epoch": 2.0975495915985998, "grad_norm": 1.0859375, "learning_rate": 0.00017912419775833688, "loss": 0.091, "step": 4494 }, { "epoch": 2.098016336056009, "grad_norm": 0.71875, "learning_rate": 0.00017911522402527387, "loss": 0.056, "step": 4495 }, { "epoch": 2.098483080513419, "grad_norm": 0.94140625, "learning_rate": 0.00017910624858877232, "loss": 0.0753, "step": 4496 }, { "epoch": 2.0989498249708283, "grad_norm": 0.9765625, "learning_rate": 0.00017909727144902544, "loss": 0.0578, "step": 4497 }, { "epoch": 2.099416569428238, "grad_norm": 0.86328125, "learning_rate": 0.00017908829260622654, "loss": 0.0778, "step": 4498 }, { "epoch": 2.0998833138856474, "grad_norm": 0.98828125, "learning_rate": 0.000179079312060569, "loss": 0.0818, "step": 4499 }, { "epoch": 2.100350058343057, "grad_norm": 1.1796875, "learning_rate": 0.00017907032981224607, "loss": 0.059, "step": 4500 }, { "epoch": 2.1008168028004666, "grad_norm": 0.921875, "learning_rate": 0.0001790613458614512, "loss": 0.0717, "step": 4501 }, { "epoch": 2.1012835472578764, "grad_norm": 1.2109375, "learning_rate": 0.00017905236020837787, "loss": 0.0831, "step": 4502 }, { "epoch": 2.1017502917152857, "grad_norm": 0.73828125, "learning_rate": 0.00017904337285321953, "loss": 0.0487, "step": 4503 }, { "epoch": 2.1022170361726955, "grad_norm": 0.83203125, "learning_rate": 0.00017903438379616966, "loss": 0.0707, "step": 4504 }, { "epoch": 2.102683780630105, "grad_norm": 0.8984375, "learning_rate": 0.0001790253930374218, "loss": 0.0525, "step": 4505 }, { "epoch": 2.1031505250875147, "grad_norm": 0.8671875, "learning_rate": 0.00017901640057716955, "loss": 0.0576, "step": 4506 }, { "epoch": 2.103617269544924, "grad_norm": 1.0546875, "learning_rate": 0.00017900740641560653, "loss": 0.0779, "step": 4507 }, { "epoch": 2.104084014002334, "grad_norm": 1.0, "learning_rate": 0.0001789984105529264, "loss": 0.091, "step": 4508 }, { "epoch": 2.104550758459743, "grad_norm": 0.91015625, "learning_rate": 0.00017898941298932284, "loss": 0.0523, "step": 4509 }, { "epoch": 2.105017502917153, "grad_norm": 0.90234375, "learning_rate": 0.00017898041372498957, "loss": 0.065, "step": 4510 }, { "epoch": 2.1054842473745623, "grad_norm": 0.8125, "learning_rate": 0.00017897141276012034, "loss": 0.0629, "step": 4511 }, { "epoch": 2.105950991831972, "grad_norm": 0.9140625, "learning_rate": 0.00017896241009490903, "loss": 0.0722, "step": 4512 }, { "epoch": 2.1064177362893814, "grad_norm": 1.015625, "learning_rate": 0.00017895340572954938, "loss": 0.0972, "step": 4513 }, { "epoch": 2.1068844807467912, "grad_norm": 0.87109375, "learning_rate": 0.0001789443996642353, "loss": 0.0612, "step": 4514 }, { "epoch": 2.1073512252042006, "grad_norm": 0.6796875, "learning_rate": 0.0001789353918991607, "loss": 0.0498, "step": 4515 }, { "epoch": 2.1078179696616104, "grad_norm": 1.0390625, "learning_rate": 0.0001789263824345196, "loss": 0.0894, "step": 4516 }, { "epoch": 2.1082847141190197, "grad_norm": 0.8828125, "learning_rate": 0.00017891737127050586, "loss": 0.0646, "step": 4517 }, { "epoch": 2.1087514585764295, "grad_norm": 0.8046875, "learning_rate": 0.00017890835840731354, "loss": 0.0541, "step": 4518 }, { "epoch": 2.109218203033839, "grad_norm": 0.94140625, "learning_rate": 0.00017889934384513674, "loss": 0.0691, "step": 4519 }, { "epoch": 2.1096849474912487, "grad_norm": 0.75, "learning_rate": 0.0001788903275841695, "loss": 0.057, "step": 4520 }, { "epoch": 2.110151691948658, "grad_norm": 0.91015625, "learning_rate": 0.00017888130962460602, "loss": 0.073, "step": 4521 }, { "epoch": 2.110618436406068, "grad_norm": 0.8671875, "learning_rate": 0.00017887228996664038, "loss": 0.0663, "step": 4522 }, { "epoch": 2.111085180863477, "grad_norm": 0.9453125, "learning_rate": 0.00017886326861046685, "loss": 0.0855, "step": 4523 }, { "epoch": 2.111551925320887, "grad_norm": 0.96875, "learning_rate": 0.00017885424555627965, "loss": 0.0676, "step": 4524 }, { "epoch": 2.1120186697782963, "grad_norm": 0.8515625, "learning_rate": 0.00017884522080427304, "loss": 0.0622, "step": 4525 }, { "epoch": 2.112485414235706, "grad_norm": 0.85546875, "learning_rate": 0.00017883619435464136, "loss": 0.0557, "step": 4526 }, { "epoch": 2.1129521586931155, "grad_norm": 1.203125, "learning_rate": 0.00017882716620757892, "loss": 0.0922, "step": 4527 }, { "epoch": 2.1134189031505253, "grad_norm": 0.9375, "learning_rate": 0.00017881813636328015, "loss": 0.093, "step": 4528 }, { "epoch": 2.1138856476079346, "grad_norm": 0.89453125, "learning_rate": 0.00017880910482193945, "loss": 0.0529, "step": 4529 }, { "epoch": 2.1143523920653444, "grad_norm": 0.953125, "learning_rate": 0.0001788000715837513, "loss": 0.0731, "step": 4530 }, { "epoch": 2.1148191365227538, "grad_norm": 0.796875, "learning_rate": 0.00017879103664891015, "loss": 0.0627, "step": 4531 }, { "epoch": 2.1152858809801636, "grad_norm": 1.03125, "learning_rate": 0.00017878200001761058, "loss": 0.0703, "step": 4532 }, { "epoch": 2.115752625437573, "grad_norm": 0.92578125, "learning_rate": 0.00017877296169004712, "loss": 0.0686, "step": 4533 }, { "epoch": 2.1162193698949823, "grad_norm": 0.95703125, "learning_rate": 0.00017876392166641444, "loss": 0.0878, "step": 4534 }, { "epoch": 2.116686114352392, "grad_norm": 1.1953125, "learning_rate": 0.0001787548799469071, "loss": 0.0672, "step": 4535 }, { "epoch": 2.1171528588098014, "grad_norm": 0.98828125, "learning_rate": 0.00017874583653171983, "loss": 0.0677, "step": 4536 }, { "epoch": 2.117619603267211, "grad_norm": 0.7421875, "learning_rate": 0.0001787367914210473, "loss": 0.046, "step": 4537 }, { "epoch": 2.1180863477246206, "grad_norm": 1.0078125, "learning_rate": 0.0001787277446150843, "loss": 0.0609, "step": 4538 }, { "epoch": 2.1185530921820304, "grad_norm": 1.0, "learning_rate": 0.00017871869611402563, "loss": 0.0849, "step": 4539 }, { "epoch": 2.1190198366394397, "grad_norm": 0.921875, "learning_rate": 0.0001787096459180661, "loss": 0.0755, "step": 4540 }, { "epoch": 2.1194865810968495, "grad_norm": 0.7890625, "learning_rate": 0.00017870059402740055, "loss": 0.0688, "step": 4541 }, { "epoch": 2.119953325554259, "grad_norm": 1.046875, "learning_rate": 0.00017869154044222385, "loss": 0.0831, "step": 4542 }, { "epoch": 2.1204200700116687, "grad_norm": 0.8046875, "learning_rate": 0.00017868248516273103, "loss": 0.0521, "step": 4543 }, { "epoch": 2.120886814469078, "grad_norm": 0.890625, "learning_rate": 0.00017867342818911696, "loss": 0.0742, "step": 4544 }, { "epoch": 2.121353558926488, "grad_norm": 0.76171875, "learning_rate": 0.00017866436952157676, "loss": 0.061, "step": 4545 }, { "epoch": 2.121820303383897, "grad_norm": 1.171875, "learning_rate": 0.00017865530916030535, "loss": 0.1028, "step": 4546 }, { "epoch": 2.122287047841307, "grad_norm": 0.94140625, "learning_rate": 0.00017864624710549787, "loss": 0.0861, "step": 4547 }, { "epoch": 2.1227537922987163, "grad_norm": 0.765625, "learning_rate": 0.00017863718335734942, "loss": 0.0612, "step": 4548 }, { "epoch": 2.123220536756126, "grad_norm": 1.0703125, "learning_rate": 0.00017862811791605521, "loss": 0.0827, "step": 4549 }, { "epoch": 2.1236872812135354, "grad_norm": 0.9921875, "learning_rate": 0.00017861905078181032, "loss": 0.0656, "step": 4550 }, { "epoch": 2.1241540256709452, "grad_norm": 1.015625, "learning_rate": 0.00017860998195481006, "loss": 0.0789, "step": 4551 }, { "epoch": 2.1246207701283546, "grad_norm": 0.85546875, "learning_rate": 0.00017860091143524968, "loss": 0.06, "step": 4552 }, { "epoch": 2.1250875145857644, "grad_norm": 1.015625, "learning_rate": 0.00017859183922332449, "loss": 0.0789, "step": 4553 }, { "epoch": 2.1255542590431737, "grad_norm": 0.765625, "learning_rate": 0.00017858276531922978, "loss": 0.0633, "step": 4554 }, { "epoch": 2.1260210035005835, "grad_norm": 0.95703125, "learning_rate": 0.0001785736897231609, "loss": 0.0846, "step": 4555 }, { "epoch": 2.126487747957993, "grad_norm": 0.76171875, "learning_rate": 0.00017856461243531338, "loss": 0.0627, "step": 4556 }, { "epoch": 2.1269544924154027, "grad_norm": 0.921875, "learning_rate": 0.00017855553345588255, "loss": 0.0899, "step": 4557 }, { "epoch": 2.127421236872812, "grad_norm": 0.984375, "learning_rate": 0.0001785464527850639, "loss": 0.0807, "step": 4558 }, { "epoch": 2.127887981330222, "grad_norm": 0.71484375, "learning_rate": 0.000178537370423053, "loss": 0.0471, "step": 4559 }, { "epoch": 2.128354725787631, "grad_norm": 1.03125, "learning_rate": 0.0001785282863700454, "loss": 0.076, "step": 4560 }, { "epoch": 2.128821470245041, "grad_norm": 0.8359375, "learning_rate": 0.00017851920062623663, "loss": 0.0566, "step": 4561 }, { "epoch": 2.1292882147024503, "grad_norm": 1.4453125, "learning_rate": 0.00017851011319182237, "loss": 0.0747, "step": 4562 }, { "epoch": 2.12975495915986, "grad_norm": 0.73828125, "learning_rate": 0.00017850102406699822, "loss": 0.0638, "step": 4563 }, { "epoch": 2.1302217036172695, "grad_norm": 0.859375, "learning_rate": 0.00017849193325195997, "loss": 0.0554, "step": 4564 }, { "epoch": 2.1306884480746793, "grad_norm": 0.99609375, "learning_rate": 0.0001784828407469033, "loss": 0.0674, "step": 4565 }, { "epoch": 2.1311551925320886, "grad_norm": 1.0078125, "learning_rate": 0.000178473746552024, "loss": 0.063, "step": 4566 }, { "epoch": 2.1316219369894984, "grad_norm": 1.1015625, "learning_rate": 0.00017846465066751784, "loss": 0.083, "step": 4567 }, { "epoch": 2.1320886814469078, "grad_norm": 1.046875, "learning_rate": 0.0001784555530935807, "loss": 0.0883, "step": 4568 }, { "epoch": 2.1325554259043176, "grad_norm": 0.85546875, "learning_rate": 0.00017844645383040847, "loss": 0.0666, "step": 4569 }, { "epoch": 2.133022170361727, "grad_norm": 0.82421875, "learning_rate": 0.00017843735287819703, "loss": 0.0615, "step": 4570 }, { "epoch": 2.1334889148191367, "grad_norm": 1.0546875, "learning_rate": 0.00017842825023714237, "loss": 0.0876, "step": 4571 }, { "epoch": 2.133955659276546, "grad_norm": 1.0078125, "learning_rate": 0.00017841914590744045, "loss": 0.0783, "step": 4572 }, { "epoch": 2.134422403733956, "grad_norm": 0.65625, "learning_rate": 0.00017841003988928734, "loss": 0.0512, "step": 4573 }, { "epoch": 2.134889148191365, "grad_norm": 0.890625, "learning_rate": 0.00017840093218287906, "loss": 0.0682, "step": 4574 }, { "epoch": 2.135355892648775, "grad_norm": 0.8515625, "learning_rate": 0.00017839182278841172, "loss": 0.0586, "step": 4575 }, { "epoch": 2.1358226371061844, "grad_norm": 0.796875, "learning_rate": 0.00017838271170608145, "loss": 0.0543, "step": 4576 }, { "epoch": 2.1362893815635937, "grad_norm": 0.79296875, "learning_rate": 0.00017837359893608444, "loss": 0.0688, "step": 4577 }, { "epoch": 2.1367561260210035, "grad_norm": 0.875, "learning_rate": 0.0001783644844786169, "loss": 0.0599, "step": 4578 }, { "epoch": 2.137222870478413, "grad_norm": 0.890625, "learning_rate": 0.00017835536833387506, "loss": 0.0692, "step": 4579 }, { "epoch": 2.1376896149358227, "grad_norm": 1.046875, "learning_rate": 0.00017834625050205518, "loss": 0.0741, "step": 4580 }, { "epoch": 2.138156359393232, "grad_norm": 1.265625, "learning_rate": 0.0001783371309833536, "loss": 0.0774, "step": 4581 }, { "epoch": 2.138623103850642, "grad_norm": 0.984375, "learning_rate": 0.00017832800977796672, "loss": 0.0689, "step": 4582 }, { "epoch": 2.139089848308051, "grad_norm": 0.859375, "learning_rate": 0.0001783188868860908, "loss": 0.067, "step": 4583 }, { "epoch": 2.139556592765461, "grad_norm": 0.80078125, "learning_rate": 0.0001783097623079224, "loss": 0.0534, "step": 4584 }, { "epoch": 2.1400233372228703, "grad_norm": 0.8984375, "learning_rate": 0.00017830063604365795, "loss": 0.0845, "step": 4585 }, { "epoch": 2.14049008168028, "grad_norm": 0.6953125, "learning_rate": 0.00017829150809349392, "loss": 0.0485, "step": 4586 }, { "epoch": 2.1409568261376895, "grad_norm": 0.87890625, "learning_rate": 0.00017828237845762682, "loss": 0.0609, "step": 4587 }, { "epoch": 2.1414235705950992, "grad_norm": 0.9609375, "learning_rate": 0.00017827324713625328, "loss": 0.0728, "step": 4588 }, { "epoch": 2.1418903150525086, "grad_norm": 0.84765625, "learning_rate": 0.0001782641141295699, "loss": 0.0679, "step": 4589 }, { "epoch": 2.1423570595099184, "grad_norm": 0.68359375, "learning_rate": 0.0001782549794377733, "loss": 0.0542, "step": 4590 }, { "epoch": 2.1428238039673277, "grad_norm": 1.125, "learning_rate": 0.00017824584306106014, "loss": 0.0984, "step": 4591 }, { "epoch": 2.1432905484247375, "grad_norm": 0.8671875, "learning_rate": 0.0001782367049996272, "loss": 0.08, "step": 4592 }, { "epoch": 2.143757292882147, "grad_norm": 0.9140625, "learning_rate": 0.00017822756525367117, "loss": 0.0871, "step": 4593 }, { "epoch": 2.1442240373395567, "grad_norm": 0.8828125, "learning_rate": 0.00017821842382338888, "loss": 0.0731, "step": 4594 }, { "epoch": 2.144690781796966, "grad_norm": 0.91015625, "learning_rate": 0.00017820928070897711, "loss": 0.0692, "step": 4595 }, { "epoch": 2.145157526254376, "grad_norm": 0.79296875, "learning_rate": 0.00017820013591063279, "loss": 0.0516, "step": 4596 }, { "epoch": 2.145624270711785, "grad_norm": 0.99609375, "learning_rate": 0.00017819098942855276, "loss": 0.0621, "step": 4597 }, { "epoch": 2.146091015169195, "grad_norm": 0.890625, "learning_rate": 0.00017818184126293396, "loss": 0.0631, "step": 4598 }, { "epoch": 2.1465577596266043, "grad_norm": 0.9375, "learning_rate": 0.00017817269141397343, "loss": 0.0625, "step": 4599 }, { "epoch": 2.147024504084014, "grad_norm": 0.71875, "learning_rate": 0.00017816353988186803, "loss": 0.0544, "step": 4600 }, { "epoch": 2.1474912485414235, "grad_norm": 0.89453125, "learning_rate": 0.00017815438666681497, "loss": 0.0782, "step": 4601 }, { "epoch": 2.1479579929988333, "grad_norm": 0.84375, "learning_rate": 0.00017814523176901123, "loss": 0.071, "step": 4602 }, { "epoch": 2.1484247374562426, "grad_norm": 0.9609375, "learning_rate": 0.0001781360751886539, "loss": 0.0744, "step": 4603 }, { "epoch": 2.1488914819136524, "grad_norm": 0.83203125, "learning_rate": 0.00017812691692594025, "loss": 0.0638, "step": 4604 }, { "epoch": 2.1493582263710618, "grad_norm": 1.0, "learning_rate": 0.00017811775698106736, "loss": 0.07, "step": 4605 }, { "epoch": 2.1498249708284716, "grad_norm": 0.9140625, "learning_rate": 0.00017810859535423248, "loss": 0.0821, "step": 4606 }, { "epoch": 2.150291715285881, "grad_norm": 0.8203125, "learning_rate": 0.0001780994320456329, "loss": 0.0792, "step": 4607 }, { "epoch": 2.1507584597432907, "grad_norm": 0.6953125, "learning_rate": 0.00017809026705546587, "loss": 0.0543, "step": 4608 }, { "epoch": 2.1512252042007, "grad_norm": 0.859375, "learning_rate": 0.00017808110038392878, "loss": 0.0518, "step": 4609 }, { "epoch": 2.15169194865811, "grad_norm": 1.0625, "learning_rate": 0.00017807193203121896, "loss": 0.0611, "step": 4610 }, { "epoch": 2.152158693115519, "grad_norm": 0.875, "learning_rate": 0.0001780627619975338, "loss": 0.0546, "step": 4611 }, { "epoch": 2.152625437572929, "grad_norm": 0.7109375, "learning_rate": 0.00017805359028307075, "loss": 0.0616, "step": 4612 }, { "epoch": 2.1530921820303384, "grad_norm": 0.8359375, "learning_rate": 0.00017804441688802736, "loss": 0.0617, "step": 4613 }, { "epoch": 2.153558926487748, "grad_norm": 0.78125, "learning_rate": 0.00017803524181260103, "loss": 0.0556, "step": 4614 }, { "epoch": 2.1540256709451575, "grad_norm": 0.92578125, "learning_rate": 0.00017802606505698938, "loss": 0.0586, "step": 4615 }, { "epoch": 2.154492415402567, "grad_norm": 1.109375, "learning_rate": 0.00017801688662138996, "loss": 0.0714, "step": 4616 }, { "epoch": 2.1549591598599767, "grad_norm": 0.87109375, "learning_rate": 0.00017800770650600045, "loss": 0.0543, "step": 4617 }, { "epoch": 2.1554259043173865, "grad_norm": 1.140625, "learning_rate": 0.00017799852471101844, "loss": 0.0905, "step": 4618 }, { "epoch": 2.155892648774796, "grad_norm": 0.921875, "learning_rate": 0.00017798934123664165, "loss": 0.0719, "step": 4619 }, { "epoch": 2.156359393232205, "grad_norm": 1.0703125, "learning_rate": 0.00017798015608306783, "loss": 0.0869, "step": 4620 }, { "epoch": 2.156826137689615, "grad_norm": 1.0390625, "learning_rate": 0.0001779709692504947, "loss": 0.0647, "step": 4621 }, { "epoch": 2.1572928821470243, "grad_norm": 0.76953125, "learning_rate": 0.0001779617807391201, "loss": 0.0561, "step": 4622 }, { "epoch": 2.157759626604434, "grad_norm": 0.9609375, "learning_rate": 0.0001779525905491419, "loss": 0.0704, "step": 4623 }, { "epoch": 2.1582263710618435, "grad_norm": 0.9375, "learning_rate": 0.00017794339868075788, "loss": 0.0782, "step": 4624 }, { "epoch": 2.1586931155192532, "grad_norm": 0.6171875, "learning_rate": 0.00017793420513416604, "loss": 0.0498, "step": 4625 }, { "epoch": 2.1591598599766626, "grad_norm": 0.7734375, "learning_rate": 0.00017792500990956427, "loss": 0.0554, "step": 4626 }, { "epoch": 2.1596266044340724, "grad_norm": 0.87890625, "learning_rate": 0.0001779158130071506, "loss": 0.0732, "step": 4627 }, { "epoch": 2.1600933488914817, "grad_norm": 1.171875, "learning_rate": 0.00017790661442712303, "loss": 0.0903, "step": 4628 }, { "epoch": 2.1605600933488915, "grad_norm": 0.87890625, "learning_rate": 0.0001778974141696796, "loss": 0.0543, "step": 4629 }, { "epoch": 2.161026837806301, "grad_norm": 1.09375, "learning_rate": 0.00017788821223501842, "loss": 0.0865, "step": 4630 }, { "epoch": 2.1614935822637107, "grad_norm": 0.8828125, "learning_rate": 0.0001778790086233376, "loss": 0.0642, "step": 4631 }, { "epoch": 2.16196032672112, "grad_norm": 0.9375, "learning_rate": 0.00017786980333483533, "loss": 0.0625, "step": 4632 }, { "epoch": 2.16242707117853, "grad_norm": 0.8828125, "learning_rate": 0.0001778605963697098, "loss": 0.079, "step": 4633 }, { "epoch": 2.162893815635939, "grad_norm": 0.91015625, "learning_rate": 0.0001778513877281592, "loss": 0.0616, "step": 4634 }, { "epoch": 2.163360560093349, "grad_norm": 0.98046875, "learning_rate": 0.0001778421774103819, "loss": 0.0646, "step": 4635 }, { "epoch": 2.1638273045507583, "grad_norm": 0.92578125, "learning_rate": 0.00017783296541657614, "loss": 0.063, "step": 4636 }, { "epoch": 2.164294049008168, "grad_norm": 1.1875, "learning_rate": 0.00017782375174694027, "loss": 0.0823, "step": 4637 }, { "epoch": 2.1647607934655775, "grad_norm": 0.76953125, "learning_rate": 0.00017781453640167266, "loss": 0.0579, "step": 4638 }, { "epoch": 2.1652275379229873, "grad_norm": 0.94140625, "learning_rate": 0.00017780531938097177, "loss": 0.0613, "step": 4639 }, { "epoch": 2.1656942823803966, "grad_norm": 0.97265625, "learning_rate": 0.00017779610068503602, "loss": 0.0591, "step": 4640 }, { "epoch": 2.1661610268378064, "grad_norm": 0.984375, "learning_rate": 0.0001777868803140639, "loss": 0.0794, "step": 4641 }, { "epoch": 2.1666277712952158, "grad_norm": 1.453125, "learning_rate": 0.00017777765826825395, "loss": 0.0986, "step": 4642 }, { "epoch": 2.1670945157526256, "grad_norm": 1.078125, "learning_rate": 0.0001777684345478047, "loss": 0.0694, "step": 4643 }, { "epoch": 2.167561260210035, "grad_norm": 0.8828125, "learning_rate": 0.00017775920915291479, "loss": 0.0819, "step": 4644 }, { "epoch": 2.1680280046674447, "grad_norm": 1.125, "learning_rate": 0.0001777499820837828, "loss": 0.0942, "step": 4645 }, { "epoch": 2.168494749124854, "grad_norm": 1.0078125, "learning_rate": 0.00017774075334060747, "loss": 0.0686, "step": 4646 }, { "epoch": 2.168961493582264, "grad_norm": 0.828125, "learning_rate": 0.00017773152292358744, "loss": 0.0739, "step": 4647 }, { "epoch": 2.169428238039673, "grad_norm": 1.0234375, "learning_rate": 0.00017772229083292152, "loss": 0.0874, "step": 4648 }, { "epoch": 2.169894982497083, "grad_norm": 0.95703125, "learning_rate": 0.00017771305706880842, "loss": 0.0595, "step": 4649 }, { "epoch": 2.1703617269544924, "grad_norm": 0.703125, "learning_rate": 0.00017770382163144696, "loss": 0.0619, "step": 4650 }, { "epoch": 2.170828471411902, "grad_norm": 0.875, "learning_rate": 0.00017769458452103603, "loss": 0.0678, "step": 4651 }, { "epoch": 2.1712952158693115, "grad_norm": 0.70703125, "learning_rate": 0.00017768534573777447, "loss": 0.0697, "step": 4652 }, { "epoch": 2.1717619603267213, "grad_norm": 0.9375, "learning_rate": 0.00017767610528186123, "loss": 0.087, "step": 4653 }, { "epoch": 2.1722287047841307, "grad_norm": 1.015625, "learning_rate": 0.00017766686315349526, "loss": 0.0703, "step": 4654 }, { "epoch": 2.1726954492415405, "grad_norm": 0.86328125, "learning_rate": 0.00017765761935287558, "loss": 0.0591, "step": 4655 }, { "epoch": 2.17316219369895, "grad_norm": 0.90625, "learning_rate": 0.00017764837388020114, "loss": 0.0816, "step": 4656 }, { "epoch": 2.1736289381563596, "grad_norm": 0.859375, "learning_rate": 0.00017763912673567108, "loss": 0.062, "step": 4657 }, { "epoch": 2.174095682613769, "grad_norm": 0.83203125, "learning_rate": 0.00017762987791948448, "loss": 0.0592, "step": 4658 }, { "epoch": 2.1745624270711783, "grad_norm": 0.90625, "learning_rate": 0.0001776206274318405, "loss": 0.0635, "step": 4659 }, { "epoch": 2.175029171528588, "grad_norm": 0.890625, "learning_rate": 0.00017761137527293826, "loss": 0.0692, "step": 4660 }, { "epoch": 2.175495915985998, "grad_norm": 0.8671875, "learning_rate": 0.000177602121442977, "loss": 0.0748, "step": 4661 }, { "epoch": 2.1759626604434072, "grad_norm": 0.9765625, "learning_rate": 0.000177592865942156, "loss": 0.078, "step": 4662 }, { "epoch": 2.1764294049008166, "grad_norm": 0.99609375, "learning_rate": 0.00017758360877067445, "loss": 0.0777, "step": 4663 }, { "epoch": 2.1768961493582264, "grad_norm": 0.85546875, "learning_rate": 0.00017757434992873177, "loss": 0.079, "step": 4664 }, { "epoch": 2.1773628938156357, "grad_norm": 0.8671875, "learning_rate": 0.00017756508941652727, "loss": 0.0707, "step": 4665 }, { "epoch": 2.1778296382730455, "grad_norm": 0.9375, "learning_rate": 0.0001775558272342603, "loss": 0.0787, "step": 4666 }, { "epoch": 2.178296382730455, "grad_norm": 1.0, "learning_rate": 0.00017754656338213033, "loss": 0.081, "step": 4667 }, { "epoch": 2.1787631271878647, "grad_norm": 0.85546875, "learning_rate": 0.0001775372978603368, "loss": 0.0725, "step": 4668 }, { "epoch": 2.179229871645274, "grad_norm": 0.8984375, "learning_rate": 0.00017752803066907924, "loss": 0.0772, "step": 4669 }, { "epoch": 2.179696616102684, "grad_norm": 1.0625, "learning_rate": 0.00017751876180855715, "loss": 0.0715, "step": 4670 }, { "epoch": 2.180163360560093, "grad_norm": 0.828125, "learning_rate": 0.00017750949127897015, "loss": 0.0564, "step": 4671 }, { "epoch": 2.180630105017503, "grad_norm": 0.78515625, "learning_rate": 0.00017750021908051776, "loss": 0.0508, "step": 4672 }, { "epoch": 2.1810968494749123, "grad_norm": 0.86328125, "learning_rate": 0.00017749094521339967, "loss": 0.0586, "step": 4673 }, { "epoch": 2.181563593932322, "grad_norm": 1.0, "learning_rate": 0.00017748166967781554, "loss": 0.0726, "step": 4674 }, { "epoch": 2.1820303383897315, "grad_norm": 1.0859375, "learning_rate": 0.0001774723924739651, "loss": 0.0767, "step": 4675 }, { "epoch": 2.1824970828471413, "grad_norm": 0.9453125, "learning_rate": 0.00017746311360204812, "loss": 0.0755, "step": 4676 }, { "epoch": 2.1829638273045506, "grad_norm": 0.91796875, "learning_rate": 0.00017745383306226432, "loss": 0.0698, "step": 4677 }, { "epoch": 2.1834305717619604, "grad_norm": 0.94921875, "learning_rate": 0.0001774445508548136, "loss": 0.0685, "step": 4678 }, { "epoch": 2.1838973162193698, "grad_norm": 0.73828125, "learning_rate": 0.00017743526697989575, "loss": 0.0559, "step": 4679 }, { "epoch": 2.1843640606767796, "grad_norm": 0.92578125, "learning_rate": 0.00017742598143771069, "loss": 0.0881, "step": 4680 }, { "epoch": 2.184830805134189, "grad_norm": 0.9375, "learning_rate": 0.00017741669422845834, "loss": 0.0559, "step": 4681 }, { "epoch": 2.1852975495915987, "grad_norm": 0.8125, "learning_rate": 0.00017740740535233867, "loss": 0.0571, "step": 4682 }, { "epoch": 2.185764294049008, "grad_norm": 1.0703125, "learning_rate": 0.00017739811480955167, "loss": 0.0719, "step": 4683 }, { "epoch": 2.186231038506418, "grad_norm": 1.0546875, "learning_rate": 0.0001773888226002974, "loss": 0.0798, "step": 4684 }, { "epoch": 2.186697782963827, "grad_norm": 0.65625, "learning_rate": 0.0001773795287247759, "loss": 0.0565, "step": 4685 }, { "epoch": 2.187164527421237, "grad_norm": 0.953125, "learning_rate": 0.0001773702331831873, "loss": 0.0727, "step": 4686 }, { "epoch": 2.1876312718786464, "grad_norm": 0.87109375, "learning_rate": 0.00017736093597573173, "loss": 0.0766, "step": 4687 }, { "epoch": 2.188098016336056, "grad_norm": 1.078125, "learning_rate": 0.00017735163710260937, "loss": 0.09, "step": 4688 }, { "epoch": 2.1885647607934655, "grad_norm": 0.96484375, "learning_rate": 0.00017734233656402045, "loss": 0.0596, "step": 4689 }, { "epoch": 2.1890315052508753, "grad_norm": 0.890625, "learning_rate": 0.00017733303436016522, "loss": 0.0582, "step": 4690 }, { "epoch": 2.1894982497082847, "grad_norm": 0.70703125, "learning_rate": 0.00017732373049124392, "loss": 0.0592, "step": 4691 }, { "epoch": 2.1899649941656945, "grad_norm": 0.68359375, "learning_rate": 0.00017731442495745695, "loss": 0.061, "step": 4692 }, { "epoch": 2.190431738623104, "grad_norm": 0.88671875, "learning_rate": 0.0001773051177590046, "loss": 0.0582, "step": 4693 }, { "epoch": 2.1908984830805136, "grad_norm": 1.21875, "learning_rate": 0.00017729580889608735, "loss": 0.079, "step": 4694 }, { "epoch": 2.191365227537923, "grad_norm": 1.15625, "learning_rate": 0.00017728649836890552, "loss": 0.0805, "step": 4695 }, { "epoch": 2.1918319719953328, "grad_norm": 0.99609375, "learning_rate": 0.00017727718617765965, "loss": 0.0726, "step": 4696 }, { "epoch": 2.192298716452742, "grad_norm": 0.82421875, "learning_rate": 0.0001772678723225502, "loss": 0.0529, "step": 4697 }, { "epoch": 2.192765460910152, "grad_norm": 0.921875, "learning_rate": 0.00017725855680377774, "loss": 0.0599, "step": 4698 }, { "epoch": 2.1932322053675612, "grad_norm": 1.0078125, "learning_rate": 0.00017724923962154283, "loss": 0.074, "step": 4699 }, { "epoch": 2.193698949824971, "grad_norm": 0.8046875, "learning_rate": 0.0001772399207760461, "loss": 0.0543, "step": 4700 }, { "epoch": 2.1941656942823804, "grad_norm": 0.91015625, "learning_rate": 0.00017723060026748818, "loss": 0.0805, "step": 4701 }, { "epoch": 2.1946324387397897, "grad_norm": 0.8359375, "learning_rate": 0.00017722127809606972, "loss": 0.0729, "step": 4702 }, { "epoch": 2.1950991831971995, "grad_norm": 0.82421875, "learning_rate": 0.00017721195426199146, "loss": 0.058, "step": 4703 }, { "epoch": 2.195565927654609, "grad_norm": 0.734375, "learning_rate": 0.00017720262876545417, "loss": 0.0431, "step": 4704 }, { "epoch": 2.1960326721120187, "grad_norm": 0.87890625, "learning_rate": 0.00017719330160665862, "loss": 0.0594, "step": 4705 }, { "epoch": 2.196499416569428, "grad_norm": 1.0078125, "learning_rate": 0.00017718397278580566, "loss": 0.0827, "step": 4706 }, { "epoch": 2.196966161026838, "grad_norm": 0.75390625, "learning_rate": 0.0001771746423030961, "loss": 0.0601, "step": 4707 }, { "epoch": 2.197432905484247, "grad_norm": 0.90625, "learning_rate": 0.00017716531015873088, "loss": 0.0682, "step": 4708 }, { "epoch": 2.197899649941657, "grad_norm": 1.0546875, "learning_rate": 0.00017715597635291092, "loss": 0.0793, "step": 4709 }, { "epoch": 2.1983663943990663, "grad_norm": 1.0234375, "learning_rate": 0.00017714664088583717, "loss": 0.0753, "step": 4710 }, { "epoch": 2.198833138856476, "grad_norm": 0.98828125, "learning_rate": 0.00017713730375771064, "loss": 0.0843, "step": 4711 }, { "epoch": 2.1992998833138855, "grad_norm": 0.7265625, "learning_rate": 0.0001771279649687324, "loss": 0.0633, "step": 4712 }, { "epoch": 2.1997666277712953, "grad_norm": 0.80078125, "learning_rate": 0.00017711862451910349, "loss": 0.058, "step": 4713 }, { "epoch": 2.2002333722287046, "grad_norm": 0.87890625, "learning_rate": 0.000177109282409025, "loss": 0.0503, "step": 4714 }, { "epoch": 2.2007001166861144, "grad_norm": 1.03125, "learning_rate": 0.00017709993863869816, "loss": 0.0848, "step": 4715 }, { "epoch": 2.201166861143524, "grad_norm": 0.8671875, "learning_rate": 0.0001770905932083241, "loss": 0.0694, "step": 4716 }, { "epoch": 2.2016336056009336, "grad_norm": 1.015625, "learning_rate": 0.00017708124611810397, "loss": 0.0789, "step": 4717 }, { "epoch": 2.202100350058343, "grad_norm": 0.875, "learning_rate": 0.00017707189736823912, "loss": 0.0701, "step": 4718 }, { "epoch": 2.2025670945157527, "grad_norm": 1.0078125, "learning_rate": 0.0001770625469589308, "loss": 0.0745, "step": 4719 }, { "epoch": 2.203033838973162, "grad_norm": 0.87890625, "learning_rate": 0.00017705319489038038, "loss": 0.0614, "step": 4720 }, { "epoch": 2.203500583430572, "grad_norm": 0.87890625, "learning_rate": 0.00017704384116278913, "loss": 0.0598, "step": 4721 }, { "epoch": 2.203967327887981, "grad_norm": 1.0625, "learning_rate": 0.00017703448577635852, "loss": 0.0584, "step": 4722 }, { "epoch": 2.204434072345391, "grad_norm": 1.1171875, "learning_rate": 0.00017702512873128996, "loss": 0.1029, "step": 4723 }, { "epoch": 2.2049008168028004, "grad_norm": 1.15625, "learning_rate": 0.00017701577002778493, "loss": 0.1049, "step": 4724 }, { "epoch": 2.20536756126021, "grad_norm": 0.91015625, "learning_rate": 0.0001770064096660449, "loss": 0.0675, "step": 4725 }, { "epoch": 2.2058343057176195, "grad_norm": 0.8046875, "learning_rate": 0.00017699704764627145, "loss": 0.0507, "step": 4726 }, { "epoch": 2.2063010501750293, "grad_norm": 0.89453125, "learning_rate": 0.0001769876839686661, "loss": 0.0743, "step": 4727 }, { "epoch": 2.2067677946324387, "grad_norm": 0.75390625, "learning_rate": 0.00017697831863343054, "loss": 0.0539, "step": 4728 }, { "epoch": 2.2072345390898485, "grad_norm": 1.1875, "learning_rate": 0.00017696895164076636, "loss": 0.0995, "step": 4729 }, { "epoch": 2.207701283547258, "grad_norm": 0.80078125, "learning_rate": 0.00017695958299087522, "loss": 0.0713, "step": 4730 }, { "epoch": 2.2081680280046676, "grad_norm": 0.859375, "learning_rate": 0.00017695021268395887, "loss": 0.0583, "step": 4731 }, { "epoch": 2.208634772462077, "grad_norm": 0.92578125, "learning_rate": 0.0001769408407202191, "loss": 0.0666, "step": 4732 }, { "epoch": 2.2091015169194868, "grad_norm": 0.93359375, "learning_rate": 0.00017693146709985764, "loss": 0.0686, "step": 4733 }, { "epoch": 2.209568261376896, "grad_norm": 0.7890625, "learning_rate": 0.00017692209182307636, "loss": 0.0764, "step": 4734 }, { "epoch": 2.210035005834306, "grad_norm": 0.8671875, "learning_rate": 0.00017691271489007706, "loss": 0.0585, "step": 4735 }, { "epoch": 2.2105017502917153, "grad_norm": 0.9453125, "learning_rate": 0.00017690333630106172, "loss": 0.0648, "step": 4736 }, { "epoch": 2.210968494749125, "grad_norm": 1.03125, "learning_rate": 0.0001768939560562322, "loss": 0.0557, "step": 4737 }, { "epoch": 2.2114352392065344, "grad_norm": 0.71875, "learning_rate": 0.00017688457415579047, "loss": 0.0638, "step": 4738 }, { "epoch": 2.211901983663944, "grad_norm": 0.953125, "learning_rate": 0.00017687519059993859, "loss": 0.0834, "step": 4739 }, { "epoch": 2.2123687281213535, "grad_norm": 0.87109375, "learning_rate": 0.00017686580538887854, "loss": 0.0429, "step": 4740 }, { "epoch": 2.212835472578763, "grad_norm": 0.8359375, "learning_rate": 0.0001768564185228124, "loss": 0.0687, "step": 4741 }, { "epoch": 2.2133022170361727, "grad_norm": 1.109375, "learning_rate": 0.0001768470300019423, "loss": 0.064, "step": 4742 }, { "epoch": 2.2137689614935825, "grad_norm": 1.3046875, "learning_rate": 0.0001768376398264704, "loss": 0.1207, "step": 4743 }, { "epoch": 2.214235705950992, "grad_norm": 1.0625, "learning_rate": 0.00017682824799659884, "loss": 0.0918, "step": 4744 }, { "epoch": 2.214702450408401, "grad_norm": 1.0, "learning_rate": 0.0001768188545125299, "loss": 0.0701, "step": 4745 }, { "epoch": 2.215169194865811, "grad_norm": 0.984375, "learning_rate": 0.00017680945937446576, "loss": 0.0792, "step": 4746 }, { "epoch": 2.2156359393232203, "grad_norm": 0.75390625, "learning_rate": 0.00017680006258260873, "loss": 0.0812, "step": 4747 }, { "epoch": 2.21610268378063, "grad_norm": 0.93359375, "learning_rate": 0.00017679066413716117, "loss": 0.0739, "step": 4748 }, { "epoch": 2.2165694282380395, "grad_norm": 0.9140625, "learning_rate": 0.00017678126403832537, "loss": 0.0663, "step": 4749 }, { "epoch": 2.2170361726954493, "grad_norm": 0.625, "learning_rate": 0.0001767718622863038, "loss": 0.0511, "step": 4750 }, { "epoch": 2.2175029171528586, "grad_norm": 0.7578125, "learning_rate": 0.00017676245888129886, "loss": 0.0409, "step": 4751 }, { "epoch": 2.2179696616102684, "grad_norm": 0.91015625, "learning_rate": 0.00017675305382351298, "loss": 0.0858, "step": 4752 }, { "epoch": 2.218436406067678, "grad_norm": 1.0625, "learning_rate": 0.00017674364711314872, "loss": 0.1016, "step": 4753 }, { "epoch": 2.2189031505250876, "grad_norm": 0.6484375, "learning_rate": 0.00017673423875040855, "loss": 0.0584, "step": 4754 }, { "epoch": 2.219369894982497, "grad_norm": 0.85546875, "learning_rate": 0.00017672482873549514, "loss": 0.0598, "step": 4755 }, { "epoch": 2.2198366394399067, "grad_norm": 0.91015625, "learning_rate": 0.000176715417068611, "loss": 0.0678, "step": 4756 }, { "epoch": 2.220303383897316, "grad_norm": 1.0625, "learning_rate": 0.00017670600374995884, "loss": 0.089, "step": 4757 }, { "epoch": 2.220770128354726, "grad_norm": 0.87890625, "learning_rate": 0.00017669658877974127, "loss": 0.0709, "step": 4758 }, { "epoch": 2.221236872812135, "grad_norm": 0.9765625, "learning_rate": 0.00017668717215816107, "loss": 0.0777, "step": 4759 }, { "epoch": 2.221703617269545, "grad_norm": 1.0234375, "learning_rate": 0.00017667775388542097, "loss": 0.0882, "step": 4760 }, { "epoch": 2.2221703617269544, "grad_norm": 0.78125, "learning_rate": 0.00017666833396172375, "loss": 0.0506, "step": 4761 }, { "epoch": 2.222637106184364, "grad_norm": 0.81640625, "learning_rate": 0.00017665891238727224, "loss": 0.0665, "step": 4762 }, { "epoch": 2.2231038506417735, "grad_norm": 0.9375, "learning_rate": 0.00017664948916226928, "loss": 0.0668, "step": 4763 }, { "epoch": 2.2235705950991833, "grad_norm": 0.87890625, "learning_rate": 0.0001766400642869178, "loss": 0.0756, "step": 4764 }, { "epoch": 2.2240373395565927, "grad_norm": 0.80078125, "learning_rate": 0.00017663063776142068, "loss": 0.0526, "step": 4765 }, { "epoch": 2.2245040840140025, "grad_norm": 0.8984375, "learning_rate": 0.00017662120958598088, "loss": 0.0661, "step": 4766 }, { "epoch": 2.224970828471412, "grad_norm": 0.9375, "learning_rate": 0.00017661177976080147, "loss": 0.0568, "step": 4767 }, { "epoch": 2.2254375729288216, "grad_norm": 0.921875, "learning_rate": 0.00017660234828608545, "loss": 0.0605, "step": 4768 }, { "epoch": 2.225904317386231, "grad_norm": 1.203125, "learning_rate": 0.00017659291516203585, "loss": 0.1026, "step": 4769 }, { "epoch": 2.2263710618436408, "grad_norm": 0.82421875, "learning_rate": 0.0001765834803888558, "loss": 0.0448, "step": 4770 }, { "epoch": 2.22683780630105, "grad_norm": 0.85546875, "learning_rate": 0.00017657404396674843, "loss": 0.0786, "step": 4771 }, { "epoch": 2.22730455075846, "grad_norm": 1.0, "learning_rate": 0.000176564605895917, "loss": 0.0824, "step": 4772 }, { "epoch": 2.2277712952158693, "grad_norm": 0.80859375, "learning_rate": 0.0001765551661765646, "loss": 0.061, "step": 4773 }, { "epoch": 2.228238039673279, "grad_norm": 0.921875, "learning_rate": 0.00017654572480889456, "loss": 0.0719, "step": 4774 }, { "epoch": 2.2287047841306884, "grad_norm": 0.87890625, "learning_rate": 0.0001765362817931101, "loss": 0.0641, "step": 4775 }, { "epoch": 2.229171528588098, "grad_norm": 1.2109375, "learning_rate": 0.0001765268371294146, "loss": 0.0858, "step": 4776 }, { "epoch": 2.2296382730455075, "grad_norm": 0.98046875, "learning_rate": 0.0001765173908180114, "loss": 0.0845, "step": 4777 }, { "epoch": 2.2301050175029173, "grad_norm": 1.03125, "learning_rate": 0.00017650794285910387, "loss": 0.0723, "step": 4778 }, { "epoch": 2.2305717619603267, "grad_norm": 0.8125, "learning_rate": 0.00017649849325289543, "loss": 0.0564, "step": 4779 }, { "epoch": 2.2310385064177365, "grad_norm": 0.74609375, "learning_rate": 0.00017648904199958957, "loss": 0.0452, "step": 4780 }, { "epoch": 2.231505250875146, "grad_norm": 0.9609375, "learning_rate": 0.00017647958909938974, "loss": 0.054, "step": 4781 }, { "epoch": 2.2319719953325556, "grad_norm": 1.1171875, "learning_rate": 0.0001764701345524995, "loss": 0.091, "step": 4782 }, { "epoch": 2.232438739789965, "grad_norm": 1.34375, "learning_rate": 0.00017646067835912246, "loss": 0.0929, "step": 4783 }, { "epoch": 2.2329054842473743, "grad_norm": 1.0390625, "learning_rate": 0.00017645122051946217, "loss": 0.0595, "step": 4784 }, { "epoch": 2.233372228704784, "grad_norm": 0.921875, "learning_rate": 0.0001764417610337223, "loss": 0.0548, "step": 4785 }, { "epoch": 2.233838973162194, "grad_norm": 0.87890625, "learning_rate": 0.00017643229990210645, "loss": 0.0655, "step": 4786 }, { "epoch": 2.2343057176196033, "grad_norm": 0.94140625, "learning_rate": 0.00017642283712481838, "loss": 0.0813, "step": 4787 }, { "epoch": 2.2347724620770126, "grad_norm": 0.99609375, "learning_rate": 0.00017641337270206186, "loss": 0.0828, "step": 4788 }, { "epoch": 2.2352392065344224, "grad_norm": 0.875, "learning_rate": 0.00017640390663404065, "loss": 0.0826, "step": 4789 }, { "epoch": 2.235705950991832, "grad_norm": 1.03125, "learning_rate": 0.00017639443892095854, "loss": 0.0788, "step": 4790 }, { "epoch": 2.2361726954492416, "grad_norm": 0.9453125, "learning_rate": 0.00017638496956301939, "loss": 0.0624, "step": 4791 }, { "epoch": 2.236639439906651, "grad_norm": 0.88671875, "learning_rate": 0.00017637549856042712, "loss": 0.06, "step": 4792 }, { "epoch": 2.2371061843640607, "grad_norm": 0.70703125, "learning_rate": 0.0001763660259133856, "loss": 0.0427, "step": 4793 }, { "epoch": 2.23757292882147, "grad_norm": 0.9140625, "learning_rate": 0.00017635655162209885, "loss": 0.0628, "step": 4794 }, { "epoch": 2.23803967327888, "grad_norm": 0.9296875, "learning_rate": 0.0001763470756867708, "loss": 0.0826, "step": 4795 }, { "epoch": 2.2385064177362892, "grad_norm": 0.87109375, "learning_rate": 0.0001763375981076055, "loss": 0.06, "step": 4796 }, { "epoch": 2.238973162193699, "grad_norm": 0.9453125, "learning_rate": 0.00017632811888480704, "loss": 0.0784, "step": 4797 }, { "epoch": 2.2394399066511084, "grad_norm": 0.9921875, "learning_rate": 0.00017631863801857947, "loss": 0.0758, "step": 4798 }, { "epoch": 2.239906651108518, "grad_norm": 0.73828125, "learning_rate": 0.00017630915550912693, "loss": 0.0627, "step": 4799 }, { "epoch": 2.2403733955659275, "grad_norm": 0.984375, "learning_rate": 0.00017629967135665363, "loss": 0.0688, "step": 4800 }, { "epoch": 2.2408401400233373, "grad_norm": 0.95703125, "learning_rate": 0.00017629018556136375, "loss": 0.0804, "step": 4801 }, { "epoch": 2.2413068844807467, "grad_norm": 0.9375, "learning_rate": 0.00017628069812346153, "loss": 0.0629, "step": 4802 }, { "epoch": 2.2417736289381565, "grad_norm": 0.8984375, "learning_rate": 0.00017627120904315123, "loss": 0.0745, "step": 4803 }, { "epoch": 2.242240373395566, "grad_norm": 1.0234375, "learning_rate": 0.00017626171832063717, "loss": 0.0806, "step": 4804 }, { "epoch": 2.2427071178529756, "grad_norm": 0.859375, "learning_rate": 0.00017625222595612372, "loss": 0.0692, "step": 4805 }, { "epoch": 2.243173862310385, "grad_norm": 1.0, "learning_rate": 0.00017624273194981522, "loss": 0.0625, "step": 4806 }, { "epoch": 2.2436406067677948, "grad_norm": 0.72265625, "learning_rate": 0.00017623323630191614, "loss": 0.0571, "step": 4807 }, { "epoch": 2.244107351225204, "grad_norm": 0.7734375, "learning_rate": 0.00017622373901263089, "loss": 0.0628, "step": 4808 }, { "epoch": 2.244574095682614, "grad_norm": 0.79296875, "learning_rate": 0.00017621424008216395, "loss": 0.0488, "step": 4809 }, { "epoch": 2.2450408401400233, "grad_norm": 0.95703125, "learning_rate": 0.0001762047395107199, "loss": 0.0782, "step": 4810 }, { "epoch": 2.245507584597433, "grad_norm": 1.078125, "learning_rate": 0.00017619523729850322, "loss": 0.0695, "step": 4811 }, { "epoch": 2.2459743290548424, "grad_norm": 0.78515625, "learning_rate": 0.00017618573344571853, "loss": 0.0449, "step": 4812 }, { "epoch": 2.246441073512252, "grad_norm": 0.90625, "learning_rate": 0.0001761762279525705, "loss": 0.0525, "step": 4813 }, { "epoch": 2.2469078179696615, "grad_norm": 0.9375, "learning_rate": 0.00017616672081926374, "loss": 0.064, "step": 4814 }, { "epoch": 2.2473745624270713, "grad_norm": 0.87890625, "learning_rate": 0.000176157212046003, "loss": 0.0796, "step": 4815 }, { "epoch": 2.2478413068844807, "grad_norm": 0.89453125, "learning_rate": 0.00017614770163299298, "loss": 0.0865, "step": 4816 }, { "epoch": 2.2483080513418905, "grad_norm": 1.125, "learning_rate": 0.00017613818958043845, "loss": 0.0688, "step": 4817 }, { "epoch": 2.2487747957993, "grad_norm": 0.9140625, "learning_rate": 0.0001761286758885442, "loss": 0.0775, "step": 4818 }, { "epoch": 2.2492415402567096, "grad_norm": 1.09375, "learning_rate": 0.00017611916055751512, "loss": 0.088, "step": 4819 }, { "epoch": 2.249708284714119, "grad_norm": 0.80078125, "learning_rate": 0.00017610964358755603, "loss": 0.0549, "step": 4820 }, { "epoch": 2.250175029171529, "grad_norm": 0.79296875, "learning_rate": 0.0001761001249788719, "loss": 0.074, "step": 4821 }, { "epoch": 2.250641773628938, "grad_norm": 0.87890625, "learning_rate": 0.00017609060473166763, "loss": 0.0741, "step": 4822 }, { "epoch": 2.2511085180863475, "grad_norm": 0.88671875, "learning_rate": 0.00017608108284614823, "loss": 0.0601, "step": 4823 }, { "epoch": 2.2515752625437573, "grad_norm": 0.97265625, "learning_rate": 0.0001760715593225187, "loss": 0.058, "step": 4824 }, { "epoch": 2.2515752625437573, "eval_loss": 1.5635347366333008, "eval_runtime": 93.6209, "eval_samples_per_second": 19.269, "eval_steps_per_second": 2.414, "step": 4824 }, { "epoch": 2.252042007001167, "grad_norm": 0.92578125, "learning_rate": 0.00017606203416098407, "loss": 0.0809, "step": 4825 }, { "epoch": 2.2525087514585764, "grad_norm": 0.86328125, "learning_rate": 0.00017605250736174948, "loss": 0.0669, "step": 4826 }, { "epoch": 2.252975495915986, "grad_norm": 0.79296875, "learning_rate": 0.00017604297892502, "loss": 0.0493, "step": 4827 }, { "epoch": 2.2534422403733956, "grad_norm": 0.9375, "learning_rate": 0.00017603344885100083, "loss": 0.0595, "step": 4828 }, { "epoch": 2.2539089848308054, "grad_norm": 0.85546875, "learning_rate": 0.00017602391713989715, "loss": 0.0675, "step": 4829 }, { "epoch": 2.2543757292882147, "grad_norm": 0.94140625, "learning_rate": 0.00017601438379191416, "loss": 0.0799, "step": 4830 }, { "epoch": 2.254842473745624, "grad_norm": 0.84375, "learning_rate": 0.00017600484880725715, "loss": 0.0541, "step": 4831 }, { "epoch": 2.255309218203034, "grad_norm": 0.75, "learning_rate": 0.00017599531218613144, "loss": 0.0591, "step": 4832 }, { "epoch": 2.2557759626604432, "grad_norm": 0.8046875, "learning_rate": 0.00017598577392874233, "loss": 0.067, "step": 4833 }, { "epoch": 2.256242707117853, "grad_norm": 0.90625, "learning_rate": 0.00017597623403529518, "loss": 0.0776, "step": 4834 }, { "epoch": 2.2567094515752624, "grad_norm": 0.8671875, "learning_rate": 0.00017596669250599548, "loss": 0.0815, "step": 4835 }, { "epoch": 2.257176196032672, "grad_norm": 0.8671875, "learning_rate": 0.00017595714934104856, "loss": 0.0591, "step": 4836 }, { "epoch": 2.2576429404900815, "grad_norm": 1.0234375, "learning_rate": 0.00017594760454065994, "loss": 0.0697, "step": 4837 }, { "epoch": 2.2581096849474913, "grad_norm": 0.91796875, "learning_rate": 0.0001759380581050351, "loss": 0.0655, "step": 4838 }, { "epoch": 2.2585764294049007, "grad_norm": 0.96484375, "learning_rate": 0.0001759285100343796, "loss": 0.0654, "step": 4839 }, { "epoch": 2.2590431738623105, "grad_norm": 0.921875, "learning_rate": 0.00017591896032889912, "loss": 0.065, "step": 4840 }, { "epoch": 2.25950991831972, "grad_norm": 1.0234375, "learning_rate": 0.00017590940898879915, "loss": 0.0683, "step": 4841 }, { "epoch": 2.2599766627771296, "grad_norm": 0.9140625, "learning_rate": 0.00017589985601428537, "loss": 0.0576, "step": 4842 }, { "epoch": 2.260443407234539, "grad_norm": 0.92578125, "learning_rate": 0.00017589030140556348, "loss": 0.0592, "step": 4843 }, { "epoch": 2.2609101516919488, "grad_norm": 0.92578125, "learning_rate": 0.0001758807451628392, "loss": 0.0665, "step": 4844 }, { "epoch": 2.261376896149358, "grad_norm": 0.62890625, "learning_rate": 0.0001758711872863183, "loss": 0.0417, "step": 4845 }, { "epoch": 2.261843640606768, "grad_norm": 0.9609375, "learning_rate": 0.00017586162777620654, "loss": 0.0583, "step": 4846 }, { "epoch": 2.2623103850641773, "grad_norm": 0.92578125, "learning_rate": 0.00017585206663270976, "loss": 0.0634, "step": 4847 }, { "epoch": 2.262777129521587, "grad_norm": 0.88671875, "learning_rate": 0.00017584250385603386, "loss": 0.0734, "step": 4848 }, { "epoch": 2.2632438739789964, "grad_norm": 0.875, "learning_rate": 0.00017583293944638465, "loss": 0.0724, "step": 4849 }, { "epoch": 2.263710618436406, "grad_norm": 0.85546875, "learning_rate": 0.00017582337340396815, "loss": 0.0662, "step": 4850 }, { "epoch": 2.2641773628938155, "grad_norm": 0.92578125, "learning_rate": 0.0001758138057289903, "loss": 0.0698, "step": 4851 }, { "epoch": 2.2646441073512253, "grad_norm": 1.1015625, "learning_rate": 0.0001758042364216571, "loss": 0.0836, "step": 4852 }, { "epoch": 2.2651108518086347, "grad_norm": 0.83203125, "learning_rate": 0.00017579466548217453, "loss": 0.0705, "step": 4853 }, { "epoch": 2.2655775962660445, "grad_norm": 0.9765625, "learning_rate": 0.00017578509291074875, "loss": 0.102, "step": 4854 }, { "epoch": 2.266044340723454, "grad_norm": 0.9296875, "learning_rate": 0.00017577551870758584, "loss": 0.0641, "step": 4855 }, { "epoch": 2.2665110851808636, "grad_norm": 0.6796875, "learning_rate": 0.00017576594287289194, "loss": 0.0436, "step": 4856 }, { "epoch": 2.266977829638273, "grad_norm": 0.84375, "learning_rate": 0.00017575636540687325, "loss": 0.0743, "step": 4857 }, { "epoch": 2.267444574095683, "grad_norm": 0.74609375, "learning_rate": 0.00017574678630973592, "loss": 0.0564, "step": 4858 }, { "epoch": 2.267911318553092, "grad_norm": 0.7890625, "learning_rate": 0.00017573720558168625, "loss": 0.0598, "step": 4859 }, { "epoch": 2.268378063010502, "grad_norm": 0.8828125, "learning_rate": 0.00017572762322293052, "loss": 0.0585, "step": 4860 }, { "epoch": 2.2688448074679113, "grad_norm": 1.359375, "learning_rate": 0.000175718039233675, "loss": 0.0732, "step": 4861 }, { "epoch": 2.269311551925321, "grad_norm": 0.90625, "learning_rate": 0.00017570845361412614, "loss": 0.0591, "step": 4862 }, { "epoch": 2.2697782963827304, "grad_norm": 1.0, "learning_rate": 0.00017569886636449025, "loss": 0.0722, "step": 4863 }, { "epoch": 2.2702450408401402, "grad_norm": 1.1171875, "learning_rate": 0.00017568927748497378, "loss": 0.0791, "step": 4864 }, { "epoch": 2.2707117852975496, "grad_norm": 0.97265625, "learning_rate": 0.00017567968697578315, "loss": 0.0794, "step": 4865 }, { "epoch": 2.271178529754959, "grad_norm": 0.8515625, "learning_rate": 0.00017567009483712495, "loss": 0.0691, "step": 4866 }, { "epoch": 2.2716452742123687, "grad_norm": 1.046875, "learning_rate": 0.00017566050106920564, "loss": 0.0768, "step": 4867 }, { "epoch": 2.2721120186697785, "grad_norm": 0.9375, "learning_rate": 0.0001756509056722318, "loss": 0.06, "step": 4868 }, { "epoch": 2.272578763127188, "grad_norm": 0.890625, "learning_rate": 0.00017564130864641, "loss": 0.0805, "step": 4869 }, { "epoch": 2.2730455075845972, "grad_norm": 0.9140625, "learning_rate": 0.0001756317099919469, "loss": 0.0593, "step": 4870 }, { "epoch": 2.273512252042007, "grad_norm": 0.99609375, "learning_rate": 0.00017562210970904917, "loss": 0.0667, "step": 4871 }, { "epoch": 2.273978996499417, "grad_norm": 1.59375, "learning_rate": 0.00017561250779792354, "loss": 0.0704, "step": 4872 }, { "epoch": 2.274445740956826, "grad_norm": 0.78125, "learning_rate": 0.0001756029042587767, "loss": 0.0618, "step": 4873 }, { "epoch": 2.2749124854142355, "grad_norm": 1.03125, "learning_rate": 0.00017559329909181543, "loss": 0.0759, "step": 4874 }, { "epoch": 2.2753792298716453, "grad_norm": 0.8046875, "learning_rate": 0.00017558369229724662, "loss": 0.0527, "step": 4875 }, { "epoch": 2.2758459743290547, "grad_norm": 1.078125, "learning_rate": 0.000175574083875277, "loss": 0.078, "step": 4876 }, { "epoch": 2.2763127187864645, "grad_norm": 0.90625, "learning_rate": 0.0001755644738261135, "loss": 0.064, "step": 4877 }, { "epoch": 2.276779463243874, "grad_norm": 0.8203125, "learning_rate": 0.00017555486214996307, "loss": 0.0562, "step": 4878 }, { "epoch": 2.2772462077012836, "grad_norm": 0.8125, "learning_rate": 0.0001755452488470326, "loss": 0.066, "step": 4879 }, { "epoch": 2.277712952158693, "grad_norm": 0.84375, "learning_rate": 0.0001755356339175291, "loss": 0.0479, "step": 4880 }, { "epoch": 2.2781796966161028, "grad_norm": 0.9453125, "learning_rate": 0.00017552601736165962, "loss": 0.0657, "step": 4881 }, { "epoch": 2.278646441073512, "grad_norm": 0.765625, "learning_rate": 0.00017551639917963118, "loss": 0.0553, "step": 4882 }, { "epoch": 2.279113185530922, "grad_norm": 0.90625, "learning_rate": 0.0001755067793716509, "loss": 0.0437, "step": 4883 }, { "epoch": 2.2795799299883313, "grad_norm": 0.984375, "learning_rate": 0.00017549715793792586, "loss": 0.0557, "step": 4884 }, { "epoch": 2.280046674445741, "grad_norm": 0.8125, "learning_rate": 0.00017548753487866322, "loss": 0.0562, "step": 4885 }, { "epoch": 2.2805134189031504, "grad_norm": 0.90234375, "learning_rate": 0.00017547791019407023, "loss": 0.071, "step": 4886 }, { "epoch": 2.28098016336056, "grad_norm": 0.86328125, "learning_rate": 0.0001754682838843541, "loss": 0.0607, "step": 4887 }, { "epoch": 2.2814469078179695, "grad_norm": 0.99609375, "learning_rate": 0.00017545865594972206, "loss": 0.0804, "step": 4888 }, { "epoch": 2.2819136522753793, "grad_norm": 1.203125, "learning_rate": 0.00017544902639038146, "loss": 0.0837, "step": 4889 }, { "epoch": 2.2823803967327887, "grad_norm": 0.9375, "learning_rate": 0.00017543939520653958, "loss": 0.0737, "step": 4890 }, { "epoch": 2.2828471411901985, "grad_norm": 0.9609375, "learning_rate": 0.00017542976239840385, "loss": 0.0535, "step": 4891 }, { "epoch": 2.283313885647608, "grad_norm": 0.84375, "learning_rate": 0.00017542012796618162, "loss": 0.0501, "step": 4892 }, { "epoch": 2.2837806301050176, "grad_norm": 0.94921875, "learning_rate": 0.00017541049191008035, "loss": 0.0655, "step": 4893 }, { "epoch": 2.284247374562427, "grad_norm": 1.3203125, "learning_rate": 0.00017540085423030754, "loss": 0.0638, "step": 4894 }, { "epoch": 2.284714119019837, "grad_norm": 1.1171875, "learning_rate": 0.00017539121492707066, "loss": 0.0828, "step": 4895 }, { "epoch": 2.285180863477246, "grad_norm": 1.109375, "learning_rate": 0.0001753815740005773, "loss": 0.0566, "step": 4896 }, { "epoch": 2.285647607934656, "grad_norm": 1.0234375, "learning_rate": 0.00017537193145103502, "loss": 0.0707, "step": 4897 }, { "epoch": 2.2861143523920653, "grad_norm": 1.0078125, "learning_rate": 0.0001753622872786514, "loss": 0.0678, "step": 4898 }, { "epoch": 2.286581096849475, "grad_norm": 1.171875, "learning_rate": 0.00017535264148363413, "loss": 0.0711, "step": 4899 }, { "epoch": 2.2870478413068844, "grad_norm": 0.80078125, "learning_rate": 0.00017534299406619087, "loss": 0.0602, "step": 4900 }, { "epoch": 2.2875145857642942, "grad_norm": 1.15625, "learning_rate": 0.00017533334502652936, "loss": 0.1016, "step": 4901 }, { "epoch": 2.2879813302217036, "grad_norm": 0.953125, "learning_rate": 0.00017532369436485738, "loss": 0.0684, "step": 4902 }, { "epoch": 2.2884480746791134, "grad_norm": 0.76171875, "learning_rate": 0.00017531404208138266, "loss": 0.0589, "step": 4903 }, { "epoch": 2.2889148191365227, "grad_norm": 1.0625, "learning_rate": 0.00017530438817631305, "loss": 0.0791, "step": 4904 }, { "epoch": 2.289381563593932, "grad_norm": 0.7734375, "learning_rate": 0.0001752947326498564, "loss": 0.0499, "step": 4905 }, { "epoch": 2.289848308051342, "grad_norm": 0.80078125, "learning_rate": 0.00017528507550222062, "loss": 0.0692, "step": 4906 }, { "epoch": 2.2903150525087517, "grad_norm": 0.9765625, "learning_rate": 0.00017527541673361365, "loss": 0.0804, "step": 4907 }, { "epoch": 2.290781796966161, "grad_norm": 1.0, "learning_rate": 0.00017526575634424342, "loss": 0.0877, "step": 4908 }, { "epoch": 2.2912485414235704, "grad_norm": 0.98046875, "learning_rate": 0.000175256094334318, "loss": 0.0727, "step": 4909 }, { "epoch": 2.29171528588098, "grad_norm": 0.9765625, "learning_rate": 0.0001752464307040453, "loss": 0.1109, "step": 4910 }, { "epoch": 2.29218203033839, "grad_norm": 0.92578125, "learning_rate": 0.0001752367654536335, "loss": 0.0728, "step": 4911 }, { "epoch": 2.2926487747957993, "grad_norm": 0.8828125, "learning_rate": 0.00017522709858329064, "loss": 0.0808, "step": 4912 }, { "epoch": 2.2931155192532087, "grad_norm": 0.78515625, "learning_rate": 0.00017521743009322492, "loss": 0.0586, "step": 4913 }, { "epoch": 2.2935822637106185, "grad_norm": 0.734375, "learning_rate": 0.00017520775998364443, "loss": 0.0507, "step": 4914 }, { "epoch": 2.294049008168028, "grad_norm": 0.828125, "learning_rate": 0.00017519808825475742, "loss": 0.0688, "step": 4915 }, { "epoch": 2.2945157526254376, "grad_norm": 1.0390625, "learning_rate": 0.0001751884149067722, "loss": 0.0668, "step": 4916 }, { "epoch": 2.294982497082847, "grad_norm": 0.83203125, "learning_rate": 0.00017517873993989693, "loss": 0.0555, "step": 4917 }, { "epoch": 2.2954492415402568, "grad_norm": 0.6953125, "learning_rate": 0.00017516906335434, "loss": 0.0454, "step": 4918 }, { "epoch": 2.295915985997666, "grad_norm": 0.921875, "learning_rate": 0.0001751593851503097, "loss": 0.0634, "step": 4919 }, { "epoch": 2.296382730455076, "grad_norm": 1.03125, "learning_rate": 0.00017514970532801449, "loss": 0.078, "step": 4920 }, { "epoch": 2.2968494749124853, "grad_norm": 0.90234375, "learning_rate": 0.00017514002388766274, "loss": 0.061, "step": 4921 }, { "epoch": 2.297316219369895, "grad_norm": 0.65625, "learning_rate": 0.00017513034082946284, "loss": 0.0489, "step": 4922 }, { "epoch": 2.2977829638273044, "grad_norm": 0.74609375, "learning_rate": 0.00017512065615362342, "loss": 0.0471, "step": 4923 }, { "epoch": 2.298249708284714, "grad_norm": 0.73046875, "learning_rate": 0.00017511096986035289, "loss": 0.0434, "step": 4924 }, { "epoch": 2.2987164527421236, "grad_norm": 1.0546875, "learning_rate": 0.00017510128194985985, "loss": 0.0898, "step": 4925 }, { "epoch": 2.2991831971995333, "grad_norm": 1.03125, "learning_rate": 0.0001750915924223529, "loss": 0.0698, "step": 4926 }, { "epoch": 2.2996499416569427, "grad_norm": 0.90625, "learning_rate": 0.00017508190127804062, "loss": 0.057, "step": 4927 }, { "epoch": 2.3001166861143525, "grad_norm": 0.91796875, "learning_rate": 0.00017507220851713172, "loss": 0.0852, "step": 4928 }, { "epoch": 2.300583430571762, "grad_norm": 1.015625, "learning_rate": 0.00017506251413983487, "loss": 0.0789, "step": 4929 }, { "epoch": 2.3010501750291716, "grad_norm": 0.796875, "learning_rate": 0.0001750528181463588, "loss": 0.0465, "step": 4930 }, { "epoch": 2.301516919486581, "grad_norm": 0.81640625, "learning_rate": 0.0001750431205369123, "loss": 0.0677, "step": 4931 }, { "epoch": 2.301983663943991, "grad_norm": 1.0078125, "learning_rate": 0.00017503342131170414, "loss": 0.0764, "step": 4932 }, { "epoch": 2.3024504084014, "grad_norm": 0.80859375, "learning_rate": 0.00017502372047094317, "loss": 0.0678, "step": 4933 }, { "epoch": 2.30291715285881, "grad_norm": 0.8125, "learning_rate": 0.00017501401801483827, "loss": 0.0591, "step": 4934 }, { "epoch": 2.3033838973162193, "grad_norm": 0.9296875, "learning_rate": 0.00017500431394359834, "loss": 0.0648, "step": 4935 }, { "epoch": 2.303850641773629, "grad_norm": 0.94140625, "learning_rate": 0.0001749946082574323, "loss": 0.0651, "step": 4936 }, { "epoch": 2.3043173862310384, "grad_norm": 0.91796875, "learning_rate": 0.0001749849009565491, "loss": 0.063, "step": 4937 }, { "epoch": 2.3047841306884482, "grad_norm": 0.7578125, "learning_rate": 0.00017497519204115784, "loss": 0.0575, "step": 4938 }, { "epoch": 2.3052508751458576, "grad_norm": 0.91015625, "learning_rate": 0.0001749654815114675, "loss": 0.0621, "step": 4939 }, { "epoch": 2.3057176196032674, "grad_norm": 0.859375, "learning_rate": 0.00017495576936768715, "loss": 0.059, "step": 4940 }, { "epoch": 2.3061843640606767, "grad_norm": 0.86328125, "learning_rate": 0.0001749460556100259, "loss": 0.0519, "step": 4941 }, { "epoch": 2.3066511085180865, "grad_norm": 0.96484375, "learning_rate": 0.00017493634023869295, "loss": 0.0845, "step": 4942 }, { "epoch": 2.307117852975496, "grad_norm": 1.015625, "learning_rate": 0.00017492662325389744, "loss": 0.0715, "step": 4943 }, { "epoch": 2.3075845974329057, "grad_norm": 1.0078125, "learning_rate": 0.00017491690465584864, "loss": 0.0704, "step": 4944 }, { "epoch": 2.308051341890315, "grad_norm": 1.0234375, "learning_rate": 0.0001749071844447557, "loss": 0.0697, "step": 4945 }, { "epoch": 2.308518086347725, "grad_norm": 0.69921875, "learning_rate": 0.000174897462620828, "loss": 0.0464, "step": 4946 }, { "epoch": 2.308984830805134, "grad_norm": 1.0234375, "learning_rate": 0.00017488773918427478, "loss": 0.0753, "step": 4947 }, { "epoch": 2.3094515752625435, "grad_norm": 0.96875, "learning_rate": 0.0001748780141353055, "loss": 0.0576, "step": 4948 }, { "epoch": 2.3099183197199533, "grad_norm": 0.890625, "learning_rate": 0.00017486828747412952, "loss": 0.0532, "step": 4949 }, { "epoch": 2.310385064177363, "grad_norm": 0.984375, "learning_rate": 0.00017485855920095617, "loss": 0.0725, "step": 4950 }, { "epoch": 2.3108518086347725, "grad_norm": 0.96484375, "learning_rate": 0.00017484882931599502, "loss": 0.0755, "step": 4951 }, { "epoch": 2.311318553092182, "grad_norm": 1.0390625, "learning_rate": 0.00017483909781945556, "loss": 0.0814, "step": 4952 }, { "epoch": 2.3117852975495916, "grad_norm": 0.875, "learning_rate": 0.00017482936471154722, "loss": 0.0486, "step": 4953 }, { "epoch": 2.3122520420070014, "grad_norm": 1.125, "learning_rate": 0.0001748196299924797, "loss": 0.0677, "step": 4954 }, { "epoch": 2.3127187864644108, "grad_norm": 0.73046875, "learning_rate": 0.0001748098936624625, "loss": 0.0618, "step": 4955 }, { "epoch": 2.31318553092182, "grad_norm": 0.640625, "learning_rate": 0.0001748001557217053, "loss": 0.0468, "step": 4956 }, { "epoch": 2.31365227537923, "grad_norm": 0.89453125, "learning_rate": 0.00017479041617041773, "loss": 0.0624, "step": 4957 }, { "epoch": 2.3141190198366393, "grad_norm": 0.8359375, "learning_rate": 0.00017478067500880954, "loss": 0.0443, "step": 4958 }, { "epoch": 2.314585764294049, "grad_norm": 0.94140625, "learning_rate": 0.00017477093223709043, "loss": 0.0737, "step": 4959 }, { "epoch": 2.3150525087514584, "grad_norm": 0.796875, "learning_rate": 0.0001747611878554702, "loss": 0.0557, "step": 4960 }, { "epoch": 2.315519253208868, "grad_norm": 1.046875, "learning_rate": 0.00017475144186415866, "loss": 0.0698, "step": 4961 }, { "epoch": 2.3159859976662776, "grad_norm": 0.8671875, "learning_rate": 0.00017474169426336561, "loss": 0.0588, "step": 4962 }, { "epoch": 2.3164527421236873, "grad_norm": 1.0078125, "learning_rate": 0.00017473194505330096, "loss": 0.0722, "step": 4963 }, { "epoch": 2.3169194865810967, "grad_norm": 0.97265625, "learning_rate": 0.00017472219423417463, "loss": 0.073, "step": 4964 }, { "epoch": 2.3173862310385065, "grad_norm": 1.0859375, "learning_rate": 0.00017471244180619654, "loss": 0.0689, "step": 4965 }, { "epoch": 2.317852975495916, "grad_norm": 1.359375, "learning_rate": 0.00017470268776957672, "loss": 0.0701, "step": 4966 }, { "epoch": 2.3183197199533256, "grad_norm": 1.03125, "learning_rate": 0.0001746929321245251, "loss": 0.0614, "step": 4967 }, { "epoch": 2.318786464410735, "grad_norm": 0.8359375, "learning_rate": 0.0001746831748712518, "loss": 0.0426, "step": 4968 }, { "epoch": 2.319253208868145, "grad_norm": 0.91796875, "learning_rate": 0.00017467341600996687, "loss": 0.0652, "step": 4969 }, { "epoch": 2.319719953325554, "grad_norm": 0.78515625, "learning_rate": 0.00017466365554088048, "loss": 0.0676, "step": 4970 }, { "epoch": 2.320186697782964, "grad_norm": 0.953125, "learning_rate": 0.00017465389346420272, "loss": 0.0744, "step": 4971 }, { "epoch": 2.3206534422403733, "grad_norm": 1.015625, "learning_rate": 0.0001746441297801438, "loss": 0.0684, "step": 4972 }, { "epoch": 2.321120186697783, "grad_norm": 0.9296875, "learning_rate": 0.00017463436448891394, "loss": 0.0744, "step": 4973 }, { "epoch": 2.3215869311551924, "grad_norm": 0.83984375, "learning_rate": 0.0001746245975907234, "loss": 0.0625, "step": 4974 }, { "epoch": 2.3220536756126022, "grad_norm": 1.328125, "learning_rate": 0.0001746148290857825, "loss": 0.0814, "step": 4975 }, { "epoch": 2.3225204200700116, "grad_norm": 0.91015625, "learning_rate": 0.00017460505897430153, "loss": 0.0615, "step": 4976 }, { "epoch": 2.3229871645274214, "grad_norm": 1.046875, "learning_rate": 0.00017459528725649088, "loss": 0.0731, "step": 4977 }, { "epoch": 2.3234539089848307, "grad_norm": 0.86328125, "learning_rate": 0.00017458551393256092, "loss": 0.0604, "step": 4978 }, { "epoch": 2.3239206534422405, "grad_norm": 0.93359375, "learning_rate": 0.00017457573900272206, "loss": 0.0694, "step": 4979 }, { "epoch": 2.32438739789965, "grad_norm": 0.79296875, "learning_rate": 0.00017456596246718484, "loss": 0.0633, "step": 4980 }, { "epoch": 2.3248541423570597, "grad_norm": 0.796875, "learning_rate": 0.0001745561843261597, "loss": 0.0546, "step": 4981 }, { "epoch": 2.325320886814469, "grad_norm": 0.98046875, "learning_rate": 0.00017454640457985717, "loss": 0.0699, "step": 4982 }, { "epoch": 2.325787631271879, "grad_norm": 0.86328125, "learning_rate": 0.00017453662322848787, "loss": 0.0515, "step": 4983 }, { "epoch": 2.326254375729288, "grad_norm": 0.92578125, "learning_rate": 0.00017452684027226235, "loss": 0.0703, "step": 4984 }, { "epoch": 2.326721120186698, "grad_norm": 0.98828125, "learning_rate": 0.0001745170557113913, "loss": 0.0875, "step": 4985 }, { "epoch": 2.3271878646441073, "grad_norm": 1.0234375, "learning_rate": 0.00017450726954608534, "loss": 0.0633, "step": 4986 }, { "epoch": 2.327654609101517, "grad_norm": 0.96484375, "learning_rate": 0.0001744974817765552, "loss": 0.0737, "step": 4987 }, { "epoch": 2.3281213535589265, "grad_norm": 0.67578125, "learning_rate": 0.0001744876924030116, "loss": 0.0511, "step": 4988 }, { "epoch": 2.3285880980163363, "grad_norm": 0.8828125, "learning_rate": 0.0001744779014256654, "loss": 0.057, "step": 4989 }, { "epoch": 2.3290548424737456, "grad_norm": 1.0390625, "learning_rate": 0.0001744681088447273, "loss": 0.0808, "step": 4990 }, { "epoch": 2.329521586931155, "grad_norm": 0.8984375, "learning_rate": 0.0001744583146604082, "loss": 0.0541, "step": 4991 }, { "epoch": 2.3299883313885648, "grad_norm": 0.984375, "learning_rate": 0.00017444851887291898, "loss": 0.067, "step": 4992 }, { "epoch": 2.3304550758459746, "grad_norm": 0.94921875, "learning_rate": 0.00017443872148247054, "loss": 0.0655, "step": 4993 }, { "epoch": 2.330921820303384, "grad_norm": 1.1171875, "learning_rate": 0.00017442892248927382, "loss": 0.0767, "step": 4994 }, { "epoch": 2.3313885647607933, "grad_norm": 0.95703125, "learning_rate": 0.00017441912189353982, "loss": 0.0718, "step": 4995 }, { "epoch": 2.331855309218203, "grad_norm": 0.91015625, "learning_rate": 0.0001744093196954796, "loss": 0.0595, "step": 4996 }, { "epoch": 2.332322053675613, "grad_norm": 0.91015625, "learning_rate": 0.00017439951589530412, "loss": 0.0642, "step": 4997 }, { "epoch": 2.332788798133022, "grad_norm": 0.8671875, "learning_rate": 0.00017438971049322453, "loss": 0.0548, "step": 4998 }, { "epoch": 2.3332555425904316, "grad_norm": 1.1328125, "learning_rate": 0.00017437990348945193, "loss": 0.0617, "step": 4999 }, { "epoch": 2.3337222870478413, "grad_norm": 0.7578125, "learning_rate": 0.0001743700948841975, "loss": 0.0531, "step": 5000 }, { "epoch": 2.3341890315052507, "grad_norm": 0.984375, "learning_rate": 0.0001743602846776724, "loss": 0.0751, "step": 5001 }, { "epoch": 2.3346557759626605, "grad_norm": 0.76953125, "learning_rate": 0.00017435047287008788, "loss": 0.0622, "step": 5002 }, { "epoch": 2.33512252042007, "grad_norm": 0.7578125, "learning_rate": 0.0001743406594616552, "loss": 0.0522, "step": 5003 }, { "epoch": 2.3355892648774796, "grad_norm": 0.91015625, "learning_rate": 0.0001743308444525856, "loss": 0.0577, "step": 5004 }, { "epoch": 2.336056009334889, "grad_norm": 0.8203125, "learning_rate": 0.00017432102784309045, "loss": 0.0766, "step": 5005 }, { "epoch": 2.336522753792299, "grad_norm": 0.86328125, "learning_rate": 0.00017431120963338112, "loss": 0.0755, "step": 5006 }, { "epoch": 2.336989498249708, "grad_norm": 0.6171875, "learning_rate": 0.000174301389823669, "loss": 0.0381, "step": 5007 }, { "epoch": 2.337456242707118, "grad_norm": 1.125, "learning_rate": 0.00017429156841416554, "loss": 0.0924, "step": 5008 }, { "epoch": 2.3379229871645273, "grad_norm": 0.9296875, "learning_rate": 0.00017428174540508215, "loss": 0.0777, "step": 5009 }, { "epoch": 2.338389731621937, "grad_norm": 0.953125, "learning_rate": 0.00017427192079663036, "loss": 0.0619, "step": 5010 }, { "epoch": 2.3388564760793464, "grad_norm": 0.83203125, "learning_rate": 0.00017426209458902173, "loss": 0.0617, "step": 5011 }, { "epoch": 2.3393232205367562, "grad_norm": 0.84765625, "learning_rate": 0.00017425226678246782, "loss": 0.0696, "step": 5012 }, { "epoch": 2.3397899649941656, "grad_norm": 0.96484375, "learning_rate": 0.0001742424373771802, "loss": 0.073, "step": 5013 }, { "epoch": 2.3402567094515754, "grad_norm": 0.86328125, "learning_rate": 0.00017423260637337054, "loss": 0.0499, "step": 5014 }, { "epoch": 2.3407234539089847, "grad_norm": 0.84765625, "learning_rate": 0.0001742227737712505, "loss": 0.0555, "step": 5015 }, { "epoch": 2.3411901983663945, "grad_norm": 0.7890625, "learning_rate": 0.00017421293957103177, "loss": 0.0577, "step": 5016 }, { "epoch": 2.341656942823804, "grad_norm": 1.078125, "learning_rate": 0.00017420310377292615, "loss": 0.0793, "step": 5017 }, { "epoch": 2.3421236872812137, "grad_norm": 0.85546875, "learning_rate": 0.00017419326637714533, "loss": 0.0549, "step": 5018 }, { "epoch": 2.342590431738623, "grad_norm": 0.7578125, "learning_rate": 0.0001741834273839012, "loss": 0.0561, "step": 5019 }, { "epoch": 2.343057176196033, "grad_norm": 0.93359375, "learning_rate": 0.00017417358679340557, "loss": 0.0695, "step": 5020 }, { "epoch": 2.343523920653442, "grad_norm": 0.88671875, "learning_rate": 0.0001741637446058703, "loss": 0.076, "step": 5021 }, { "epoch": 2.343990665110852, "grad_norm": 0.88671875, "learning_rate": 0.00017415390082150737, "loss": 0.0607, "step": 5022 }, { "epoch": 2.3444574095682613, "grad_norm": 0.859375, "learning_rate": 0.0001741440554405286, "loss": 0.053, "step": 5023 }, { "epoch": 2.344924154025671, "grad_norm": 0.8671875, "learning_rate": 0.0001741342084631461, "loss": 0.0567, "step": 5024 }, { "epoch": 2.3453908984830805, "grad_norm": 1.0625, "learning_rate": 0.00017412435988957184, "loss": 0.093, "step": 5025 }, { "epoch": 2.3458576429404903, "grad_norm": 0.88671875, "learning_rate": 0.00017411450972001786, "loss": 0.0688, "step": 5026 }, { "epoch": 2.3463243873978996, "grad_norm": 1.1015625, "learning_rate": 0.00017410465795469627, "loss": 0.0707, "step": 5027 }, { "epoch": 2.3467911318553094, "grad_norm": 0.6953125, "learning_rate": 0.00017409480459381916, "loss": 0.0372, "step": 5028 }, { "epoch": 2.3472578763127188, "grad_norm": 0.90234375, "learning_rate": 0.0001740849496375987, "loss": 0.0673, "step": 5029 }, { "epoch": 2.347724620770128, "grad_norm": 0.734375, "learning_rate": 0.00017407509308624706, "loss": 0.048, "step": 5030 }, { "epoch": 2.348191365227538, "grad_norm": 0.8828125, "learning_rate": 0.00017406523493997646, "loss": 0.0541, "step": 5031 }, { "epoch": 2.3486581096849477, "grad_norm": 0.7421875, "learning_rate": 0.00017405537519899923, "loss": 0.0556, "step": 5032 }, { "epoch": 2.349124854142357, "grad_norm": 1.0546875, "learning_rate": 0.00017404551386352755, "loss": 0.0687, "step": 5033 }, { "epoch": 2.3495915985997664, "grad_norm": 0.95703125, "learning_rate": 0.00017403565093377385, "loss": 0.0867, "step": 5034 }, { "epoch": 2.350058343057176, "grad_norm": 0.8828125, "learning_rate": 0.00017402578640995044, "loss": 0.0683, "step": 5035 }, { "epoch": 2.350525087514586, "grad_norm": 1.0078125, "learning_rate": 0.00017401592029226968, "loss": 0.0778, "step": 5036 }, { "epoch": 2.3509918319719953, "grad_norm": 0.7578125, "learning_rate": 0.00017400605258094406, "loss": 0.0531, "step": 5037 }, { "epoch": 2.3514585764294047, "grad_norm": 1.0859375, "learning_rate": 0.00017399618327618602, "loss": 0.0854, "step": 5038 }, { "epoch": 2.3519253208868145, "grad_norm": 0.84375, "learning_rate": 0.00017398631237820804, "loss": 0.0561, "step": 5039 }, { "epoch": 2.352392065344224, "grad_norm": 0.8984375, "learning_rate": 0.0001739764398872227, "loss": 0.0808, "step": 5040 }, { "epoch": 2.3528588098016336, "grad_norm": 0.8984375, "learning_rate": 0.0001739665658034425, "loss": 0.0491, "step": 5041 }, { "epoch": 2.353325554259043, "grad_norm": 1.015625, "learning_rate": 0.00017395669012708006, "loss": 0.0517, "step": 5042 }, { "epoch": 2.353792298716453, "grad_norm": 0.9609375, "learning_rate": 0.00017394681285834803, "loss": 0.0766, "step": 5043 }, { "epoch": 2.354259043173862, "grad_norm": 0.87109375, "learning_rate": 0.00017393693399745907, "loss": 0.0597, "step": 5044 }, { "epoch": 2.354725787631272, "grad_norm": 0.87890625, "learning_rate": 0.00017392705354462592, "loss": 0.0619, "step": 5045 }, { "epoch": 2.3551925320886813, "grad_norm": 0.83984375, "learning_rate": 0.00017391717150006127, "loss": 0.0767, "step": 5046 }, { "epoch": 2.355659276546091, "grad_norm": 0.84375, "learning_rate": 0.0001739072878639779, "loss": 0.0636, "step": 5047 }, { "epoch": 2.3561260210035004, "grad_norm": 0.84765625, "learning_rate": 0.0001738974026365886, "loss": 0.0682, "step": 5048 }, { "epoch": 2.3565927654609102, "grad_norm": 0.96484375, "learning_rate": 0.00017388751581810626, "loss": 0.0598, "step": 5049 }, { "epoch": 2.3570595099183196, "grad_norm": 0.8203125, "learning_rate": 0.00017387762740874368, "loss": 0.0481, "step": 5050 }, { "epoch": 2.3575262543757294, "grad_norm": 0.765625, "learning_rate": 0.00017386773740871386, "loss": 0.0542, "step": 5051 }, { "epoch": 2.3579929988331387, "grad_norm": 0.97265625, "learning_rate": 0.0001738578458182297, "loss": 0.0792, "step": 5052 }, { "epoch": 2.3584597432905485, "grad_norm": 0.94921875, "learning_rate": 0.00017384795263750412, "loss": 0.0597, "step": 5053 }, { "epoch": 2.358926487747958, "grad_norm": 0.75, "learning_rate": 0.00017383805786675024, "loss": 0.0552, "step": 5054 }, { "epoch": 2.3593932322053677, "grad_norm": 0.8046875, "learning_rate": 0.00017382816150618102, "loss": 0.0595, "step": 5055 }, { "epoch": 2.359859976662777, "grad_norm": 0.7890625, "learning_rate": 0.0001738182635560096, "loss": 0.0501, "step": 5056 }, { "epoch": 2.360326721120187, "grad_norm": 0.9375, "learning_rate": 0.00017380836401644902, "loss": 0.0703, "step": 5057 }, { "epoch": 2.360793465577596, "grad_norm": 0.84375, "learning_rate": 0.0001737984628877125, "loss": 0.0593, "step": 5058 }, { "epoch": 2.361260210035006, "grad_norm": 0.953125, "learning_rate": 0.00017378856017001318, "loss": 0.0548, "step": 5059 }, { "epoch": 2.3617269544924153, "grad_norm": 0.8125, "learning_rate": 0.0001737786558635643, "loss": 0.0701, "step": 5060 }, { "epoch": 2.362193698949825, "grad_norm": 0.76171875, "learning_rate": 0.00017376874996857908, "loss": 0.0574, "step": 5061 }, { "epoch": 2.3626604434072345, "grad_norm": 1.1171875, "learning_rate": 0.00017375884248527085, "loss": 0.0768, "step": 5062 }, { "epoch": 2.3631271878646443, "grad_norm": 0.99609375, "learning_rate": 0.0001737489334138529, "loss": 0.0676, "step": 5063 }, { "epoch": 2.3635939323220536, "grad_norm": 1.09375, "learning_rate": 0.00017373902275453858, "loss": 0.0857, "step": 5064 }, { "epoch": 2.3640606767794634, "grad_norm": 1.21875, "learning_rate": 0.00017372911050754129, "loss": 0.0779, "step": 5065 }, { "epoch": 2.3645274212368728, "grad_norm": 1.1875, "learning_rate": 0.00017371919667307447, "loss": 0.0882, "step": 5066 }, { "epoch": 2.3649941656942826, "grad_norm": 0.77734375, "learning_rate": 0.00017370928125135147, "loss": 0.0524, "step": 5067 }, { "epoch": 2.365460910151692, "grad_norm": 0.6796875, "learning_rate": 0.00017369936424258594, "loss": 0.062, "step": 5068 }, { "epoch": 2.3659276546091017, "grad_norm": 0.9140625, "learning_rate": 0.0001736894456469913, "loss": 0.0674, "step": 5069 }, { "epoch": 2.366394399066511, "grad_norm": 0.73046875, "learning_rate": 0.00017367952546478116, "loss": 0.0483, "step": 5070 }, { "epoch": 2.366861143523921, "grad_norm": 0.77734375, "learning_rate": 0.00017366960369616906, "loss": 0.066, "step": 5071 }, { "epoch": 2.36732788798133, "grad_norm": 0.96484375, "learning_rate": 0.0001736596803413687, "loss": 0.0806, "step": 5072 }, { "epoch": 2.3677946324387396, "grad_norm": 0.87109375, "learning_rate": 0.00017364975540059362, "loss": 0.0697, "step": 5073 }, { "epoch": 2.3682613768961494, "grad_norm": 0.875, "learning_rate": 0.00017363982887405764, "loss": 0.0699, "step": 5074 }, { "epoch": 2.368728121353559, "grad_norm": 0.80859375, "learning_rate": 0.00017362990076197444, "loss": 0.078, "step": 5075 }, { "epoch": 2.3691948658109685, "grad_norm": 0.88671875, "learning_rate": 0.00017361997106455775, "loss": 0.0772, "step": 5076 }, { "epoch": 2.369661610268378, "grad_norm": 0.75, "learning_rate": 0.00017361003978202144, "loss": 0.0547, "step": 5077 }, { "epoch": 2.3701283547257876, "grad_norm": 1.09375, "learning_rate": 0.00017360010691457929, "loss": 0.0753, "step": 5078 }, { "epoch": 2.3705950991831974, "grad_norm": 1.03125, "learning_rate": 0.00017359017246244517, "loss": 0.0772, "step": 5079 }, { "epoch": 2.371061843640607, "grad_norm": 1.109375, "learning_rate": 0.000173580236425833, "loss": 0.0711, "step": 5080 }, { "epoch": 2.371528588098016, "grad_norm": 0.97265625, "learning_rate": 0.00017357029880495667, "loss": 0.071, "step": 5081 }, { "epoch": 2.371995332555426, "grad_norm": 0.8515625, "learning_rate": 0.0001735603596000302, "loss": 0.0707, "step": 5082 }, { "epoch": 2.3724620770128353, "grad_norm": 0.86328125, "learning_rate": 0.00017355041881126758, "loss": 0.0621, "step": 5083 }, { "epoch": 2.372928821470245, "grad_norm": 0.92578125, "learning_rate": 0.00017354047643888283, "loss": 0.0743, "step": 5084 }, { "epoch": 2.3733955659276544, "grad_norm": 0.859375, "learning_rate": 0.00017353053248309004, "loss": 0.061, "step": 5085 }, { "epoch": 2.3738623103850642, "grad_norm": 1.0703125, "learning_rate": 0.00017352058694410327, "loss": 0.0734, "step": 5086 }, { "epoch": 2.3743290548424736, "grad_norm": 0.765625, "learning_rate": 0.00017351063982213672, "loss": 0.051, "step": 5087 }, { "epoch": 2.3747957992998834, "grad_norm": 0.83203125, "learning_rate": 0.0001735006911174045, "loss": 0.0562, "step": 5088 }, { "epoch": 2.3752625437572927, "grad_norm": 1.1640625, "learning_rate": 0.00017349074083012088, "loss": 0.0486, "step": 5089 }, { "epoch": 2.3757292882147025, "grad_norm": 0.69140625, "learning_rate": 0.00017348078896050007, "loss": 0.047, "step": 5090 }, { "epoch": 2.376196032672112, "grad_norm": 0.8359375, "learning_rate": 0.00017347083550875634, "loss": 0.0503, "step": 5091 }, { "epoch": 2.3766627771295217, "grad_norm": 0.9765625, "learning_rate": 0.00017346088047510397, "loss": 0.0653, "step": 5092 }, { "epoch": 2.377129521586931, "grad_norm": 0.953125, "learning_rate": 0.00017345092385975737, "loss": 0.0854, "step": 5093 }, { "epoch": 2.377596266044341, "grad_norm": 0.68359375, "learning_rate": 0.0001734409656629309, "loss": 0.0438, "step": 5094 }, { "epoch": 2.37806301050175, "grad_norm": 0.80859375, "learning_rate": 0.00017343100588483896, "loss": 0.0605, "step": 5095 }, { "epoch": 2.37852975495916, "grad_norm": 1.1171875, "learning_rate": 0.00017342104452569595, "loss": 0.0906, "step": 5096 }, { "epoch": 2.3789964994165693, "grad_norm": 0.8515625, "learning_rate": 0.0001734110815857164, "loss": 0.0755, "step": 5097 }, { "epoch": 2.379463243873979, "grad_norm": 0.828125, "learning_rate": 0.00017340111706511482, "loss": 0.0733, "step": 5098 }, { "epoch": 2.3799299883313885, "grad_norm": 0.7578125, "learning_rate": 0.00017339115096410575, "loss": 0.0433, "step": 5099 }, { "epoch": 2.3803967327887983, "grad_norm": 0.80859375, "learning_rate": 0.00017338118328290379, "loss": 0.0597, "step": 5100 }, { "epoch": 2.3808634772462076, "grad_norm": 0.96484375, "learning_rate": 0.00017337121402172348, "loss": 0.0605, "step": 5101 }, { "epoch": 2.3813302217036174, "grad_norm": 0.953125, "learning_rate": 0.00017336124318077957, "loss": 0.0667, "step": 5102 }, { "epoch": 2.3817969661610268, "grad_norm": 0.8359375, "learning_rate": 0.0001733512707602867, "loss": 0.063, "step": 5103 }, { "epoch": 2.3822637106184366, "grad_norm": 0.82421875, "learning_rate": 0.00017334129676045958, "loss": 0.0605, "step": 5104 }, { "epoch": 2.382730455075846, "grad_norm": 0.80078125, "learning_rate": 0.00017333132118151296, "loss": 0.0549, "step": 5105 }, { "epoch": 2.3831971995332557, "grad_norm": 1.0859375, "learning_rate": 0.00017332134402366165, "loss": 0.0773, "step": 5106 }, { "epoch": 2.383663943990665, "grad_norm": 1.0703125, "learning_rate": 0.00017331136528712042, "loss": 0.0809, "step": 5107 }, { "epoch": 2.384130688448075, "grad_norm": 0.8203125, "learning_rate": 0.00017330138497210416, "loss": 0.0544, "step": 5108 }, { "epoch": 2.384597432905484, "grad_norm": 0.796875, "learning_rate": 0.00017329140307882778, "loss": 0.0378, "step": 5109 }, { "epoch": 2.385064177362894, "grad_norm": 0.8203125, "learning_rate": 0.00017328141960750617, "loss": 0.0397, "step": 5110 }, { "epoch": 2.3855309218203034, "grad_norm": 0.9296875, "learning_rate": 0.0001732714345583543, "loss": 0.0594, "step": 5111 }, { "epoch": 2.3859976662777127, "grad_norm": 0.95703125, "learning_rate": 0.0001732614479315871, "loss": 0.0635, "step": 5112 }, { "epoch": 2.3864644107351225, "grad_norm": 0.77734375, "learning_rate": 0.00017325145972741965, "loss": 0.0561, "step": 5113 }, { "epoch": 2.3869311551925323, "grad_norm": 0.92578125, "learning_rate": 0.00017324146994606704, "loss": 0.0838, "step": 5114 }, { "epoch": 2.3873978996499416, "grad_norm": 1.0234375, "learning_rate": 0.00017323147858774429, "loss": 0.0708, "step": 5115 }, { "epoch": 2.387864644107351, "grad_norm": 0.87890625, "learning_rate": 0.00017322148565266656, "loss": 0.0626, "step": 5116 }, { "epoch": 2.388331388564761, "grad_norm": 0.94921875, "learning_rate": 0.000173211491141049, "loss": 0.0554, "step": 5117 }, { "epoch": 2.3887981330221706, "grad_norm": 0.8203125, "learning_rate": 0.00017320149505310682, "loss": 0.0612, "step": 5118 }, { "epoch": 2.38926487747958, "grad_norm": 0.75390625, "learning_rate": 0.0001731914973890552, "loss": 0.0426, "step": 5119 }, { "epoch": 2.3897316219369893, "grad_norm": 0.859375, "learning_rate": 0.00017318149814910945, "loss": 0.0415, "step": 5120 }, { "epoch": 2.390198366394399, "grad_norm": 0.94921875, "learning_rate": 0.00017317149733348485, "loss": 0.0777, "step": 5121 }, { "epoch": 2.390665110851809, "grad_norm": 0.9765625, "learning_rate": 0.00017316149494239674, "loss": 0.0652, "step": 5122 }, { "epoch": 2.3911318553092182, "grad_norm": 0.79296875, "learning_rate": 0.00017315149097606045, "loss": 0.0517, "step": 5123 }, { "epoch": 2.3915985997666276, "grad_norm": 0.88671875, "learning_rate": 0.00017314148543469141, "loss": 0.0641, "step": 5124 }, { "epoch": 2.3920653442240374, "grad_norm": 0.9453125, "learning_rate": 0.00017313147831850504, "loss": 0.0715, "step": 5125 }, { "epoch": 2.3925320886814467, "grad_norm": 0.87109375, "learning_rate": 0.00017312146962771677, "loss": 0.0625, "step": 5126 }, { "epoch": 2.3929988331388565, "grad_norm": 0.9140625, "learning_rate": 0.00017311145936254216, "loss": 0.0556, "step": 5127 }, { "epoch": 2.393465577596266, "grad_norm": 0.96875, "learning_rate": 0.00017310144752319672, "loss": 0.0652, "step": 5128 }, { "epoch": 2.3939323220536757, "grad_norm": 0.75390625, "learning_rate": 0.00017309143410989598, "loss": 0.0532, "step": 5129 }, { "epoch": 2.394399066511085, "grad_norm": 1.1796875, "learning_rate": 0.0001730814191228556, "loss": 0.0804, "step": 5130 }, { "epoch": 2.394865810968495, "grad_norm": 0.98828125, "learning_rate": 0.00017307140256229116, "loss": 0.0577, "step": 5131 }, { "epoch": 2.395332555425904, "grad_norm": 0.6953125, "learning_rate": 0.00017306138442841834, "loss": 0.0444, "step": 5132 }, { "epoch": 2.395799299883314, "grad_norm": 0.80859375, "learning_rate": 0.00017305136472145285, "loss": 0.053, "step": 5133 }, { "epoch": 2.3962660443407233, "grad_norm": 0.9375, "learning_rate": 0.00017304134344161044, "loss": 0.0567, "step": 5134 }, { "epoch": 2.396732788798133, "grad_norm": 0.78125, "learning_rate": 0.00017303132058910686, "loss": 0.0552, "step": 5135 }, { "epoch": 2.3971995332555425, "grad_norm": 0.828125, "learning_rate": 0.00017302129616415793, "loss": 0.0589, "step": 5136 }, { "epoch": 2.3976662777129523, "grad_norm": 0.87109375, "learning_rate": 0.00017301127016697943, "loss": 0.0658, "step": 5137 }, { "epoch": 2.3981330221703616, "grad_norm": 0.69140625, "learning_rate": 0.00017300124259778734, "loss": 0.0571, "step": 5138 }, { "epoch": 2.3985997666277714, "grad_norm": 1.015625, "learning_rate": 0.00017299121345679746, "loss": 0.0735, "step": 5139 }, { "epoch": 2.3990665110851808, "grad_norm": 0.87109375, "learning_rate": 0.00017298118274422576, "loss": 0.0606, "step": 5140 }, { "epoch": 2.3995332555425906, "grad_norm": 0.83984375, "learning_rate": 0.00017297115046028828, "loss": 0.052, "step": 5141 }, { "epoch": 2.4, "grad_norm": 1.0, "learning_rate": 0.00017296111660520088, "loss": 0.0621, "step": 5142 }, { "epoch": 2.4004667444574097, "grad_norm": 0.76953125, "learning_rate": 0.00017295108117917977, "loss": 0.0533, "step": 5143 }, { "epoch": 2.400933488914819, "grad_norm": 0.98828125, "learning_rate": 0.00017294104418244088, "loss": 0.0673, "step": 5144 }, { "epoch": 2.401400233372229, "grad_norm": 0.8125, "learning_rate": 0.00017293100561520042, "loss": 0.0688, "step": 5145 }, { "epoch": 2.401866977829638, "grad_norm": 0.75390625, "learning_rate": 0.0001729209654776745, "loss": 0.0574, "step": 5146 }, { "epoch": 2.402333722287048, "grad_norm": 0.96484375, "learning_rate": 0.00017291092377007923, "loss": 0.0686, "step": 5147 }, { "epoch": 2.4028004667444574, "grad_norm": 0.87109375, "learning_rate": 0.0001729008804926309, "loss": 0.0545, "step": 5148 }, { "epoch": 2.403267211201867, "grad_norm": 0.875, "learning_rate": 0.00017289083564554577, "loss": 0.0679, "step": 5149 }, { "epoch": 2.4037339556592765, "grad_norm": 0.9375, "learning_rate": 0.00017288078922904002, "loss": 0.057, "step": 5150 }, { "epoch": 2.4042007001166863, "grad_norm": 0.984375, "learning_rate": 0.00017287074124333005, "loss": 0.0794, "step": 5151 }, { "epoch": 2.4046674445740956, "grad_norm": 0.99609375, "learning_rate": 0.00017286069168863215, "loss": 0.0641, "step": 5152 }, { "epoch": 2.4051341890315054, "grad_norm": 0.921875, "learning_rate": 0.00017285064056516273, "loss": 0.0615, "step": 5153 }, { "epoch": 2.405600933488915, "grad_norm": 0.7890625, "learning_rate": 0.0001728405878731382, "loss": 0.048, "step": 5154 }, { "epoch": 2.406067677946324, "grad_norm": 1.0234375, "learning_rate": 0.00017283053361277494, "loss": 0.0531, "step": 5155 }, { "epoch": 2.406534422403734, "grad_norm": 1.0078125, "learning_rate": 0.00017282047778428952, "loss": 0.1063, "step": 5156 }, { "epoch": 2.4070011668611437, "grad_norm": 1.2578125, "learning_rate": 0.00017281042038789842, "loss": 0.0697, "step": 5157 }, { "epoch": 2.407467911318553, "grad_norm": 0.90234375, "learning_rate": 0.0001728003614238182, "loss": 0.0538, "step": 5158 }, { "epoch": 2.4079346557759624, "grad_norm": 1.03125, "learning_rate": 0.00017279030089226543, "loss": 0.0786, "step": 5159 }, { "epoch": 2.4084014002333722, "grad_norm": 0.84765625, "learning_rate": 0.00017278023879345667, "loss": 0.06, "step": 5160 }, { "epoch": 2.408868144690782, "grad_norm": 0.80078125, "learning_rate": 0.00017277017512760864, "loss": 0.0492, "step": 5161 }, { "epoch": 2.4093348891481914, "grad_norm": 0.87109375, "learning_rate": 0.00017276010989493803, "loss": 0.06, "step": 5162 }, { "epoch": 2.4098016336056007, "grad_norm": 0.9375, "learning_rate": 0.0001727500430956615, "loss": 0.0706, "step": 5163 }, { "epoch": 2.4102683780630105, "grad_norm": 0.8359375, "learning_rate": 0.00017273997472999581, "loss": 0.0553, "step": 5164 }, { "epoch": 2.41073512252042, "grad_norm": 0.80859375, "learning_rate": 0.00017272990479815778, "loss": 0.0594, "step": 5165 }, { "epoch": 2.4112018669778297, "grad_norm": 1.0546875, "learning_rate": 0.0001727198333003642, "loss": 0.077, "step": 5166 }, { "epoch": 2.411668611435239, "grad_norm": 1.2265625, "learning_rate": 0.00017270976023683193, "loss": 0.0717, "step": 5167 }, { "epoch": 2.412135355892649, "grad_norm": 0.8984375, "learning_rate": 0.00017269968560777783, "loss": 0.0601, "step": 5168 }, { "epoch": 2.412602100350058, "grad_norm": 0.88671875, "learning_rate": 0.00017268960941341886, "loss": 0.0523, "step": 5169 }, { "epoch": 2.413068844807468, "grad_norm": 1.015625, "learning_rate": 0.00017267953165397195, "loss": 0.0774, "step": 5170 }, { "epoch": 2.4135355892648773, "grad_norm": 0.7734375, "learning_rate": 0.00017266945232965406, "loss": 0.0443, "step": 5171 }, { "epoch": 2.414002333722287, "grad_norm": 0.953125, "learning_rate": 0.00017265937144068225, "loss": 0.0898, "step": 5172 }, { "epoch": 2.4144690781796965, "grad_norm": 0.859375, "learning_rate": 0.00017264928898727354, "loss": 0.0599, "step": 5173 }, { "epoch": 2.4149358226371063, "grad_norm": 1.0390625, "learning_rate": 0.00017263920496964506, "loss": 0.0671, "step": 5174 }, { "epoch": 2.4154025670945156, "grad_norm": 1.046875, "learning_rate": 0.00017262911938801387, "loss": 0.0822, "step": 5175 }, { "epoch": 2.4158693115519254, "grad_norm": 0.88671875, "learning_rate": 0.00017261903224259718, "loss": 0.0729, "step": 5176 }, { "epoch": 2.4163360560093348, "grad_norm": 0.9765625, "learning_rate": 0.00017260894353361215, "loss": 0.0586, "step": 5177 }, { "epoch": 2.4168028004667446, "grad_norm": 0.80078125, "learning_rate": 0.000172598853261276, "loss": 0.0551, "step": 5178 }, { "epoch": 2.417269544924154, "grad_norm": 0.828125, "learning_rate": 0.00017258876142580597, "loss": 0.0615, "step": 5179 }, { "epoch": 2.4177362893815637, "grad_norm": 0.8828125, "learning_rate": 0.00017257866802741943, "loss": 0.0617, "step": 5180 }, { "epoch": 2.418203033838973, "grad_norm": 0.90234375, "learning_rate": 0.00017256857306633357, "loss": 0.0567, "step": 5181 }, { "epoch": 2.418669778296383, "grad_norm": 0.8046875, "learning_rate": 0.00017255847654276584, "loss": 0.0519, "step": 5182 }, { "epoch": 2.419136522753792, "grad_norm": 0.77734375, "learning_rate": 0.0001725483784569336, "loss": 0.0684, "step": 5183 }, { "epoch": 2.419603267211202, "grad_norm": 0.80078125, "learning_rate": 0.0001725382788090543, "loss": 0.05, "step": 5184 }, { "epoch": 2.4200700116686114, "grad_norm": 0.94140625, "learning_rate": 0.00017252817759934536, "loss": 0.074, "step": 5185 }, { "epoch": 2.420536756126021, "grad_norm": 0.6015625, "learning_rate": 0.0001725180748280243, "loss": 0.0489, "step": 5186 }, { "epoch": 2.4210035005834305, "grad_norm": 0.7578125, "learning_rate": 0.00017250797049530862, "loss": 0.0472, "step": 5187 }, { "epoch": 2.4214702450408403, "grad_norm": 1.140625, "learning_rate": 0.00017249786460141588, "loss": 0.0988, "step": 5188 }, { "epoch": 2.4219369894982496, "grad_norm": 0.78515625, "learning_rate": 0.00017248775714656366, "loss": 0.057, "step": 5189 }, { "epoch": 2.4224037339556594, "grad_norm": 0.92578125, "learning_rate": 0.00017247764813096964, "loss": 0.0608, "step": 5190 }, { "epoch": 2.422870478413069, "grad_norm": 0.97265625, "learning_rate": 0.00017246753755485142, "loss": 0.0675, "step": 5191 }, { "epoch": 2.4233372228704786, "grad_norm": 0.71875, "learning_rate": 0.00017245742541842672, "loss": 0.0563, "step": 5192 }, { "epoch": 2.423803967327888, "grad_norm": 0.94921875, "learning_rate": 0.00017244731172191325, "loss": 0.0689, "step": 5193 }, { "epoch": 2.4242707117852977, "grad_norm": 0.79296875, "learning_rate": 0.00017243719646552877, "loss": 0.0448, "step": 5194 }, { "epoch": 2.424737456242707, "grad_norm": 0.96875, "learning_rate": 0.00017242707964949108, "loss": 0.0676, "step": 5195 }, { "epoch": 2.425204200700117, "grad_norm": 1.2109375, "learning_rate": 0.00017241696127401804, "loss": 0.0757, "step": 5196 }, { "epoch": 2.4256709451575262, "grad_norm": 0.953125, "learning_rate": 0.00017240684133932742, "loss": 0.0551, "step": 5197 }, { "epoch": 2.4261376896149356, "grad_norm": 1.03125, "learning_rate": 0.00017239671984563718, "loss": 0.0635, "step": 5198 }, { "epoch": 2.4266044340723454, "grad_norm": 1.015625, "learning_rate": 0.00017238659679316527, "loss": 0.0745, "step": 5199 }, { "epoch": 2.427071178529755, "grad_norm": 1.0234375, "learning_rate": 0.00017237647218212962, "loss": 0.0703, "step": 5200 }, { "epoch": 2.4275379229871645, "grad_norm": 0.90625, "learning_rate": 0.00017236634601274818, "loss": 0.0778, "step": 5201 }, { "epoch": 2.428004667444574, "grad_norm": 1.046875, "learning_rate": 0.00017235621828523903, "loss": 0.0621, "step": 5202 }, { "epoch": 2.4284714119019837, "grad_norm": 0.89453125, "learning_rate": 0.00017234608899982024, "loss": 0.0669, "step": 5203 }, { "epoch": 2.4289381563593935, "grad_norm": 0.70703125, "learning_rate": 0.0001723359581567099, "loss": 0.0411, "step": 5204 }, { "epoch": 2.429404900816803, "grad_norm": 0.94921875, "learning_rate": 0.00017232582575612608, "loss": 0.0672, "step": 5205 }, { "epoch": 2.429871645274212, "grad_norm": 0.9140625, "learning_rate": 0.000172315691798287, "loss": 0.0751, "step": 5206 }, { "epoch": 2.430338389731622, "grad_norm": 0.9453125, "learning_rate": 0.00017230555628341084, "loss": 0.0859, "step": 5207 }, { "epoch": 2.4308051341890313, "grad_norm": 0.8359375, "learning_rate": 0.00017229541921171584, "loss": 0.0555, "step": 5208 }, { "epoch": 2.431271878646441, "grad_norm": 0.734375, "learning_rate": 0.00017228528058342026, "loss": 0.0553, "step": 5209 }, { "epoch": 2.4317386231038505, "grad_norm": 1.1015625, "learning_rate": 0.00017227514039874238, "loss": 0.0888, "step": 5210 }, { "epoch": 2.4322053675612603, "grad_norm": 0.96875, "learning_rate": 0.00017226499865790048, "loss": 0.0775, "step": 5211 }, { "epoch": 2.4326721120186696, "grad_norm": 1.1640625, "learning_rate": 0.00017225485536111304, "loss": 0.0569, "step": 5212 }, { "epoch": 2.4331388564760794, "grad_norm": 0.7890625, "learning_rate": 0.00017224471050859838, "loss": 0.0583, "step": 5213 }, { "epoch": 2.4336056009334888, "grad_norm": 0.84375, "learning_rate": 0.00017223456410057496, "loss": 0.0659, "step": 5214 }, { "epoch": 2.4340723453908986, "grad_norm": 0.9375, "learning_rate": 0.00017222441613726122, "loss": 0.0598, "step": 5215 }, { "epoch": 2.434539089848308, "grad_norm": 0.71875, "learning_rate": 0.00017221426661887565, "loss": 0.043, "step": 5216 }, { "epoch": 2.4350058343057177, "grad_norm": 0.7109375, "learning_rate": 0.00017220411554563683, "loss": 0.0484, "step": 5217 }, { "epoch": 2.435472578763127, "grad_norm": 0.7890625, "learning_rate": 0.00017219396291776328, "loss": 0.0558, "step": 5218 }, { "epoch": 2.435939323220537, "grad_norm": 0.83984375, "learning_rate": 0.0001721838087354736, "loss": 0.0521, "step": 5219 }, { "epoch": 2.436406067677946, "grad_norm": 0.8203125, "learning_rate": 0.0001721736529989864, "loss": 0.0544, "step": 5220 }, { "epoch": 2.436872812135356, "grad_norm": 0.9453125, "learning_rate": 0.0001721634957085204, "loss": 0.0556, "step": 5221 }, { "epoch": 2.4373395565927654, "grad_norm": 0.84375, "learning_rate": 0.0001721533368642943, "loss": 0.0515, "step": 5222 }, { "epoch": 2.437806301050175, "grad_norm": 0.96484375, "learning_rate": 0.00017214317646652676, "loss": 0.0671, "step": 5223 }, { "epoch": 2.4382730455075845, "grad_norm": 0.90234375, "learning_rate": 0.0001721330145154366, "loss": 0.0594, "step": 5224 }, { "epoch": 2.4387397899649943, "grad_norm": 0.80078125, "learning_rate": 0.00017212285101124257, "loss": 0.0571, "step": 5225 }, { "epoch": 2.4392065344224036, "grad_norm": 0.75390625, "learning_rate": 0.00017211268595416362, "loss": 0.0404, "step": 5226 }, { "epoch": 2.4396732788798134, "grad_norm": 0.86328125, "learning_rate": 0.00017210251934441845, "loss": 0.0658, "step": 5227 }, { "epoch": 2.440140023337223, "grad_norm": 1.0859375, "learning_rate": 0.00017209235118222606, "loss": 0.0516, "step": 5228 }, { "epoch": 2.4406067677946326, "grad_norm": 1.03125, "learning_rate": 0.0001720821814678054, "loss": 0.0503, "step": 5229 }, { "epoch": 2.441073512252042, "grad_norm": 1.0234375, "learning_rate": 0.00017207201020137536, "loss": 0.0556, "step": 5230 }, { "epoch": 2.4415402567094517, "grad_norm": 1.140625, "learning_rate": 0.00017206183738315497, "loss": 0.0598, "step": 5231 }, { "epoch": 2.442007001166861, "grad_norm": 0.78515625, "learning_rate": 0.00017205166301336328, "loss": 0.0403, "step": 5232 }, { "epoch": 2.442473745624271, "grad_norm": 0.84765625, "learning_rate": 0.00017204148709221934, "loss": 0.0492, "step": 5233 }, { "epoch": 2.4429404900816802, "grad_norm": 1.0859375, "learning_rate": 0.00017203130961994224, "loss": 0.0676, "step": 5234 }, { "epoch": 2.44340723453909, "grad_norm": 0.8359375, "learning_rate": 0.00017202113059675114, "loss": 0.075, "step": 5235 }, { "epoch": 2.4438739789964994, "grad_norm": 0.6875, "learning_rate": 0.00017201095002286517, "loss": 0.0471, "step": 5236 }, { "epoch": 2.4443407234539087, "grad_norm": 0.85546875, "learning_rate": 0.00017200076789850358, "loss": 0.0461, "step": 5237 }, { "epoch": 2.4448074679113185, "grad_norm": 1.03125, "learning_rate": 0.00017199058422388556, "loss": 0.072, "step": 5238 }, { "epoch": 2.4452742123687283, "grad_norm": 0.859375, "learning_rate": 0.0001719803989992304, "loss": 0.0578, "step": 5239 }, { "epoch": 2.4457409568261377, "grad_norm": 0.953125, "learning_rate": 0.00017197021222475735, "loss": 0.0646, "step": 5240 }, { "epoch": 2.446207701283547, "grad_norm": 0.8671875, "learning_rate": 0.00017196002390068582, "loss": 0.0614, "step": 5241 }, { "epoch": 2.446674445740957, "grad_norm": 0.91796875, "learning_rate": 0.0001719498340272351, "loss": 0.0543, "step": 5242 }, { "epoch": 2.4471411901983666, "grad_norm": 1.03125, "learning_rate": 0.00017193964260462462, "loss": 0.0681, "step": 5243 }, { "epoch": 2.447607934655776, "grad_norm": 0.97265625, "learning_rate": 0.00017192944963307386, "loss": 0.0637, "step": 5244 }, { "epoch": 2.4480746791131853, "grad_norm": 0.94921875, "learning_rate": 0.0001719192551128022, "loss": 0.0578, "step": 5245 }, { "epoch": 2.448541423570595, "grad_norm": 0.9453125, "learning_rate": 0.0001719090590440292, "loss": 0.0662, "step": 5246 }, { "epoch": 2.449008168028005, "grad_norm": 0.9921875, "learning_rate": 0.00017189886142697435, "loss": 0.0578, "step": 5247 }, { "epoch": 2.4494749124854143, "grad_norm": 1.1328125, "learning_rate": 0.00017188866226185726, "loss": 0.0735, "step": 5248 }, { "epoch": 2.4499416569428236, "grad_norm": 1.0390625, "learning_rate": 0.00017187846154889753, "loss": 0.0614, "step": 5249 }, { "epoch": 2.4504084014002334, "grad_norm": 0.90234375, "learning_rate": 0.00017186825928831474, "loss": 0.0793, "step": 5250 }, { "epoch": 2.4508751458576428, "grad_norm": 0.88671875, "learning_rate": 0.0001718580554803286, "loss": 0.0592, "step": 5251 }, { "epoch": 2.4513418903150526, "grad_norm": 0.67578125, "learning_rate": 0.0001718478501251588, "loss": 0.047, "step": 5252 }, { "epoch": 2.451808634772462, "grad_norm": 0.80078125, "learning_rate": 0.00017183764322302507, "loss": 0.059, "step": 5253 }, { "epoch": 2.4522753792298717, "grad_norm": 0.9375, "learning_rate": 0.00017182743477414718, "loss": 0.0442, "step": 5254 }, { "epoch": 2.452742123687281, "grad_norm": 0.85546875, "learning_rate": 0.0001718172247787449, "loss": 0.0611, "step": 5255 }, { "epoch": 2.453208868144691, "grad_norm": 0.9609375, "learning_rate": 0.00017180701323703812, "loss": 0.0488, "step": 5256 }, { "epoch": 2.4536756126021, "grad_norm": 0.8828125, "learning_rate": 0.00017179680014924666, "loss": 0.0557, "step": 5257 }, { "epoch": 2.45414235705951, "grad_norm": 1.3125, "learning_rate": 0.00017178658551559044, "loss": 0.0713, "step": 5258 }, { "epoch": 2.4546091015169194, "grad_norm": 1.046875, "learning_rate": 0.00017177636933628937, "loss": 0.0774, "step": 5259 }, { "epoch": 2.455075845974329, "grad_norm": 0.67578125, "learning_rate": 0.00017176615161156345, "loss": 0.0461, "step": 5260 }, { "epoch": 2.4555425904317385, "grad_norm": 0.9296875, "learning_rate": 0.00017175593234163266, "loss": 0.0747, "step": 5261 }, { "epoch": 2.4560093348891483, "grad_norm": 0.8515625, "learning_rate": 0.00017174571152671702, "loss": 0.0515, "step": 5262 }, { "epoch": 2.4564760793465577, "grad_norm": 0.99609375, "learning_rate": 0.00017173548916703658, "loss": 0.0812, "step": 5263 }, { "epoch": 2.4569428238039674, "grad_norm": 0.86328125, "learning_rate": 0.0001717252652628115, "loss": 0.0336, "step": 5264 }, { "epoch": 2.457409568261377, "grad_norm": 0.76171875, "learning_rate": 0.00017171503981426188, "loss": 0.0541, "step": 5265 }, { "epoch": 2.4578763127187866, "grad_norm": 1.0390625, "learning_rate": 0.00017170481282160788, "loss": 0.066, "step": 5266 }, { "epoch": 2.458343057176196, "grad_norm": 0.57421875, "learning_rate": 0.00017169458428506972, "loss": 0.0436, "step": 5267 }, { "epoch": 2.4588098016336057, "grad_norm": 0.94140625, "learning_rate": 0.00017168435420486758, "loss": 0.065, "step": 5268 }, { "epoch": 2.459276546091015, "grad_norm": 0.88671875, "learning_rate": 0.00017167412258122178, "loss": 0.072, "step": 5269 }, { "epoch": 2.459743290548425, "grad_norm": 0.80859375, "learning_rate": 0.0001716638894143526, "loss": 0.0574, "step": 5270 }, { "epoch": 2.4602100350058342, "grad_norm": 0.98046875, "learning_rate": 0.00017165365470448035, "loss": 0.0639, "step": 5271 }, { "epoch": 2.460676779463244, "grad_norm": 0.93359375, "learning_rate": 0.0001716434184518254, "loss": 0.0708, "step": 5272 }, { "epoch": 2.4611435239206534, "grad_norm": 0.91796875, "learning_rate": 0.00017163318065660822, "loss": 0.0587, "step": 5273 }, { "epoch": 2.461610268378063, "grad_norm": 0.96875, "learning_rate": 0.00017162294131904912, "loss": 0.0595, "step": 5274 }, { "epoch": 2.4620770128354725, "grad_norm": 0.98828125, "learning_rate": 0.00017161270043936867, "loss": 0.074, "step": 5275 }, { "epoch": 2.4625437572928823, "grad_norm": 0.79296875, "learning_rate": 0.0001716024580177873, "loss": 0.0533, "step": 5276 }, { "epoch": 2.4630105017502917, "grad_norm": 0.9140625, "learning_rate": 0.00017159221405452555, "loss": 0.0574, "step": 5277 }, { "epoch": 2.4634772462077015, "grad_norm": 0.82421875, "learning_rate": 0.00017158196854980405, "loss": 0.0439, "step": 5278 }, { "epoch": 2.463943990665111, "grad_norm": 0.875, "learning_rate": 0.0001715717215038433, "loss": 0.0696, "step": 5279 }, { "epoch": 2.46441073512252, "grad_norm": 0.90234375, "learning_rate": 0.000171561472916864, "loss": 0.0699, "step": 5280 }, { "epoch": 2.46487747957993, "grad_norm": 1.0390625, "learning_rate": 0.00017155122278908675, "loss": 0.0797, "step": 5281 }, { "epoch": 2.4653442240373398, "grad_norm": 0.78125, "learning_rate": 0.0001715409711207323, "loss": 0.0491, "step": 5282 }, { "epoch": 2.465810968494749, "grad_norm": 0.75390625, "learning_rate": 0.0001715307179120214, "loss": 0.0368, "step": 5283 }, { "epoch": 2.4662777129521585, "grad_norm": 0.8671875, "learning_rate": 0.00017152046316317476, "loss": 0.0576, "step": 5284 }, { "epoch": 2.4667444574095683, "grad_norm": 0.80078125, "learning_rate": 0.0001715102068744132, "loss": 0.0485, "step": 5285 }, { "epoch": 2.467211201866978, "grad_norm": 0.97265625, "learning_rate": 0.00017149994904595755, "loss": 0.0546, "step": 5286 }, { "epoch": 2.4676779463243874, "grad_norm": 1.15625, "learning_rate": 0.00017148968967802862, "loss": 0.0798, "step": 5287 }, { "epoch": 2.4681446907817968, "grad_norm": 0.9375, "learning_rate": 0.0001714794287708474, "loss": 0.0587, "step": 5288 }, { "epoch": 2.4686114352392066, "grad_norm": 0.9140625, "learning_rate": 0.00017146916632463475, "loss": 0.0747, "step": 5289 }, { "epoch": 2.469078179696616, "grad_norm": 1.1640625, "learning_rate": 0.00017145890233961165, "loss": 0.0692, "step": 5290 }, { "epoch": 2.4695449241540257, "grad_norm": 0.75390625, "learning_rate": 0.00017144863681599915, "loss": 0.0383, "step": 5291 }, { "epoch": 2.470011668611435, "grad_norm": 0.83203125, "learning_rate": 0.00017143836975401814, "loss": 0.0607, "step": 5292 }, { "epoch": 2.470478413068845, "grad_norm": 1.0, "learning_rate": 0.00017142810115388984, "loss": 0.0635, "step": 5293 }, { "epoch": 2.470945157526254, "grad_norm": 0.90625, "learning_rate": 0.00017141783101583524, "loss": 0.0599, "step": 5294 }, { "epoch": 2.471411901983664, "grad_norm": 0.8125, "learning_rate": 0.0001714075593400755, "loss": 0.0519, "step": 5295 }, { "epoch": 2.4718786464410734, "grad_norm": 0.921875, "learning_rate": 0.00017139728612683178, "loss": 0.0519, "step": 5296 }, { "epoch": 2.472345390898483, "grad_norm": 0.71875, "learning_rate": 0.00017138701137632526, "loss": 0.0421, "step": 5297 }, { "epoch": 2.4728121353558925, "grad_norm": 1.015625, "learning_rate": 0.0001713767350887772, "loss": 0.0489, "step": 5298 }, { "epoch": 2.4732788798133023, "grad_norm": 0.92578125, "learning_rate": 0.00017136645726440882, "loss": 0.0646, "step": 5299 }, { "epoch": 2.4737456242707117, "grad_norm": 0.97265625, "learning_rate": 0.00017135617790344144, "loss": 0.0674, "step": 5300 }, { "epoch": 2.4742123687281214, "grad_norm": 0.83203125, "learning_rate": 0.00017134589700609637, "loss": 0.049, "step": 5301 }, { "epoch": 2.474679113185531, "grad_norm": 0.88671875, "learning_rate": 0.00017133561457259502, "loss": 0.0606, "step": 5302 }, { "epoch": 2.4751458576429406, "grad_norm": 1.0390625, "learning_rate": 0.0001713253306031587, "loss": 0.0674, "step": 5303 }, { "epoch": 2.47561260210035, "grad_norm": 0.875, "learning_rate": 0.0001713150450980089, "loss": 0.0571, "step": 5304 }, { "epoch": 2.4760793465577597, "grad_norm": 0.91015625, "learning_rate": 0.00017130475805736704, "loss": 0.0726, "step": 5305 }, { "epoch": 2.476546091015169, "grad_norm": 0.91015625, "learning_rate": 0.0001712944694814546, "loss": 0.0803, "step": 5306 }, { "epoch": 2.477012835472579, "grad_norm": 0.953125, "learning_rate": 0.00017128417937049316, "loss": 0.0617, "step": 5307 }, { "epoch": 2.4774795799299882, "grad_norm": 0.87109375, "learning_rate": 0.00017127388772470426, "loss": 0.066, "step": 5308 }, { "epoch": 2.477946324387398, "grad_norm": 0.83203125, "learning_rate": 0.00017126359454430946, "loss": 0.0775, "step": 5309 }, { "epoch": 2.4784130688448074, "grad_norm": 1.109375, "learning_rate": 0.00017125329982953038, "loss": 0.1242, "step": 5310 }, { "epoch": 2.478879813302217, "grad_norm": 0.86328125, "learning_rate": 0.00017124300358058875, "loss": 0.0517, "step": 5311 }, { "epoch": 2.4793465577596265, "grad_norm": 0.80859375, "learning_rate": 0.00017123270579770617, "loss": 0.0606, "step": 5312 }, { "epoch": 2.4798133022170363, "grad_norm": 0.9765625, "learning_rate": 0.0001712224064811044, "loss": 0.0798, "step": 5313 }, { "epoch": 2.4802800466744457, "grad_norm": 0.90234375, "learning_rate": 0.0001712121056310052, "loss": 0.0731, "step": 5314 }, { "epoch": 2.4807467911318555, "grad_norm": 0.8125, "learning_rate": 0.00017120180324763036, "loss": 0.0734, "step": 5315 }, { "epoch": 2.481213535589265, "grad_norm": 0.78125, "learning_rate": 0.0001711914993312017, "loss": 0.0571, "step": 5316 }, { "epoch": 2.4816802800466746, "grad_norm": 0.80078125, "learning_rate": 0.00017118119388194103, "loss": 0.0629, "step": 5317 }, { "epoch": 2.482147024504084, "grad_norm": 0.8828125, "learning_rate": 0.00017117088690007031, "loss": 0.0505, "step": 5318 }, { "epoch": 2.4826137689614938, "grad_norm": 0.8828125, "learning_rate": 0.00017116057838581145, "loss": 0.0716, "step": 5319 }, { "epoch": 2.483080513418903, "grad_norm": 0.9765625, "learning_rate": 0.00017115026833938637, "loss": 0.08, "step": 5320 }, { "epoch": 2.483547257876313, "grad_norm": 0.765625, "learning_rate": 0.00017113995676101707, "loss": 0.0535, "step": 5321 }, { "epoch": 2.4840140023337223, "grad_norm": 0.84765625, "learning_rate": 0.00017112964365092553, "loss": 0.0641, "step": 5322 }, { "epoch": 2.4844807467911316, "grad_norm": 0.73046875, "learning_rate": 0.0001711193290093339, "loss": 0.0483, "step": 5323 }, { "epoch": 2.4849474912485414, "grad_norm": 0.64453125, "learning_rate": 0.0001711090128364642, "loss": 0.0472, "step": 5324 }, { "epoch": 2.485414235705951, "grad_norm": 0.87109375, "learning_rate": 0.00017109869513253853, "loss": 0.0444, "step": 5325 }, { "epoch": 2.4858809801633606, "grad_norm": 0.90625, "learning_rate": 0.0001710883758977791, "loss": 0.0615, "step": 5326 }, { "epoch": 2.48634772462077, "grad_norm": 0.86328125, "learning_rate": 0.00017107805513240802, "loss": 0.0579, "step": 5327 }, { "epoch": 2.4868144690781797, "grad_norm": 0.85546875, "learning_rate": 0.00017106773283664757, "loss": 0.0578, "step": 5328 }, { "epoch": 2.4872812135355895, "grad_norm": 1.0546875, "learning_rate": 0.00017105740901071997, "loss": 0.0749, "step": 5329 }, { "epoch": 2.487747957992999, "grad_norm": 0.90625, "learning_rate": 0.00017104708365484755, "loss": 0.0622, "step": 5330 }, { "epoch": 2.488214702450408, "grad_norm": 0.80859375, "learning_rate": 0.00017103675676925258, "loss": 0.0419, "step": 5331 }, { "epoch": 2.488681446907818, "grad_norm": 0.83203125, "learning_rate": 0.0001710264283541574, "loss": 0.078, "step": 5332 }, { "epoch": 2.4891481913652274, "grad_norm": 1.0, "learning_rate": 0.00017101609840978443, "loss": 0.0649, "step": 5333 }, { "epoch": 2.489614935822637, "grad_norm": 0.87109375, "learning_rate": 0.00017100576693635606, "loss": 0.0548, "step": 5334 }, { "epoch": 2.4900816802800465, "grad_norm": 1.203125, "learning_rate": 0.00017099543393409475, "loss": 0.0763, "step": 5335 }, { "epoch": 2.4905484247374563, "grad_norm": 1.1171875, "learning_rate": 0.00017098509940322295, "loss": 0.077, "step": 5336 }, { "epoch": 2.4910151691948657, "grad_norm": 0.796875, "learning_rate": 0.00017097476334396324, "loss": 0.051, "step": 5337 }, { "epoch": 2.4914819136522754, "grad_norm": 1.390625, "learning_rate": 0.0001709644257565381, "loss": 0.0761, "step": 5338 }, { "epoch": 2.491948658109685, "grad_norm": 0.8828125, "learning_rate": 0.00017095408664117017, "loss": 0.0751, "step": 5339 }, { "epoch": 2.4924154025670946, "grad_norm": 0.84765625, "learning_rate": 0.00017094374599808197, "loss": 0.0466, "step": 5340 }, { "epoch": 2.492882147024504, "grad_norm": 0.92578125, "learning_rate": 0.00017093340382749625, "loss": 0.0616, "step": 5341 }, { "epoch": 2.4933488914819137, "grad_norm": 0.8515625, "learning_rate": 0.00017092306012963566, "loss": 0.0534, "step": 5342 }, { "epoch": 2.493815635939323, "grad_norm": 0.8125, "learning_rate": 0.00017091271490472286, "loss": 0.0509, "step": 5343 }, { "epoch": 2.494282380396733, "grad_norm": 0.8828125, "learning_rate": 0.0001709023681529806, "loss": 0.0704, "step": 5344 }, { "epoch": 2.4947491248541422, "grad_norm": 0.84765625, "learning_rate": 0.00017089201987463177, "loss": 0.0707, "step": 5345 }, { "epoch": 2.495215869311552, "grad_norm": 0.8984375, "learning_rate": 0.00017088167006989907, "loss": 0.0777, "step": 5346 }, { "epoch": 2.4956826137689614, "grad_norm": 0.78515625, "learning_rate": 0.00017087131873900533, "loss": 0.0589, "step": 5347 }, { "epoch": 2.496149358226371, "grad_norm": 1.0078125, "learning_rate": 0.00017086096588217348, "loss": 0.0703, "step": 5348 }, { "epoch": 2.4966161026837805, "grad_norm": 1.015625, "learning_rate": 0.0001708506114996264, "loss": 0.0638, "step": 5349 }, { "epoch": 2.4970828471411903, "grad_norm": 0.9765625, "learning_rate": 0.00017084025559158708, "loss": 0.0695, "step": 5350 }, { "epoch": 2.4975495915985997, "grad_norm": 0.8203125, "learning_rate": 0.00017082989815827846, "loss": 0.0515, "step": 5351 }, { "epoch": 2.4980163360560095, "grad_norm": 0.93359375, "learning_rate": 0.0001708195391999235, "loss": 0.0563, "step": 5352 }, { "epoch": 2.498483080513419, "grad_norm": 0.92578125, "learning_rate": 0.0001708091787167453, "loss": 0.0621, "step": 5353 }, { "epoch": 2.4989498249708286, "grad_norm": 0.88671875, "learning_rate": 0.00017079881670896695, "loss": 0.0546, "step": 5354 }, { "epoch": 2.499416569428238, "grad_norm": 0.953125, "learning_rate": 0.0001707884531768115, "loss": 0.0555, "step": 5355 }, { "epoch": 2.4998833138856478, "grad_norm": 0.66015625, "learning_rate": 0.00017077808812050212, "loss": 0.0513, "step": 5356 }, { "epoch": 2.500350058343057, "grad_norm": 0.7578125, "learning_rate": 0.00017076772154026197, "loss": 0.0603, "step": 5357 }, { "epoch": 2.5008168028004665, "grad_norm": 0.84765625, "learning_rate": 0.00017075735343631426, "loss": 0.0488, "step": 5358 }, { "epoch": 2.5012835472578763, "grad_norm": 0.890625, "learning_rate": 0.0001707469838088822, "loss": 0.0732, "step": 5359 }, { "epoch": 2.501750291715286, "grad_norm": 0.91015625, "learning_rate": 0.00017073661265818913, "loss": 0.0669, "step": 5360 }, { "epoch": 2.501750291715286, "eval_loss": 1.6183894872665405, "eval_runtime": 95.4633, "eval_samples_per_second": 18.897, "eval_steps_per_second": 2.367, "step": 5360 }, { "epoch": 2.5022170361726954, "grad_norm": 0.87109375, "learning_rate": 0.00017072623998445828, "loss": 0.0597, "step": 5361 }, { "epoch": 2.5026837806301048, "grad_norm": 0.94140625, "learning_rate": 0.000170715865787913, "loss": 0.0476, "step": 5362 }, { "epoch": 2.5031505250875146, "grad_norm": 1.015625, "learning_rate": 0.00017070549006877668, "loss": 0.0541, "step": 5363 }, { "epoch": 2.5036172695449244, "grad_norm": 1.1171875, "learning_rate": 0.0001706951128272727, "loss": 0.0869, "step": 5364 }, { "epoch": 2.5040840140023337, "grad_norm": 0.91015625, "learning_rate": 0.00017068473406362452, "loss": 0.0514, "step": 5365 }, { "epoch": 2.504550758459743, "grad_norm": 0.85546875, "learning_rate": 0.0001706743537780556, "loss": 0.0578, "step": 5366 }, { "epoch": 2.505017502917153, "grad_norm": 0.94140625, "learning_rate": 0.00017066397197078938, "loss": 0.0502, "step": 5367 }, { "epoch": 2.5054842473745627, "grad_norm": 0.92578125, "learning_rate": 0.0001706535886420495, "loss": 0.0677, "step": 5368 }, { "epoch": 2.505950991831972, "grad_norm": 0.76953125, "learning_rate": 0.00017064320379205943, "loss": 0.0499, "step": 5369 }, { "epoch": 2.5064177362893814, "grad_norm": 0.9140625, "learning_rate": 0.0001706328174210428, "loss": 0.0562, "step": 5370 }, { "epoch": 2.506884480746791, "grad_norm": 0.97265625, "learning_rate": 0.00017062242952922325, "loss": 0.0736, "step": 5371 }, { "epoch": 2.507351225204201, "grad_norm": 1.0234375, "learning_rate": 0.00017061204011682444, "loss": 0.0964, "step": 5372 }, { "epoch": 2.5078179696616103, "grad_norm": 1.0078125, "learning_rate": 0.00017060164918407007, "loss": 0.0778, "step": 5373 }, { "epoch": 2.5082847141190197, "grad_norm": 0.83984375, "learning_rate": 0.0001705912567311838, "loss": 0.063, "step": 5374 }, { "epoch": 2.5087514585764294, "grad_norm": 0.8203125, "learning_rate": 0.00017058086275838953, "loss": 0.0528, "step": 5375 }, { "epoch": 2.5092182030338392, "grad_norm": 0.69140625, "learning_rate": 0.00017057046726591096, "loss": 0.0455, "step": 5376 }, { "epoch": 2.5096849474912486, "grad_norm": 0.9296875, "learning_rate": 0.0001705600702539719, "loss": 0.0612, "step": 5377 }, { "epoch": 2.510151691948658, "grad_norm": 0.73046875, "learning_rate": 0.00017054967172279627, "loss": 0.0565, "step": 5378 }, { "epoch": 2.5106184364060677, "grad_norm": 0.8671875, "learning_rate": 0.00017053927167260787, "loss": 0.0726, "step": 5379 }, { "epoch": 2.511085180863477, "grad_norm": 0.796875, "learning_rate": 0.00017052887010363075, "loss": 0.0542, "step": 5380 }, { "epoch": 2.511551925320887, "grad_norm": 0.83203125, "learning_rate": 0.0001705184670160888, "loss": 0.0625, "step": 5381 }, { "epoch": 2.5120186697782962, "grad_norm": 0.84375, "learning_rate": 0.00017050806241020595, "loss": 0.062, "step": 5382 }, { "epoch": 2.512485414235706, "grad_norm": 0.9765625, "learning_rate": 0.00017049765628620635, "loss": 0.0724, "step": 5383 }, { "epoch": 2.5129521586931154, "grad_norm": 0.81640625, "learning_rate": 0.00017048724864431397, "loss": 0.0536, "step": 5384 }, { "epoch": 2.513418903150525, "grad_norm": 0.65625, "learning_rate": 0.00017047683948475292, "loss": 0.0437, "step": 5385 }, { "epoch": 2.5138856476079345, "grad_norm": 0.91796875, "learning_rate": 0.00017046642880774732, "loss": 0.0483, "step": 5386 }, { "epoch": 2.5143523920653443, "grad_norm": 1.015625, "learning_rate": 0.00017045601661352133, "loss": 0.0785, "step": 5387 }, { "epoch": 2.5148191365227537, "grad_norm": 0.87890625, "learning_rate": 0.00017044560290229913, "loss": 0.0582, "step": 5388 }, { "epoch": 2.5152858809801635, "grad_norm": 0.73046875, "learning_rate": 0.00017043518767430492, "loss": 0.0527, "step": 5389 }, { "epoch": 2.515752625437573, "grad_norm": 0.75390625, "learning_rate": 0.000170424770929763, "loss": 0.0409, "step": 5390 }, { "epoch": 2.5162193698949826, "grad_norm": 0.8203125, "learning_rate": 0.00017041435266889764, "loss": 0.061, "step": 5391 }, { "epoch": 2.516686114352392, "grad_norm": 0.72265625, "learning_rate": 0.00017040393289193307, "loss": 0.0566, "step": 5392 }, { "epoch": 2.5171528588098018, "grad_norm": 0.92578125, "learning_rate": 0.00017039351159909376, "loss": 0.0598, "step": 5393 }, { "epoch": 2.517619603267211, "grad_norm": 0.74609375, "learning_rate": 0.00017038308879060402, "loss": 0.0447, "step": 5394 }, { "epoch": 2.518086347724621, "grad_norm": 0.984375, "learning_rate": 0.0001703726644666883, "loss": 0.0649, "step": 5395 }, { "epoch": 2.5185530921820303, "grad_norm": 1.0234375, "learning_rate": 0.00017036223862757107, "loss": 0.0762, "step": 5396 }, { "epoch": 2.51901983663944, "grad_norm": 1.0, "learning_rate": 0.00017035181127347674, "loss": 0.0653, "step": 5397 }, { "epoch": 2.5194865810968494, "grad_norm": 0.69921875, "learning_rate": 0.0001703413824046298, "loss": 0.0448, "step": 5398 }, { "epoch": 2.519953325554259, "grad_norm": 0.8359375, "learning_rate": 0.00017033095202125497, "loss": 0.0515, "step": 5399 }, { "epoch": 2.5204200700116686, "grad_norm": 0.83203125, "learning_rate": 0.00017032052012357665, "loss": 0.0471, "step": 5400 }, { "epoch": 2.520886814469078, "grad_norm": 0.984375, "learning_rate": 0.0001703100867118195, "loss": 0.0592, "step": 5401 }, { "epoch": 2.5213535589264877, "grad_norm": 1.0, "learning_rate": 0.00017029965178620818, "loss": 0.0593, "step": 5402 }, { "epoch": 2.5218203033838975, "grad_norm": 0.8515625, "learning_rate": 0.00017028921534696735, "loss": 0.0473, "step": 5403 }, { "epoch": 2.522287047841307, "grad_norm": 0.734375, "learning_rate": 0.00017027877739432174, "loss": 0.0544, "step": 5404 }, { "epoch": 2.522753792298716, "grad_norm": 0.91015625, "learning_rate": 0.0001702683379284961, "loss": 0.0652, "step": 5405 }, { "epoch": 2.523220536756126, "grad_norm": 1.046875, "learning_rate": 0.0001702578969497151, "loss": 0.0762, "step": 5406 }, { "epoch": 2.523687281213536, "grad_norm": 0.83984375, "learning_rate": 0.00017024745445820368, "loss": 0.0533, "step": 5407 }, { "epoch": 2.524154025670945, "grad_norm": 0.84375, "learning_rate": 0.00017023701045418662, "loss": 0.0449, "step": 5408 }, { "epoch": 2.5246207701283545, "grad_norm": 1.0625, "learning_rate": 0.00017022656493788883, "loss": 0.0713, "step": 5409 }, { "epoch": 2.5250875145857643, "grad_norm": 0.80859375, "learning_rate": 0.00017021611790953513, "loss": 0.0558, "step": 5410 }, { "epoch": 2.525554259043174, "grad_norm": 0.9140625, "learning_rate": 0.00017020566936935052, "loss": 0.0548, "step": 5411 }, { "epoch": 2.5260210035005835, "grad_norm": 0.6953125, "learning_rate": 0.00017019521931755996, "loss": 0.0438, "step": 5412 }, { "epoch": 2.526487747957993, "grad_norm": 0.84765625, "learning_rate": 0.00017018476775438845, "loss": 0.0497, "step": 5413 }, { "epoch": 2.5269544924154026, "grad_norm": 0.98046875, "learning_rate": 0.000170174314680061, "loss": 0.0596, "step": 5414 }, { "epoch": 2.5274212368728124, "grad_norm": 0.76171875, "learning_rate": 0.0001701638600948027, "loss": 0.0527, "step": 5415 }, { "epoch": 2.5278879813302217, "grad_norm": 0.9140625, "learning_rate": 0.00017015340399883865, "loss": 0.064, "step": 5416 }, { "epoch": 2.528354725787631, "grad_norm": 0.859375, "learning_rate": 0.00017014294639239398, "loss": 0.053, "step": 5417 }, { "epoch": 2.528821470245041, "grad_norm": 0.890625, "learning_rate": 0.00017013248727569384, "loss": 0.05, "step": 5418 }, { "epoch": 2.5292882147024502, "grad_norm": 0.9921875, "learning_rate": 0.00017012202664896343, "loss": 0.0737, "step": 5419 }, { "epoch": 2.52975495915986, "grad_norm": 0.77734375, "learning_rate": 0.000170111564512428, "loss": 0.0449, "step": 5420 }, { "epoch": 2.5302217036172694, "grad_norm": 0.9453125, "learning_rate": 0.00017010110086631278, "loss": 0.0986, "step": 5421 }, { "epoch": 2.530688448074679, "grad_norm": 0.9375, "learning_rate": 0.00017009063571084312, "loss": 0.0601, "step": 5422 }, { "epoch": 2.5311551925320885, "grad_norm": 0.73046875, "learning_rate": 0.00017008016904624426, "loss": 0.0453, "step": 5423 }, { "epoch": 2.5316219369894983, "grad_norm": 1.09375, "learning_rate": 0.00017006970087274164, "loss": 0.0693, "step": 5424 }, { "epoch": 2.5320886814469077, "grad_norm": 1.078125, "learning_rate": 0.00017005923119056058, "loss": 0.0729, "step": 5425 }, { "epoch": 2.5325554259043175, "grad_norm": 0.796875, "learning_rate": 0.00017004875999992658, "loss": 0.0606, "step": 5426 }, { "epoch": 2.533022170361727, "grad_norm": 0.703125, "learning_rate": 0.00017003828730106503, "loss": 0.0489, "step": 5427 }, { "epoch": 2.5334889148191366, "grad_norm": 0.828125, "learning_rate": 0.00017002781309420146, "loss": 0.0502, "step": 5428 }, { "epoch": 2.533955659276546, "grad_norm": 0.64453125, "learning_rate": 0.00017001733737956136, "loss": 0.0506, "step": 5429 }, { "epoch": 2.5344224037339558, "grad_norm": 0.61328125, "learning_rate": 0.0001700068601573703, "loss": 0.0495, "step": 5430 }, { "epoch": 2.534889148191365, "grad_norm": 0.96875, "learning_rate": 0.00016999638142785388, "loss": 0.0657, "step": 5431 }, { "epoch": 2.535355892648775, "grad_norm": 0.890625, "learning_rate": 0.0001699859011912377, "loss": 0.0513, "step": 5432 }, { "epoch": 2.5358226371061843, "grad_norm": 1.046875, "learning_rate": 0.00016997541944774742, "loss": 0.0751, "step": 5433 }, { "epoch": 2.536289381563594, "grad_norm": 0.8984375, "learning_rate": 0.00016996493619760868, "loss": 0.0746, "step": 5434 }, { "epoch": 2.5367561260210034, "grad_norm": 0.75, "learning_rate": 0.0001699544514410473, "loss": 0.0354, "step": 5435 }, { "epoch": 2.537222870478413, "grad_norm": 0.91015625, "learning_rate": 0.00016994396517828895, "loss": 0.0535, "step": 5436 }, { "epoch": 2.5376896149358226, "grad_norm": 0.7734375, "learning_rate": 0.0001699334774095594, "loss": 0.0517, "step": 5437 }, { "epoch": 2.5381563593932324, "grad_norm": 1.046875, "learning_rate": 0.0001699229881350845, "loss": 0.1052, "step": 5438 }, { "epoch": 2.5386231038506417, "grad_norm": 0.98828125, "learning_rate": 0.0001699124973550901, "loss": 0.07, "step": 5439 }, { "epoch": 2.539089848308051, "grad_norm": 0.86328125, "learning_rate": 0.00016990200506980204, "loss": 0.0776, "step": 5440 }, { "epoch": 2.539556592765461, "grad_norm": 0.72265625, "learning_rate": 0.00016989151127944627, "loss": 0.0444, "step": 5441 }, { "epoch": 2.5400233372228707, "grad_norm": 1.0078125, "learning_rate": 0.0001698810159842487, "loss": 0.0634, "step": 5442 }, { "epoch": 2.54049008168028, "grad_norm": 0.88671875, "learning_rate": 0.00016987051918443532, "loss": 0.0574, "step": 5443 }, { "epoch": 2.5409568261376894, "grad_norm": 0.70703125, "learning_rate": 0.00016986002088023217, "loss": 0.0511, "step": 5444 }, { "epoch": 2.541423570595099, "grad_norm": 0.83984375, "learning_rate": 0.00016984952107186523, "loss": 0.0408, "step": 5445 }, { "epoch": 2.541890315052509, "grad_norm": 0.8125, "learning_rate": 0.0001698390197595606, "loss": 0.0474, "step": 5446 }, { "epoch": 2.5423570595099183, "grad_norm": 0.62890625, "learning_rate": 0.00016982851694354444, "loss": 0.0418, "step": 5447 }, { "epoch": 2.5428238039673277, "grad_norm": 0.88671875, "learning_rate": 0.00016981801262404278, "loss": 0.0436, "step": 5448 }, { "epoch": 2.5432905484247375, "grad_norm": 0.828125, "learning_rate": 0.00016980750680128186, "loss": 0.0376, "step": 5449 }, { "epoch": 2.5437572928821472, "grad_norm": 0.7265625, "learning_rate": 0.00016979699947548786, "loss": 0.0393, "step": 5450 }, { "epoch": 2.5442240373395566, "grad_norm": 0.984375, "learning_rate": 0.00016978649064688703, "loss": 0.0654, "step": 5451 }, { "epoch": 2.544690781796966, "grad_norm": 0.90625, "learning_rate": 0.0001697759803157056, "loss": 0.0638, "step": 5452 }, { "epoch": 2.5451575262543757, "grad_norm": 0.9375, "learning_rate": 0.0001697654684821699, "loss": 0.0596, "step": 5453 }, { "epoch": 2.5456242707117855, "grad_norm": 0.86328125, "learning_rate": 0.0001697549551465063, "loss": 0.0632, "step": 5454 }, { "epoch": 2.546091015169195, "grad_norm": 0.8671875, "learning_rate": 0.00016974444030894107, "loss": 0.0561, "step": 5455 }, { "epoch": 2.5465577596266042, "grad_norm": 0.9453125, "learning_rate": 0.00016973392396970073, "loss": 0.0481, "step": 5456 }, { "epoch": 2.547024504084014, "grad_norm": 0.76171875, "learning_rate": 0.00016972340612901157, "loss": 0.0493, "step": 5457 }, { "epoch": 2.547491248541424, "grad_norm": 1.0625, "learning_rate": 0.0001697128867871001, "loss": 0.0634, "step": 5458 }, { "epoch": 2.547957992998833, "grad_norm": 1.03125, "learning_rate": 0.0001697023659441929, "loss": 0.0622, "step": 5459 }, { "epoch": 2.5484247374562425, "grad_norm": 0.91015625, "learning_rate": 0.00016969184360051637, "loss": 0.0632, "step": 5460 }, { "epoch": 2.5488914819136523, "grad_norm": 1.0078125, "learning_rate": 0.00016968131975629715, "loss": 0.0747, "step": 5461 }, { "epoch": 2.5493582263710617, "grad_norm": 1.0859375, "learning_rate": 0.0001696707944117618, "loss": 0.0786, "step": 5462 }, { "epoch": 2.5498249708284715, "grad_norm": 0.84375, "learning_rate": 0.0001696602675671369, "loss": 0.0578, "step": 5463 }, { "epoch": 2.550291715285881, "grad_norm": 1.2109375, "learning_rate": 0.00016964973922264923, "loss": 0.0517, "step": 5464 }, { "epoch": 2.5507584597432906, "grad_norm": 0.703125, "learning_rate": 0.00016963920937852533, "loss": 0.036, "step": 5465 }, { "epoch": 2.5512252042007, "grad_norm": 1.0078125, "learning_rate": 0.00016962867803499204, "loss": 0.0769, "step": 5466 }, { "epoch": 2.5516919486581098, "grad_norm": 0.90234375, "learning_rate": 0.00016961814519227602, "loss": 0.0492, "step": 5467 }, { "epoch": 2.552158693115519, "grad_norm": 1.1015625, "learning_rate": 0.00016960761085060407, "loss": 0.0852, "step": 5468 }, { "epoch": 2.552625437572929, "grad_norm": 0.70703125, "learning_rate": 0.00016959707501020308, "loss": 0.0621, "step": 5469 }, { "epoch": 2.5530921820303383, "grad_norm": 0.73046875, "learning_rate": 0.00016958653767129982, "loss": 0.0583, "step": 5470 }, { "epoch": 2.553558926487748, "grad_norm": 0.82421875, "learning_rate": 0.00016957599883412122, "loss": 0.0645, "step": 5471 }, { "epoch": 2.5540256709451574, "grad_norm": 0.953125, "learning_rate": 0.00016956545849889415, "loss": 0.0459, "step": 5472 }, { "epoch": 2.554492415402567, "grad_norm": 0.984375, "learning_rate": 0.0001695549166658456, "loss": 0.0635, "step": 5473 }, { "epoch": 2.5549591598599766, "grad_norm": 0.7578125, "learning_rate": 0.00016954437333520245, "loss": 0.0535, "step": 5474 }, { "epoch": 2.5554259043173864, "grad_norm": 0.8984375, "learning_rate": 0.00016953382850719184, "loss": 0.0501, "step": 5475 }, { "epoch": 2.5558926487747957, "grad_norm": 0.94921875, "learning_rate": 0.00016952328218204074, "loss": 0.0592, "step": 5476 }, { "epoch": 2.5563593932322055, "grad_norm": 0.7421875, "learning_rate": 0.00016951273435997623, "loss": 0.0401, "step": 5477 }, { "epoch": 2.556826137689615, "grad_norm": 0.9609375, "learning_rate": 0.00016950218504122543, "loss": 0.0611, "step": 5478 }, { "epoch": 2.5572928821470247, "grad_norm": 1.09375, "learning_rate": 0.00016949163422601548, "loss": 0.0682, "step": 5479 }, { "epoch": 2.557759626604434, "grad_norm": 0.94921875, "learning_rate": 0.0001694810819145735, "loss": 0.0571, "step": 5480 }, { "epoch": 2.558226371061844, "grad_norm": 0.90625, "learning_rate": 0.0001694705281071268, "loss": 0.0647, "step": 5481 }, { "epoch": 2.558693115519253, "grad_norm": 0.94921875, "learning_rate": 0.00016945997280390254, "loss": 0.0597, "step": 5482 }, { "epoch": 2.5591598599766625, "grad_norm": 0.74609375, "learning_rate": 0.00016944941600512795, "loss": 0.0458, "step": 5483 }, { "epoch": 2.5596266044340723, "grad_norm": 0.78125, "learning_rate": 0.00016943885771103043, "loss": 0.043, "step": 5484 }, { "epoch": 2.560093348891482, "grad_norm": 0.80078125, "learning_rate": 0.00016942829792183724, "loss": 0.0406, "step": 5485 }, { "epoch": 2.5605600933488915, "grad_norm": 0.81640625, "learning_rate": 0.00016941773663777576, "loss": 0.0407, "step": 5486 }, { "epoch": 2.561026837806301, "grad_norm": 0.765625, "learning_rate": 0.0001694071738590734, "loss": 0.0463, "step": 5487 }, { "epoch": 2.5614935822637106, "grad_norm": 0.953125, "learning_rate": 0.00016939660958595755, "loss": 0.0508, "step": 5488 }, { "epoch": 2.5619603267211204, "grad_norm": 0.83203125, "learning_rate": 0.00016938604381865573, "loss": 0.0464, "step": 5489 }, { "epoch": 2.5624270711785297, "grad_norm": 1.15625, "learning_rate": 0.0001693754765573954, "loss": 0.0681, "step": 5490 }, { "epoch": 2.562893815635939, "grad_norm": 0.9375, "learning_rate": 0.00016936490780240408, "loss": 0.071, "step": 5491 }, { "epoch": 2.563360560093349, "grad_norm": 0.80078125, "learning_rate": 0.00016935433755390934, "loss": 0.0464, "step": 5492 }, { "epoch": 2.5638273045507587, "grad_norm": 0.8359375, "learning_rate": 0.00016934376581213874, "loss": 0.0541, "step": 5493 }, { "epoch": 2.564294049008168, "grad_norm": 0.69140625, "learning_rate": 0.00016933319257731995, "loss": 0.0426, "step": 5494 }, { "epoch": 2.5647607934655774, "grad_norm": 0.98828125, "learning_rate": 0.00016932261784968059, "loss": 0.0799, "step": 5495 }, { "epoch": 2.565227537922987, "grad_norm": 0.6484375, "learning_rate": 0.00016931204162944834, "loss": 0.0464, "step": 5496 }, { "epoch": 2.565694282380397, "grad_norm": 1.1015625, "learning_rate": 0.0001693014639168509, "loss": 0.0782, "step": 5497 }, { "epoch": 2.5661610268378063, "grad_norm": 0.9453125, "learning_rate": 0.00016929088471211612, "loss": 0.0494, "step": 5498 }, { "epoch": 2.5666277712952157, "grad_norm": 0.87109375, "learning_rate": 0.00016928030401547166, "loss": 0.0563, "step": 5499 }, { "epoch": 2.5670945157526255, "grad_norm": 1.015625, "learning_rate": 0.00016926972182714541, "loss": 0.0721, "step": 5500 }, { "epoch": 2.5675612602100353, "grad_norm": 0.90234375, "learning_rate": 0.0001692591381473652, "loss": 0.0731, "step": 5501 }, { "epoch": 2.5680280046674446, "grad_norm": 0.9921875, "learning_rate": 0.0001692485529763589, "loss": 0.0701, "step": 5502 }, { "epoch": 2.568494749124854, "grad_norm": 0.8984375, "learning_rate": 0.0001692379663143544, "loss": 0.0583, "step": 5503 }, { "epoch": 2.5689614935822638, "grad_norm": 0.78125, "learning_rate": 0.00016922737816157967, "loss": 0.0553, "step": 5504 }, { "epoch": 2.569428238039673, "grad_norm": 0.953125, "learning_rate": 0.0001692167885182627, "loss": 0.0582, "step": 5505 }, { "epoch": 2.569894982497083, "grad_norm": 0.81640625, "learning_rate": 0.00016920619738463146, "loss": 0.0639, "step": 5506 }, { "epoch": 2.5703617269544923, "grad_norm": 1.015625, "learning_rate": 0.000169195604760914, "loss": 0.054, "step": 5507 }, { "epoch": 2.570828471411902, "grad_norm": 0.875, "learning_rate": 0.0001691850106473384, "loss": 0.056, "step": 5508 }, { "epoch": 2.5712952158693114, "grad_norm": 0.86328125, "learning_rate": 0.0001691744150441328, "loss": 0.0521, "step": 5509 }, { "epoch": 2.571761960326721, "grad_norm": 0.91796875, "learning_rate": 0.00016916381795152523, "loss": 0.0626, "step": 5510 }, { "epoch": 2.5722287047841306, "grad_norm": 0.88671875, "learning_rate": 0.00016915321936974395, "loss": 0.0431, "step": 5511 }, { "epoch": 2.5726954492415404, "grad_norm": 0.921875, "learning_rate": 0.00016914261929901713, "loss": 0.0575, "step": 5512 }, { "epoch": 2.5731621936989497, "grad_norm": 0.890625, "learning_rate": 0.000169132017739573, "loss": 0.0661, "step": 5513 }, { "epoch": 2.5736289381563595, "grad_norm": 0.95703125, "learning_rate": 0.00016912141469163983, "loss": 0.0508, "step": 5514 }, { "epoch": 2.574095682613769, "grad_norm": 0.7265625, "learning_rate": 0.0001691108101554459, "loss": 0.0503, "step": 5515 }, { "epoch": 2.5745624270711787, "grad_norm": 0.8203125, "learning_rate": 0.00016910020413121951, "loss": 0.0529, "step": 5516 }, { "epoch": 2.575029171528588, "grad_norm": 0.77734375, "learning_rate": 0.00016908959661918912, "loss": 0.057, "step": 5517 }, { "epoch": 2.575495915985998, "grad_norm": 0.99609375, "learning_rate": 0.00016907898761958306, "loss": 0.0704, "step": 5518 }, { "epoch": 2.575962660443407, "grad_norm": 0.65234375, "learning_rate": 0.0001690683771326297, "loss": 0.045, "step": 5519 }, { "epoch": 2.576429404900817, "grad_norm": 1.1015625, "learning_rate": 0.0001690577651585576, "loss": 0.0682, "step": 5520 }, { "epoch": 2.5768961493582263, "grad_norm": 0.87890625, "learning_rate": 0.00016904715169759518, "loss": 0.0641, "step": 5521 }, { "epoch": 2.577362893815636, "grad_norm": 0.75, "learning_rate": 0.00016903653674997095, "loss": 0.055, "step": 5522 }, { "epoch": 2.5778296382730455, "grad_norm": 0.78125, "learning_rate": 0.0001690259203159135, "loss": 0.0384, "step": 5523 }, { "epoch": 2.5782963827304552, "grad_norm": 0.765625, "learning_rate": 0.00016901530239565141, "loss": 0.0505, "step": 5524 }, { "epoch": 2.5787631271878646, "grad_norm": 0.90234375, "learning_rate": 0.00016900468298941329, "loss": 0.0509, "step": 5525 }, { "epoch": 2.579229871645274, "grad_norm": 1.078125, "learning_rate": 0.00016899406209742773, "loss": 0.0864, "step": 5526 }, { "epoch": 2.5796966161026837, "grad_norm": 0.96875, "learning_rate": 0.00016898343971992352, "loss": 0.0656, "step": 5527 }, { "epoch": 2.5801633605600935, "grad_norm": 0.828125, "learning_rate": 0.0001689728158571293, "loss": 0.0556, "step": 5528 }, { "epoch": 2.580630105017503, "grad_norm": 1.125, "learning_rate": 0.00016896219050927384, "loss": 0.0975, "step": 5529 }, { "epoch": 2.5810968494749122, "grad_norm": 0.80078125, "learning_rate": 0.00016895156367658588, "loss": 0.0481, "step": 5530 }, { "epoch": 2.581563593932322, "grad_norm": 0.90234375, "learning_rate": 0.00016894093535929424, "loss": 0.0585, "step": 5531 }, { "epoch": 2.582030338389732, "grad_norm": 0.7265625, "learning_rate": 0.0001689303055576278, "loss": 0.0459, "step": 5532 }, { "epoch": 2.582497082847141, "grad_norm": 0.88671875, "learning_rate": 0.0001689196742718154, "loss": 0.0762, "step": 5533 }, { "epoch": 2.5829638273045505, "grad_norm": 0.8515625, "learning_rate": 0.00016890904150208594, "loss": 0.0603, "step": 5534 }, { "epoch": 2.5834305717619603, "grad_norm": 1.0078125, "learning_rate": 0.00016889840724866834, "loss": 0.0687, "step": 5535 }, { "epoch": 2.58389731621937, "grad_norm": 0.96484375, "learning_rate": 0.00016888777151179163, "loss": 0.0744, "step": 5536 }, { "epoch": 2.5843640606767795, "grad_norm": 0.6328125, "learning_rate": 0.00016887713429168474, "loss": 0.0427, "step": 5537 }, { "epoch": 2.584830805134189, "grad_norm": 0.82421875, "learning_rate": 0.00016886649558857673, "loss": 0.0479, "step": 5538 }, { "epoch": 2.5852975495915986, "grad_norm": 0.9765625, "learning_rate": 0.00016885585540269666, "loss": 0.0735, "step": 5539 }, { "epoch": 2.5857642940490084, "grad_norm": 0.72265625, "learning_rate": 0.00016884521373427362, "loss": 0.0324, "step": 5540 }, { "epoch": 2.586231038506418, "grad_norm": 0.75, "learning_rate": 0.00016883457058353675, "loss": 0.0406, "step": 5541 }, { "epoch": 2.586697782963827, "grad_norm": 0.81640625, "learning_rate": 0.00016882392595071524, "loss": 0.0535, "step": 5542 }, { "epoch": 2.587164527421237, "grad_norm": 0.796875, "learning_rate": 0.0001688132798360382, "loss": 0.0467, "step": 5543 }, { "epoch": 2.5876312718786463, "grad_norm": 1.015625, "learning_rate": 0.00016880263223973492, "loss": 0.059, "step": 5544 }, { "epoch": 2.588098016336056, "grad_norm": 0.9296875, "learning_rate": 0.0001687919831620346, "loss": 0.0478, "step": 5545 }, { "epoch": 2.5885647607934654, "grad_norm": 0.88671875, "learning_rate": 0.00016878133260316656, "loss": 0.0525, "step": 5546 }, { "epoch": 2.589031505250875, "grad_norm": 0.76953125, "learning_rate": 0.00016877068056336014, "loss": 0.0479, "step": 5547 }, { "epoch": 2.5894982497082846, "grad_norm": 0.7578125, "learning_rate": 0.00016876002704284464, "loss": 0.0465, "step": 5548 }, { "epoch": 2.5899649941656944, "grad_norm": 0.70703125, "learning_rate": 0.0001687493720418495, "loss": 0.0413, "step": 5549 }, { "epoch": 2.5904317386231037, "grad_norm": 0.83203125, "learning_rate": 0.00016873871556060405, "loss": 0.0558, "step": 5550 }, { "epoch": 2.5908984830805135, "grad_norm": 1.0625, "learning_rate": 0.00016872805759933783, "loss": 0.0537, "step": 5551 }, { "epoch": 2.591365227537923, "grad_norm": 0.73828125, "learning_rate": 0.00016871739815828025, "loss": 0.0491, "step": 5552 }, { "epoch": 2.5918319719953327, "grad_norm": 1.0703125, "learning_rate": 0.00016870673723766086, "loss": 0.0765, "step": 5553 }, { "epoch": 2.592298716452742, "grad_norm": 1.1796875, "learning_rate": 0.00016869607483770916, "loss": 0.0742, "step": 5554 }, { "epoch": 2.592765460910152, "grad_norm": 0.9375, "learning_rate": 0.0001686854109586548, "loss": 0.0516, "step": 5555 }, { "epoch": 2.593232205367561, "grad_norm": 0.90234375, "learning_rate": 0.0001686747456007273, "loss": 0.0404, "step": 5556 }, { "epoch": 2.593698949824971, "grad_norm": 0.75390625, "learning_rate": 0.00016866407876415634, "loss": 0.0438, "step": 5557 }, { "epoch": 2.5941656942823803, "grad_norm": 1.0390625, "learning_rate": 0.00016865341044917156, "loss": 0.0518, "step": 5558 }, { "epoch": 2.59463243873979, "grad_norm": 0.72265625, "learning_rate": 0.0001686427406560027, "loss": 0.0517, "step": 5559 }, { "epoch": 2.5950991831971995, "grad_norm": 1.015625, "learning_rate": 0.00016863206938487946, "loss": 0.0848, "step": 5560 }, { "epoch": 2.5955659276546093, "grad_norm": 1.0, "learning_rate": 0.00016862139663603162, "loss": 0.0647, "step": 5561 }, { "epoch": 2.5960326721120186, "grad_norm": 0.9609375, "learning_rate": 0.00016861072240968898, "loss": 0.0661, "step": 5562 }, { "epoch": 2.5964994165694284, "grad_norm": 0.86328125, "learning_rate": 0.00016860004670608136, "loss": 0.0573, "step": 5563 }, { "epoch": 2.5969661610268377, "grad_norm": 0.83203125, "learning_rate": 0.00016858936952543862, "loss": 0.0658, "step": 5564 }, { "epoch": 2.597432905484247, "grad_norm": 0.859375, "learning_rate": 0.00016857869086799068, "loss": 0.0493, "step": 5565 }, { "epoch": 2.597899649941657, "grad_norm": 0.75, "learning_rate": 0.00016856801073396742, "loss": 0.041, "step": 5566 }, { "epoch": 2.5983663943990667, "grad_norm": 0.61328125, "learning_rate": 0.0001685573291235988, "loss": 0.0359, "step": 5567 }, { "epoch": 2.598833138856476, "grad_norm": 0.99609375, "learning_rate": 0.00016854664603711483, "loss": 0.0695, "step": 5568 }, { "epoch": 2.5992998833138854, "grad_norm": 0.68359375, "learning_rate": 0.00016853596147474548, "loss": 0.0479, "step": 5569 }, { "epoch": 2.599766627771295, "grad_norm": 0.8125, "learning_rate": 0.00016852527543672087, "loss": 0.0447, "step": 5570 }, { "epoch": 2.600233372228705, "grad_norm": 0.8984375, "learning_rate": 0.00016851458792327105, "loss": 0.0576, "step": 5571 }, { "epoch": 2.6007001166861143, "grad_norm": 1.09375, "learning_rate": 0.00016850389893462611, "loss": 0.043, "step": 5572 }, { "epoch": 2.6011668611435237, "grad_norm": 0.7890625, "learning_rate": 0.00016849320847101625, "loss": 0.0493, "step": 5573 }, { "epoch": 2.6016336056009335, "grad_norm": 0.9609375, "learning_rate": 0.0001684825165326716, "loss": 0.0745, "step": 5574 }, { "epoch": 2.6021003500583433, "grad_norm": 0.95703125, "learning_rate": 0.00016847182311982242, "loss": 0.0524, "step": 5575 }, { "epoch": 2.6025670945157526, "grad_norm": 0.76171875, "learning_rate": 0.00016846112823269889, "loss": 0.0504, "step": 5576 }, { "epoch": 2.603033838973162, "grad_norm": 0.984375, "learning_rate": 0.0001684504318715313, "loss": 0.0562, "step": 5577 }, { "epoch": 2.603500583430572, "grad_norm": 1.015625, "learning_rate": 0.00016843973403654999, "loss": 0.0722, "step": 5578 }, { "epoch": 2.6039673278879816, "grad_norm": 0.90625, "learning_rate": 0.00016842903472798524, "loss": 0.0619, "step": 5579 }, { "epoch": 2.604434072345391, "grad_norm": 1.015625, "learning_rate": 0.00016841833394606748, "loss": 0.0539, "step": 5580 }, { "epoch": 2.6049008168028003, "grad_norm": 1.171875, "learning_rate": 0.00016840763169102704, "loss": 0.0876, "step": 5581 }, { "epoch": 2.60536756126021, "grad_norm": 0.62109375, "learning_rate": 0.0001683969279630944, "loss": 0.0363, "step": 5582 }, { "epoch": 2.60583430571762, "grad_norm": 0.625, "learning_rate": 0.00016838622276250004, "loss": 0.0375, "step": 5583 }, { "epoch": 2.606301050175029, "grad_norm": 0.75390625, "learning_rate": 0.00016837551608947442, "loss": 0.0531, "step": 5584 }, { "epoch": 2.6067677946324386, "grad_norm": 1.078125, "learning_rate": 0.00016836480794424807, "loss": 0.0647, "step": 5585 }, { "epoch": 2.6072345390898484, "grad_norm": 0.77734375, "learning_rate": 0.00016835409832705156, "loss": 0.0512, "step": 5586 }, { "epoch": 2.6077012835472577, "grad_norm": 0.78125, "learning_rate": 0.00016834338723811546, "loss": 0.0531, "step": 5587 }, { "epoch": 2.6081680280046675, "grad_norm": 0.8359375, "learning_rate": 0.0001683326746776704, "loss": 0.0416, "step": 5588 }, { "epoch": 2.608634772462077, "grad_norm": 0.8984375, "learning_rate": 0.00016832196064594704, "loss": 0.0612, "step": 5589 }, { "epoch": 2.6091015169194867, "grad_norm": 0.734375, "learning_rate": 0.00016831124514317609, "loss": 0.0523, "step": 5590 }, { "epoch": 2.609568261376896, "grad_norm": 0.71484375, "learning_rate": 0.00016830052816958822, "loss": 0.0341, "step": 5591 }, { "epoch": 2.610035005834306, "grad_norm": 1.0078125, "learning_rate": 0.0001682898097254142, "loss": 0.0494, "step": 5592 }, { "epoch": 2.610501750291715, "grad_norm": 0.86328125, "learning_rate": 0.0001682790898108848, "loss": 0.0479, "step": 5593 }, { "epoch": 2.610968494749125, "grad_norm": 1.09375, "learning_rate": 0.00016826836842623086, "loss": 0.0773, "step": 5594 }, { "epoch": 2.6114352392065343, "grad_norm": 0.8671875, "learning_rate": 0.00016825764557168319, "loss": 0.0798, "step": 5595 }, { "epoch": 2.611901983663944, "grad_norm": 0.953125, "learning_rate": 0.0001682469212474727, "loss": 0.0541, "step": 5596 }, { "epoch": 2.6123687281213535, "grad_norm": 0.87890625, "learning_rate": 0.00016823619545383022, "loss": 0.0473, "step": 5597 }, { "epoch": 2.6128354725787633, "grad_norm": 0.8515625, "learning_rate": 0.00016822546819098678, "loss": 0.0523, "step": 5598 }, { "epoch": 2.6133022170361726, "grad_norm": 0.6484375, "learning_rate": 0.0001682147394591733, "loss": 0.0437, "step": 5599 }, { "epoch": 2.6137689614935824, "grad_norm": 0.93359375, "learning_rate": 0.00016820400925862078, "loss": 0.0536, "step": 5600 }, { "epoch": 2.6142357059509918, "grad_norm": 0.8046875, "learning_rate": 0.00016819327758956033, "loss": 0.0529, "step": 5601 }, { "epoch": 2.6147024504084015, "grad_norm": 0.71875, "learning_rate": 0.00016818254445222287, "loss": 0.0571, "step": 5602 }, { "epoch": 2.615169194865811, "grad_norm": 0.80859375, "learning_rate": 0.00016817180984683963, "loss": 0.05, "step": 5603 }, { "epoch": 2.6156359393232207, "grad_norm": 0.8671875, "learning_rate": 0.00016816107377364166, "loss": 0.0584, "step": 5604 }, { "epoch": 2.61610268378063, "grad_norm": 0.9921875, "learning_rate": 0.00016815033623286015, "loss": 0.0662, "step": 5605 }, { "epoch": 2.61656942823804, "grad_norm": 1.0546875, "learning_rate": 0.0001681395972247263, "loss": 0.0488, "step": 5606 }, { "epoch": 2.617036172695449, "grad_norm": 0.96484375, "learning_rate": 0.0001681288567494713, "loss": 0.055, "step": 5607 }, { "epoch": 2.6175029171528585, "grad_norm": 0.98046875, "learning_rate": 0.00016811811480732644, "loss": 0.0619, "step": 5608 }, { "epoch": 2.6179696616102683, "grad_norm": 0.7734375, "learning_rate": 0.00016810737139852301, "loss": 0.0577, "step": 5609 }, { "epoch": 2.618436406067678, "grad_norm": 0.80078125, "learning_rate": 0.00016809662652329227, "loss": 0.0583, "step": 5610 }, { "epoch": 2.6189031505250875, "grad_norm": 0.9140625, "learning_rate": 0.00016808588018186563, "loss": 0.0553, "step": 5611 }, { "epoch": 2.619369894982497, "grad_norm": 0.8125, "learning_rate": 0.00016807513237447443, "loss": 0.0528, "step": 5612 }, { "epoch": 2.6198366394399066, "grad_norm": 0.86328125, "learning_rate": 0.00016806438310135012, "loss": 0.0631, "step": 5613 }, { "epoch": 2.6203033838973164, "grad_norm": 0.90625, "learning_rate": 0.0001680536323627241, "loss": 0.0513, "step": 5614 }, { "epoch": 2.620770128354726, "grad_norm": 0.92578125, "learning_rate": 0.0001680428801588279, "loss": 0.0514, "step": 5615 }, { "epoch": 2.621236872812135, "grad_norm": 0.8984375, "learning_rate": 0.00016803212648989298, "loss": 0.0407, "step": 5616 }, { "epoch": 2.621703617269545, "grad_norm": 0.91796875, "learning_rate": 0.00016802137135615088, "loss": 0.0743, "step": 5617 }, { "epoch": 2.6221703617269547, "grad_norm": 0.75, "learning_rate": 0.00016801061475783317, "loss": 0.0522, "step": 5618 }, { "epoch": 2.622637106184364, "grad_norm": 0.828125, "learning_rate": 0.0001679998566951715, "loss": 0.0501, "step": 5619 }, { "epoch": 2.6231038506417734, "grad_norm": 0.78125, "learning_rate": 0.00016798909716839747, "loss": 0.0578, "step": 5620 }, { "epoch": 2.6235705950991832, "grad_norm": 0.984375, "learning_rate": 0.00016797833617774273, "loss": 0.0497, "step": 5621 }, { "epoch": 2.624037339556593, "grad_norm": 1.09375, "learning_rate": 0.00016796757372343897, "loss": 0.0655, "step": 5622 }, { "epoch": 2.6245040840140024, "grad_norm": 0.98046875, "learning_rate": 0.00016795680980571796, "loss": 0.0566, "step": 5623 }, { "epoch": 2.6249708284714117, "grad_norm": 1.0, "learning_rate": 0.0001679460444248114, "loss": 0.061, "step": 5624 }, { "epoch": 2.6254375729288215, "grad_norm": 0.71875, "learning_rate": 0.00016793527758095117, "loss": 0.039, "step": 5625 }, { "epoch": 2.6259043173862313, "grad_norm": 0.8203125, "learning_rate": 0.000167924509274369, "loss": 0.0488, "step": 5626 }, { "epoch": 2.6263710618436407, "grad_norm": 0.75390625, "learning_rate": 0.00016791373950529682, "loss": 0.0535, "step": 5627 }, { "epoch": 2.62683780630105, "grad_norm": 0.7890625, "learning_rate": 0.00016790296827396642, "loss": 0.0408, "step": 5628 }, { "epoch": 2.62730455075846, "grad_norm": 0.89453125, "learning_rate": 0.0001678921955806098, "loss": 0.0597, "step": 5629 }, { "epoch": 2.627771295215869, "grad_norm": 0.7578125, "learning_rate": 0.00016788142142545892, "loss": 0.0509, "step": 5630 }, { "epoch": 2.628238039673279, "grad_norm": 0.6875, "learning_rate": 0.00016787064580874565, "loss": 0.0426, "step": 5631 }, { "epoch": 2.6287047841306883, "grad_norm": 0.765625, "learning_rate": 0.00016785986873070211, "loss": 0.0474, "step": 5632 }, { "epoch": 2.629171528588098, "grad_norm": 0.7734375, "learning_rate": 0.00016784909019156033, "loss": 0.054, "step": 5633 }, { "epoch": 2.6296382730455075, "grad_norm": 1.1015625, "learning_rate": 0.00016783831019155234, "loss": 0.0596, "step": 5634 }, { "epoch": 2.6301050175029173, "grad_norm": 0.78515625, "learning_rate": 0.00016782752873091027, "loss": 0.0486, "step": 5635 }, { "epoch": 2.6305717619603266, "grad_norm": 0.74609375, "learning_rate": 0.0001678167458098662, "loss": 0.0428, "step": 5636 }, { "epoch": 2.6310385064177364, "grad_norm": 0.8515625, "learning_rate": 0.00016780596142865239, "loss": 0.0666, "step": 5637 }, { "epoch": 2.6315052508751458, "grad_norm": 1.0234375, "learning_rate": 0.000167795175587501, "loss": 0.0663, "step": 5638 }, { "epoch": 2.6319719953325555, "grad_norm": 0.890625, "learning_rate": 0.00016778438828664424, "loss": 0.0506, "step": 5639 }, { "epoch": 2.632438739789965, "grad_norm": 0.9140625, "learning_rate": 0.0001677735995263144, "loss": 0.0609, "step": 5640 }, { "epoch": 2.6329054842473747, "grad_norm": 0.734375, "learning_rate": 0.00016776280930674378, "loss": 0.0436, "step": 5641 }, { "epoch": 2.633372228704784, "grad_norm": 0.83984375, "learning_rate": 0.0001677520176281647, "loss": 0.0505, "step": 5642 }, { "epoch": 2.633838973162194, "grad_norm": 0.74609375, "learning_rate": 0.0001677412244908095, "loss": 0.037, "step": 5643 }, { "epoch": 2.634305717619603, "grad_norm": 0.89453125, "learning_rate": 0.00016773042989491054, "loss": 0.0604, "step": 5644 }, { "epoch": 2.634772462077013, "grad_norm": 0.87890625, "learning_rate": 0.00016771963384070034, "loss": 0.0522, "step": 5645 }, { "epoch": 2.6352392065344223, "grad_norm": 0.71875, "learning_rate": 0.0001677088363284112, "loss": 0.0385, "step": 5646 }, { "epoch": 2.635705950991832, "grad_norm": 0.6875, "learning_rate": 0.00016769803735827577, "loss": 0.0416, "step": 5647 }, { "epoch": 2.6361726954492415, "grad_norm": 0.68359375, "learning_rate": 0.00016768723693052646, "loss": 0.0568, "step": 5648 }, { "epoch": 2.6366394399066513, "grad_norm": 1.03125, "learning_rate": 0.00016767643504539582, "loss": 0.0544, "step": 5649 }, { "epoch": 2.6371061843640606, "grad_norm": 0.85546875, "learning_rate": 0.00016766563170311647, "loss": 0.0461, "step": 5650 }, { "epoch": 2.63757292882147, "grad_norm": 0.8828125, "learning_rate": 0.00016765482690392095, "loss": 0.0475, "step": 5651 }, { "epoch": 2.63803967327888, "grad_norm": 0.8828125, "learning_rate": 0.000167644020648042, "loss": 0.0556, "step": 5652 }, { "epoch": 2.6385064177362896, "grad_norm": 0.9921875, "learning_rate": 0.00016763321293571218, "loss": 0.0586, "step": 5653 }, { "epoch": 2.638973162193699, "grad_norm": 0.89453125, "learning_rate": 0.00016762240376716426, "loss": 0.0636, "step": 5654 }, { "epoch": 2.6394399066511083, "grad_norm": 0.76953125, "learning_rate": 0.00016761159314263103, "loss": 0.0398, "step": 5655 }, { "epoch": 2.639906651108518, "grad_norm": 1.03125, "learning_rate": 0.0001676007810623451, "loss": 0.0799, "step": 5656 }, { "epoch": 2.640373395565928, "grad_norm": 0.8515625, "learning_rate": 0.00016758996752653938, "loss": 0.0457, "step": 5657 }, { "epoch": 2.6408401400233372, "grad_norm": 1.140625, "learning_rate": 0.00016757915253544665, "loss": 0.0559, "step": 5658 }, { "epoch": 2.6413068844807466, "grad_norm": 0.73046875, "learning_rate": 0.00016756833608929977, "loss": 0.0372, "step": 5659 }, { "epoch": 2.6417736289381564, "grad_norm": 0.77734375, "learning_rate": 0.00016755751818833166, "loss": 0.0565, "step": 5660 }, { "epoch": 2.642240373395566, "grad_norm": 0.9296875, "learning_rate": 0.00016754669883277528, "loss": 0.0545, "step": 5661 }, { "epoch": 2.6427071178529755, "grad_norm": 0.92578125, "learning_rate": 0.00016753587802286347, "loss": 0.0548, "step": 5662 }, { "epoch": 2.643173862310385, "grad_norm": 1.0859375, "learning_rate": 0.0001675250557588293, "loss": 0.0694, "step": 5663 }, { "epoch": 2.6436406067677947, "grad_norm": 0.85546875, "learning_rate": 0.00016751423204090576, "loss": 0.042, "step": 5664 }, { "epoch": 2.6441073512252045, "grad_norm": 0.9765625, "learning_rate": 0.00016750340686932588, "loss": 0.0512, "step": 5665 }, { "epoch": 2.644574095682614, "grad_norm": 0.75, "learning_rate": 0.00016749258024432274, "loss": 0.0467, "step": 5666 }, { "epoch": 2.645040840140023, "grad_norm": 0.74609375, "learning_rate": 0.00016748175216612952, "loss": 0.0437, "step": 5667 }, { "epoch": 2.645507584597433, "grad_norm": 0.70703125, "learning_rate": 0.00016747092263497926, "loss": 0.0539, "step": 5668 }, { "epoch": 2.6459743290548423, "grad_norm": 0.71484375, "learning_rate": 0.00016746009165110516, "loss": 0.0425, "step": 5669 }, { "epoch": 2.646441073512252, "grad_norm": 0.97265625, "learning_rate": 0.00016744925921474048, "loss": 0.0646, "step": 5670 }, { "epoch": 2.6469078179696615, "grad_norm": 0.85546875, "learning_rate": 0.0001674384253261184, "loss": 0.0577, "step": 5671 }, { "epoch": 2.6473745624270713, "grad_norm": 0.96875, "learning_rate": 0.00016742758998547217, "loss": 0.0598, "step": 5672 }, { "epoch": 2.6478413068844806, "grad_norm": 0.76171875, "learning_rate": 0.00016741675319303516, "loss": 0.0481, "step": 5673 }, { "epoch": 2.6483080513418904, "grad_norm": 0.90234375, "learning_rate": 0.0001674059149490406, "loss": 0.0531, "step": 5674 }, { "epoch": 2.6487747957992998, "grad_norm": 0.921875, "learning_rate": 0.00016739507525372193, "loss": 0.0508, "step": 5675 }, { "epoch": 2.6492415402567095, "grad_norm": 0.73046875, "learning_rate": 0.00016738423410731255, "loss": 0.0438, "step": 5676 }, { "epoch": 2.649708284714119, "grad_norm": 0.76171875, "learning_rate": 0.0001673733915100458, "loss": 0.0473, "step": 5677 }, { "epoch": 2.6501750291715287, "grad_norm": 0.90234375, "learning_rate": 0.00016736254746215517, "loss": 0.0415, "step": 5678 }, { "epoch": 2.650641773628938, "grad_norm": 1.1484375, "learning_rate": 0.00016735170196387417, "loss": 0.0516, "step": 5679 }, { "epoch": 2.651108518086348, "grad_norm": 0.8671875, "learning_rate": 0.00016734085501543633, "loss": 0.054, "step": 5680 }, { "epoch": 2.651575262543757, "grad_norm": 0.828125, "learning_rate": 0.00016733000661707515, "loss": 0.0548, "step": 5681 }, { "epoch": 2.652042007001167, "grad_norm": 0.76953125, "learning_rate": 0.0001673191567690242, "loss": 0.0469, "step": 5682 }, { "epoch": 2.6525087514585763, "grad_norm": 0.80078125, "learning_rate": 0.00016730830547151713, "loss": 0.0546, "step": 5683 }, { "epoch": 2.652975495915986, "grad_norm": 0.9296875, "learning_rate": 0.00016729745272478757, "loss": 0.0503, "step": 5684 }, { "epoch": 2.6534422403733955, "grad_norm": 0.68359375, "learning_rate": 0.00016728659852906916, "loss": 0.0428, "step": 5685 }, { "epoch": 2.6539089848308053, "grad_norm": 0.8828125, "learning_rate": 0.00016727574288459565, "loss": 0.055, "step": 5686 }, { "epoch": 2.6543757292882146, "grad_norm": 1.15625, "learning_rate": 0.00016726488579160074, "loss": 0.0827, "step": 5687 }, { "epoch": 2.6548424737456244, "grad_norm": 0.71875, "learning_rate": 0.0001672540272503182, "loss": 0.045, "step": 5688 }, { "epoch": 2.655309218203034, "grad_norm": 1.046875, "learning_rate": 0.00016724316726098186, "loss": 0.0717, "step": 5689 }, { "epoch": 2.655775962660443, "grad_norm": 0.890625, "learning_rate": 0.0001672323058238255, "loss": 0.0647, "step": 5690 }, { "epoch": 2.656242707117853, "grad_norm": 0.74609375, "learning_rate": 0.00016722144293908299, "loss": 0.0372, "step": 5691 }, { "epoch": 2.6567094515752627, "grad_norm": 0.76953125, "learning_rate": 0.00016721057860698827, "loss": 0.0396, "step": 5692 }, { "epoch": 2.657176196032672, "grad_norm": 0.8125, "learning_rate": 0.00016719971282777517, "loss": 0.0545, "step": 5693 }, { "epoch": 2.6576429404900814, "grad_norm": 0.8203125, "learning_rate": 0.00016718884560167774, "loss": 0.0577, "step": 5694 }, { "epoch": 2.6581096849474912, "grad_norm": 1.046875, "learning_rate": 0.00016717797692892988, "loss": 0.0476, "step": 5695 }, { "epoch": 2.658576429404901, "grad_norm": 0.703125, "learning_rate": 0.00016716710680976567, "loss": 0.0425, "step": 5696 }, { "epoch": 2.6590431738623104, "grad_norm": 0.9375, "learning_rate": 0.00016715623524441912, "loss": 0.049, "step": 5697 }, { "epoch": 2.6595099183197197, "grad_norm": 0.9296875, "learning_rate": 0.0001671453622331243, "loss": 0.0497, "step": 5698 }, { "epoch": 2.6599766627771295, "grad_norm": 0.9140625, "learning_rate": 0.00016713448777611536, "loss": 0.0522, "step": 5699 }, { "epoch": 2.6604434072345393, "grad_norm": 0.796875, "learning_rate": 0.0001671236118736264, "loss": 0.0471, "step": 5700 }, { "epoch": 2.6609101516919487, "grad_norm": 0.8984375, "learning_rate": 0.00016711273452589158, "loss": 0.0405, "step": 5701 }, { "epoch": 2.661376896149358, "grad_norm": 0.6875, "learning_rate": 0.00016710185573314514, "loss": 0.0461, "step": 5702 }, { "epoch": 2.661843640606768, "grad_norm": 0.90234375, "learning_rate": 0.00016709097549562127, "loss": 0.0399, "step": 5703 }, { "epoch": 2.6623103850641776, "grad_norm": 0.9453125, "learning_rate": 0.0001670800938135543, "loss": 0.0649, "step": 5704 }, { "epoch": 2.662777129521587, "grad_norm": 0.84375, "learning_rate": 0.00016706921068717847, "loss": 0.0526, "step": 5705 }, { "epoch": 2.6632438739789963, "grad_norm": 0.94921875, "learning_rate": 0.0001670583261167281, "loss": 0.0473, "step": 5706 }, { "epoch": 2.663710618436406, "grad_norm": 0.98046875, "learning_rate": 0.0001670474401024376, "loss": 0.0576, "step": 5707 }, { "epoch": 2.664177362893816, "grad_norm": 0.72265625, "learning_rate": 0.0001670365526445413, "loss": 0.0505, "step": 5708 }, { "epoch": 2.6646441073512253, "grad_norm": 0.98046875, "learning_rate": 0.00016702566374327368, "loss": 0.0539, "step": 5709 }, { "epoch": 2.6651108518086346, "grad_norm": 0.9140625, "learning_rate": 0.0001670147733988691, "loss": 0.044, "step": 5710 }, { "epoch": 2.6655775962660444, "grad_norm": 0.765625, "learning_rate": 0.00016700388161156211, "loss": 0.0464, "step": 5711 }, { "epoch": 2.6660443407234538, "grad_norm": 0.7421875, "learning_rate": 0.00016699298838158722, "loss": 0.0493, "step": 5712 }, { "epoch": 2.6665110851808635, "grad_norm": 0.75390625, "learning_rate": 0.00016698209370917893, "loss": 0.0371, "step": 5713 }, { "epoch": 2.666977829638273, "grad_norm": 0.8515625, "learning_rate": 0.0001669711975945719, "loss": 0.0581, "step": 5714 }, { "epoch": 2.6674445740956827, "grad_norm": 0.91015625, "learning_rate": 0.00016696030003800062, "loss": 0.0541, "step": 5715 }, { "epoch": 2.667911318553092, "grad_norm": 0.99609375, "learning_rate": 0.00016694940103969982, "loss": 0.0564, "step": 5716 }, { "epoch": 2.668378063010502, "grad_norm": 0.83984375, "learning_rate": 0.00016693850059990413, "loss": 0.0488, "step": 5717 }, { "epoch": 2.668844807467911, "grad_norm": 0.82421875, "learning_rate": 0.00016692759871884824, "loss": 0.0421, "step": 5718 }, { "epoch": 2.669311551925321, "grad_norm": 0.8828125, "learning_rate": 0.00016691669539676692, "loss": 0.068, "step": 5719 }, { "epoch": 2.6697782963827303, "grad_norm": 0.93359375, "learning_rate": 0.00016690579063389488, "loss": 0.0595, "step": 5720 }, { "epoch": 2.67024504084014, "grad_norm": 0.7265625, "learning_rate": 0.00016689488443046694, "loss": 0.0441, "step": 5721 }, { "epoch": 2.6707117852975495, "grad_norm": 0.73046875, "learning_rate": 0.0001668839767867179, "loss": 0.0446, "step": 5722 }, { "epoch": 2.6711785297549593, "grad_norm": 0.7109375, "learning_rate": 0.00016687306770288266, "loss": 0.0533, "step": 5723 }, { "epoch": 2.6716452742123686, "grad_norm": 0.6953125, "learning_rate": 0.00016686215717919605, "loss": 0.0333, "step": 5724 }, { "epoch": 2.6721120186697784, "grad_norm": 0.94140625, "learning_rate": 0.00016685124521589306, "loss": 0.0501, "step": 5725 }, { "epoch": 2.672578763127188, "grad_norm": 0.90234375, "learning_rate": 0.00016684033181320852, "loss": 0.0442, "step": 5726 }, { "epoch": 2.6730455075845976, "grad_norm": 0.9296875, "learning_rate": 0.00016682941697137752, "loss": 0.0605, "step": 5727 }, { "epoch": 2.673512252042007, "grad_norm": 0.8828125, "learning_rate": 0.000166818500690635, "loss": 0.0471, "step": 5728 }, { "epoch": 2.6739789964994167, "grad_norm": 0.6484375, "learning_rate": 0.00016680758297121602, "loss": 0.0395, "step": 5729 }, { "epoch": 2.674445740956826, "grad_norm": 0.89453125, "learning_rate": 0.0001667966638133557, "loss": 0.0513, "step": 5730 }, { "epoch": 2.674912485414236, "grad_norm": 0.90234375, "learning_rate": 0.00016678574321728906, "loss": 0.0516, "step": 5731 }, { "epoch": 2.6753792298716452, "grad_norm": 0.7265625, "learning_rate": 0.00016677482118325127, "loss": 0.0427, "step": 5732 }, { "epoch": 2.6758459743290546, "grad_norm": 0.80859375, "learning_rate": 0.0001667638977114775, "loss": 0.0477, "step": 5733 }, { "epoch": 2.6763127187864644, "grad_norm": 0.8203125, "learning_rate": 0.00016675297280220293, "loss": 0.0477, "step": 5734 }, { "epoch": 2.676779463243874, "grad_norm": 0.97265625, "learning_rate": 0.0001667420464556628, "loss": 0.079, "step": 5735 }, { "epoch": 2.6772462077012835, "grad_norm": 1.0, "learning_rate": 0.00016673111867209235, "loss": 0.0713, "step": 5736 }, { "epoch": 2.677712952158693, "grad_norm": 0.78515625, "learning_rate": 0.00016672018945172688, "loss": 0.0524, "step": 5737 }, { "epoch": 2.6781796966161027, "grad_norm": 0.81640625, "learning_rate": 0.00016670925879480172, "loss": 0.0535, "step": 5738 }, { "epoch": 2.6786464410735125, "grad_norm": 0.96484375, "learning_rate": 0.00016669832670155219, "loss": 0.0532, "step": 5739 }, { "epoch": 2.679113185530922, "grad_norm": 0.8515625, "learning_rate": 0.00016668739317221367, "loss": 0.0505, "step": 5740 }, { "epoch": 2.679579929988331, "grad_norm": 0.76953125, "learning_rate": 0.00016667645820702158, "loss": 0.0438, "step": 5741 }, { "epoch": 2.680046674445741, "grad_norm": 0.8046875, "learning_rate": 0.0001666655218062114, "loss": 0.0443, "step": 5742 }, { "epoch": 2.6805134189031508, "grad_norm": 0.94140625, "learning_rate": 0.00016665458397001852, "loss": 0.0495, "step": 5743 }, { "epoch": 2.68098016336056, "grad_norm": 0.80859375, "learning_rate": 0.0001666436446986785, "loss": 0.0585, "step": 5744 }, { "epoch": 2.6814469078179695, "grad_norm": 0.8203125, "learning_rate": 0.0001666327039924269, "loss": 0.0527, "step": 5745 }, { "epoch": 2.6819136522753793, "grad_norm": 0.58203125, "learning_rate": 0.00016662176185149925, "loss": 0.0338, "step": 5746 }, { "epoch": 2.682380396732789, "grad_norm": 0.78515625, "learning_rate": 0.00016661081827613113, "loss": 0.0394, "step": 5747 }, { "epoch": 2.6828471411901984, "grad_norm": 0.7734375, "learning_rate": 0.0001665998732665582, "loss": 0.0535, "step": 5748 }, { "epoch": 2.6833138856476078, "grad_norm": 0.84375, "learning_rate": 0.0001665889268230161, "loss": 0.0548, "step": 5749 }, { "epoch": 2.6837806301050176, "grad_norm": 0.765625, "learning_rate": 0.00016657797894574053, "loss": 0.0402, "step": 5750 }, { "epoch": 2.6842473745624273, "grad_norm": 0.87890625, "learning_rate": 0.0001665670296349672, "loss": 0.0547, "step": 5751 }, { "epoch": 2.6847141190198367, "grad_norm": 1.078125, "learning_rate": 0.00016655607889093184, "loss": 0.0799, "step": 5752 }, { "epoch": 2.685180863477246, "grad_norm": 0.83203125, "learning_rate": 0.00016654512671387028, "loss": 0.0456, "step": 5753 }, { "epoch": 2.685647607934656, "grad_norm": 0.89453125, "learning_rate": 0.00016653417310401834, "loss": 0.0551, "step": 5754 }, { "epoch": 2.686114352392065, "grad_norm": 0.7578125, "learning_rate": 0.00016652321806161178, "loss": 0.0471, "step": 5755 }, { "epoch": 2.686581096849475, "grad_norm": 0.8359375, "learning_rate": 0.00016651226158688655, "loss": 0.0532, "step": 5756 }, { "epoch": 2.6870478413068843, "grad_norm": 0.9375, "learning_rate": 0.00016650130368007853, "loss": 0.0598, "step": 5757 }, { "epoch": 2.687514585764294, "grad_norm": 0.75, "learning_rate": 0.00016649034434142366, "loss": 0.0464, "step": 5758 }, { "epoch": 2.6879813302217035, "grad_norm": 1.0625, "learning_rate": 0.0001664793835711579, "loss": 0.0778, "step": 5759 }, { "epoch": 2.6884480746791133, "grad_norm": 1.0859375, "learning_rate": 0.00016646842136951723, "loss": 0.0546, "step": 5760 }, { "epoch": 2.6889148191365226, "grad_norm": 0.91796875, "learning_rate": 0.0001664574577367377, "loss": 0.0566, "step": 5761 }, { "epoch": 2.6893815635939324, "grad_norm": 0.9765625, "learning_rate": 0.00016644649267305543, "loss": 0.0671, "step": 5762 }, { "epoch": 2.689848308051342, "grad_norm": 0.87890625, "learning_rate": 0.0001664355261787064, "loss": 0.0551, "step": 5763 }, { "epoch": 2.6903150525087516, "grad_norm": 0.8828125, "learning_rate": 0.0001664245582539268, "loss": 0.0494, "step": 5764 }, { "epoch": 2.690781796966161, "grad_norm": 0.85546875, "learning_rate": 0.00016641358889895274, "loss": 0.0678, "step": 5765 }, { "epoch": 2.6912485414235707, "grad_norm": 0.66796875, "learning_rate": 0.0001664026181140204, "loss": 0.0334, "step": 5766 }, { "epoch": 2.69171528588098, "grad_norm": 1.375, "learning_rate": 0.00016639164589936608, "loss": 0.0505, "step": 5767 }, { "epoch": 2.69218203033839, "grad_norm": 0.74609375, "learning_rate": 0.00016638067225522592, "loss": 0.0348, "step": 5768 }, { "epoch": 2.6926487747957992, "grad_norm": 0.75, "learning_rate": 0.00016636969718183625, "loss": 0.0335, "step": 5769 }, { "epoch": 2.693115519253209, "grad_norm": 0.82421875, "learning_rate": 0.00016635872067943333, "loss": 0.0531, "step": 5770 }, { "epoch": 2.6935822637106184, "grad_norm": 0.90234375, "learning_rate": 0.00016634774274825357, "loss": 0.0604, "step": 5771 }, { "epoch": 2.694049008168028, "grad_norm": 0.84375, "learning_rate": 0.00016633676338853327, "loss": 0.052, "step": 5772 }, { "epoch": 2.6945157526254375, "grad_norm": 0.78515625, "learning_rate": 0.00016632578260050888, "loss": 0.0385, "step": 5773 }, { "epoch": 2.6949824970828473, "grad_norm": 0.8125, "learning_rate": 0.0001663148003844168, "loss": 0.0454, "step": 5774 }, { "epoch": 2.6954492415402567, "grad_norm": 0.875, "learning_rate": 0.00016630381674049346, "loss": 0.045, "step": 5775 }, { "epoch": 2.695915985997666, "grad_norm": 0.90625, "learning_rate": 0.0001662928316689754, "loss": 0.0376, "step": 5776 }, { "epoch": 2.696382730455076, "grad_norm": 0.69140625, "learning_rate": 0.0001662818451700991, "loss": 0.0439, "step": 5777 }, { "epoch": 2.6968494749124856, "grad_norm": 0.77734375, "learning_rate": 0.00016627085724410114, "loss": 0.0473, "step": 5778 }, { "epoch": 2.697316219369895, "grad_norm": 0.99609375, "learning_rate": 0.00016625986789121813, "loss": 0.0504, "step": 5779 }, { "epoch": 2.6977829638273043, "grad_norm": 0.73828125, "learning_rate": 0.00016624887711168658, "loss": 0.0417, "step": 5780 }, { "epoch": 2.698249708284714, "grad_norm": 0.890625, "learning_rate": 0.0001662378849057433, "loss": 0.0686, "step": 5781 }, { "epoch": 2.698716452742124, "grad_norm": 0.69140625, "learning_rate": 0.00016622689127362479, "loss": 0.0366, "step": 5782 }, { "epoch": 2.6991831971995333, "grad_norm": 1.046875, "learning_rate": 0.00016621589621556784, "loss": 0.0496, "step": 5783 }, { "epoch": 2.6996499416569426, "grad_norm": 0.76953125, "learning_rate": 0.00016620489973180917, "loss": 0.0482, "step": 5784 }, { "epoch": 2.7001166861143524, "grad_norm": 0.82421875, "learning_rate": 0.00016619390182258557, "loss": 0.05, "step": 5785 }, { "epoch": 2.700583430571762, "grad_norm": 0.90625, "learning_rate": 0.00016618290248813383, "loss": 0.0519, "step": 5786 }, { "epoch": 2.7010501750291716, "grad_norm": 0.87890625, "learning_rate": 0.00016617190172869077, "loss": 0.0533, "step": 5787 }, { "epoch": 2.701516919486581, "grad_norm": 0.9140625, "learning_rate": 0.0001661608995444932, "loss": 0.0616, "step": 5788 }, { "epoch": 2.7019836639439907, "grad_norm": 0.85546875, "learning_rate": 0.0001661498959357781, "loss": 0.063, "step": 5789 }, { "epoch": 2.7024504084014005, "grad_norm": 0.734375, "learning_rate": 0.00016613889090278235, "loss": 0.0356, "step": 5790 }, { "epoch": 2.70291715285881, "grad_norm": 0.9375, "learning_rate": 0.00016612788444574287, "loss": 0.0591, "step": 5791 }, { "epoch": 2.703383897316219, "grad_norm": 0.79296875, "learning_rate": 0.00016611687656489668, "loss": 0.052, "step": 5792 }, { "epoch": 2.703850641773629, "grad_norm": 0.703125, "learning_rate": 0.00016610586726048077, "loss": 0.0435, "step": 5793 }, { "epoch": 2.7043173862310383, "grad_norm": 0.765625, "learning_rate": 0.0001660948565327322, "loss": 0.0324, "step": 5794 }, { "epoch": 2.704784130688448, "grad_norm": 0.81640625, "learning_rate": 0.00016608384438188803, "loss": 0.0559, "step": 5795 }, { "epoch": 2.7052508751458575, "grad_norm": 0.8359375, "learning_rate": 0.0001660728308081854, "loss": 0.0503, "step": 5796 }, { "epoch": 2.7057176196032673, "grad_norm": 0.78515625, "learning_rate": 0.00016606181581186138, "loss": 0.0484, "step": 5797 }, { "epoch": 2.7061843640606766, "grad_norm": 0.765625, "learning_rate": 0.00016605079939315318, "loss": 0.0469, "step": 5798 }, { "epoch": 2.7066511085180864, "grad_norm": 0.87109375, "learning_rate": 0.00016603978155229796, "loss": 0.0489, "step": 5799 }, { "epoch": 2.707117852975496, "grad_norm": 0.76171875, "learning_rate": 0.000166028762289533, "loss": 0.0556, "step": 5800 }, { "epoch": 2.7075845974329056, "grad_norm": 0.6875, "learning_rate": 0.00016601774160509552, "loss": 0.0399, "step": 5801 }, { "epoch": 2.708051341890315, "grad_norm": 0.89453125, "learning_rate": 0.00016600671949922285, "loss": 0.0524, "step": 5802 }, { "epoch": 2.7085180863477247, "grad_norm": 0.80078125, "learning_rate": 0.00016599569597215222, "loss": 0.0522, "step": 5803 }, { "epoch": 2.708984830805134, "grad_norm": 0.62890625, "learning_rate": 0.00016598467102412102, "loss": 0.0424, "step": 5804 }, { "epoch": 2.709451575262544, "grad_norm": 0.89453125, "learning_rate": 0.00016597364465536672, "loss": 0.049, "step": 5805 }, { "epoch": 2.7099183197199532, "grad_norm": 0.81640625, "learning_rate": 0.0001659626168661266, "loss": 0.0536, "step": 5806 }, { "epoch": 2.710385064177363, "grad_norm": 0.87890625, "learning_rate": 0.00016595158765663815, "loss": 0.0551, "step": 5807 }, { "epoch": 2.7108518086347724, "grad_norm": 0.9609375, "learning_rate": 0.00016594055702713885, "loss": 0.0518, "step": 5808 }, { "epoch": 2.711318553092182, "grad_norm": 0.92578125, "learning_rate": 0.0001659295249778662, "loss": 0.0638, "step": 5809 }, { "epoch": 2.7117852975495915, "grad_norm": 0.73828125, "learning_rate": 0.0001659184915090577, "loss": 0.0435, "step": 5810 }, { "epoch": 2.7122520420070013, "grad_norm": 0.6875, "learning_rate": 0.00016590745662095098, "loss": 0.042, "step": 5811 }, { "epoch": 2.7127187864644107, "grad_norm": 0.78515625, "learning_rate": 0.00016589642031378357, "loss": 0.0487, "step": 5812 }, { "epoch": 2.7131855309218205, "grad_norm": 0.8515625, "learning_rate": 0.00016588538258779313, "loss": 0.0489, "step": 5813 }, { "epoch": 2.71365227537923, "grad_norm": 0.7109375, "learning_rate": 0.0001658743434432173, "loss": 0.0535, "step": 5814 }, { "epoch": 2.714119019836639, "grad_norm": 0.90234375, "learning_rate": 0.00016586330288029374, "loss": 0.0571, "step": 5815 }, { "epoch": 2.714585764294049, "grad_norm": 0.9375, "learning_rate": 0.00016585226089926023, "loss": 0.0726, "step": 5816 }, { "epoch": 2.7150525087514588, "grad_norm": 0.8125, "learning_rate": 0.00016584121750035444, "loss": 0.0604, "step": 5817 }, { "epoch": 2.715519253208868, "grad_norm": 0.83203125, "learning_rate": 0.00016583017268381422, "loss": 0.0666, "step": 5818 }, { "epoch": 2.7159859976662775, "grad_norm": 0.83984375, "learning_rate": 0.0001658191264498773, "loss": 0.059, "step": 5819 }, { "epoch": 2.7164527421236873, "grad_norm": 0.8046875, "learning_rate": 0.0001658080787987816, "loss": 0.0526, "step": 5820 }, { "epoch": 2.716919486581097, "grad_norm": 0.85546875, "learning_rate": 0.00016579702973076494, "loss": 0.0761, "step": 5821 }, { "epoch": 2.7173862310385064, "grad_norm": 0.8125, "learning_rate": 0.00016578597924606526, "loss": 0.0583, "step": 5822 }, { "epoch": 2.7178529754959158, "grad_norm": 0.6796875, "learning_rate": 0.0001657749273449204, "loss": 0.0348, "step": 5823 }, { "epoch": 2.7183197199533256, "grad_norm": 0.81640625, "learning_rate": 0.0001657638740275684, "loss": 0.0494, "step": 5824 }, { "epoch": 2.7187864644107353, "grad_norm": 0.79296875, "learning_rate": 0.00016575281929424719, "loss": 0.0459, "step": 5825 }, { "epoch": 2.7192532088681447, "grad_norm": 0.7265625, "learning_rate": 0.00016574176314519486, "loss": 0.0478, "step": 5826 }, { "epoch": 2.719719953325554, "grad_norm": 0.68359375, "learning_rate": 0.00016573070558064944, "loss": 0.0453, "step": 5827 }, { "epoch": 2.720186697782964, "grad_norm": 0.7734375, "learning_rate": 0.000165719646600849, "loss": 0.0349, "step": 5828 }, { "epoch": 2.7206534422403736, "grad_norm": 0.828125, "learning_rate": 0.00016570858620603164, "loss": 0.047, "step": 5829 }, { "epoch": 2.721120186697783, "grad_norm": 0.8828125, "learning_rate": 0.0001656975243964355, "loss": 0.0609, "step": 5830 }, { "epoch": 2.7215869311551923, "grad_norm": 0.76953125, "learning_rate": 0.0001656864611722988, "loss": 0.0445, "step": 5831 }, { "epoch": 2.722053675612602, "grad_norm": 1.046875, "learning_rate": 0.00016567539653385968, "loss": 0.0531, "step": 5832 }, { "epoch": 2.722520420070012, "grad_norm": 0.78515625, "learning_rate": 0.0001656643304813564, "loss": 0.0448, "step": 5833 }, { "epoch": 2.7229871645274213, "grad_norm": 0.79296875, "learning_rate": 0.00016565326301502727, "loss": 0.0535, "step": 5834 }, { "epoch": 2.7234539089848306, "grad_norm": 0.98046875, "learning_rate": 0.00016564219413511052, "loss": 0.0662, "step": 5835 }, { "epoch": 2.7239206534422404, "grad_norm": 0.57421875, "learning_rate": 0.00016563112384184448, "loss": 0.0363, "step": 5836 }, { "epoch": 2.72438739789965, "grad_norm": 0.8359375, "learning_rate": 0.00016562005213546755, "loss": 0.0469, "step": 5837 }, { "epoch": 2.7248541423570596, "grad_norm": 0.921875, "learning_rate": 0.0001656089790162181, "loss": 0.0446, "step": 5838 }, { "epoch": 2.725320886814469, "grad_norm": 0.6875, "learning_rate": 0.0001655979044843345, "loss": 0.0493, "step": 5839 }, { "epoch": 2.7257876312718787, "grad_norm": 0.86328125, "learning_rate": 0.00016558682854005526, "loss": 0.0512, "step": 5840 }, { "epoch": 2.726254375729288, "grad_norm": 0.9765625, "learning_rate": 0.0001655757511836188, "loss": 0.0723, "step": 5841 }, { "epoch": 2.726721120186698, "grad_norm": 0.796875, "learning_rate": 0.0001655646724152637, "loss": 0.0542, "step": 5842 }, { "epoch": 2.7271878646441072, "grad_norm": 1.109375, "learning_rate": 0.00016555359223522845, "loss": 0.0687, "step": 5843 }, { "epoch": 2.727654609101517, "grad_norm": 0.85546875, "learning_rate": 0.0001655425106437516, "loss": 0.0544, "step": 5844 }, { "epoch": 2.7281213535589264, "grad_norm": 0.8984375, "learning_rate": 0.00016553142764107176, "loss": 0.0425, "step": 5845 }, { "epoch": 2.728588098016336, "grad_norm": 0.75, "learning_rate": 0.00016552034322742763, "loss": 0.0508, "step": 5846 }, { "epoch": 2.7290548424737455, "grad_norm": 0.83984375, "learning_rate": 0.00016550925740305782, "loss": 0.054, "step": 5847 }, { "epoch": 2.7295215869311553, "grad_norm": 0.6484375, "learning_rate": 0.00016549817016820096, "loss": 0.041, "step": 5848 }, { "epoch": 2.7299883313885647, "grad_norm": 0.734375, "learning_rate": 0.00016548708152309583, "loss": 0.0398, "step": 5849 }, { "epoch": 2.7304550758459745, "grad_norm": 0.75, "learning_rate": 0.0001654759914679812, "loss": 0.0488, "step": 5850 }, { "epoch": 2.730921820303384, "grad_norm": 0.9140625, "learning_rate": 0.00016546490000309585, "loss": 0.0554, "step": 5851 }, { "epoch": 2.7313885647607936, "grad_norm": 0.80078125, "learning_rate": 0.00016545380712867854, "loss": 0.0413, "step": 5852 }, { "epoch": 2.731855309218203, "grad_norm": 0.82421875, "learning_rate": 0.00016544271284496813, "loss": 0.0532, "step": 5853 }, { "epoch": 2.7323220536756128, "grad_norm": 0.70703125, "learning_rate": 0.0001654316171522035, "loss": 0.0444, "step": 5854 }, { "epoch": 2.732788798133022, "grad_norm": 0.86328125, "learning_rate": 0.00016542052005062358, "loss": 0.0483, "step": 5855 }, { "epoch": 2.733255542590432, "grad_norm": 0.9609375, "learning_rate": 0.0001654094215404673, "loss": 0.0491, "step": 5856 }, { "epoch": 2.7337222870478413, "grad_norm": 0.87890625, "learning_rate": 0.00016539832162197355, "loss": 0.0706, "step": 5857 }, { "epoch": 2.7341890315052506, "grad_norm": 0.91796875, "learning_rate": 0.00016538722029538138, "loss": 0.0531, "step": 5858 }, { "epoch": 2.7346557759626604, "grad_norm": 0.8984375, "learning_rate": 0.0001653761175609298, "loss": 0.0848, "step": 5859 }, { "epoch": 2.73512252042007, "grad_norm": 0.81640625, "learning_rate": 0.00016536501341885793, "loss": 0.0406, "step": 5860 }, { "epoch": 2.7355892648774796, "grad_norm": 0.9765625, "learning_rate": 0.00016535390786940476, "loss": 0.0549, "step": 5861 }, { "epoch": 2.736056009334889, "grad_norm": 0.7265625, "learning_rate": 0.00016534280091280945, "loss": 0.0554, "step": 5862 }, { "epoch": 2.7365227537922987, "grad_norm": 0.67578125, "learning_rate": 0.00016533169254931115, "loss": 0.0373, "step": 5863 }, { "epoch": 2.7369894982497085, "grad_norm": 0.79296875, "learning_rate": 0.00016532058277914896, "loss": 0.0404, "step": 5864 }, { "epoch": 2.737456242707118, "grad_norm": 0.73046875, "learning_rate": 0.00016530947160256222, "loss": 0.0335, "step": 5865 }, { "epoch": 2.737922987164527, "grad_norm": 0.96875, "learning_rate": 0.00016529835901979005, "loss": 0.049, "step": 5866 }, { "epoch": 2.738389731621937, "grad_norm": 0.73046875, "learning_rate": 0.00016528724503107179, "loss": 0.0412, "step": 5867 }, { "epoch": 2.738856476079347, "grad_norm": 0.9609375, "learning_rate": 0.0001652761296366467, "loss": 0.0575, "step": 5868 }, { "epoch": 2.739323220536756, "grad_norm": 0.75390625, "learning_rate": 0.0001652650128367541, "loss": 0.0444, "step": 5869 }, { "epoch": 2.7397899649941655, "grad_norm": 0.96484375, "learning_rate": 0.00016525389463163337, "loss": 0.0774, "step": 5870 }, { "epoch": 2.7402567094515753, "grad_norm": 0.7578125, "learning_rate": 0.0001652427750215239, "loss": 0.04, "step": 5871 }, { "epoch": 2.740723453908985, "grad_norm": 0.67578125, "learning_rate": 0.00016523165400666504, "loss": 0.0324, "step": 5872 }, { "epoch": 2.7411901983663944, "grad_norm": 0.73046875, "learning_rate": 0.00016522053158729637, "loss": 0.0384, "step": 5873 }, { "epoch": 2.741656942823804, "grad_norm": 0.75390625, "learning_rate": 0.00016520940776365724, "loss": 0.0476, "step": 5874 }, { "epoch": 2.7421236872812136, "grad_norm": 0.890625, "learning_rate": 0.00016519828253598723, "loss": 0.0562, "step": 5875 }, { "epoch": 2.7425904317386234, "grad_norm": 0.765625, "learning_rate": 0.00016518715590452585, "loss": 0.0413, "step": 5876 }, { "epoch": 2.7430571761960327, "grad_norm": 0.90234375, "learning_rate": 0.0001651760278695127, "loss": 0.0547, "step": 5877 }, { "epoch": 2.743523920653442, "grad_norm": 0.890625, "learning_rate": 0.00016516489843118734, "loss": 0.0596, "step": 5878 }, { "epoch": 2.743990665110852, "grad_norm": 0.609375, "learning_rate": 0.0001651537675897894, "loss": 0.0338, "step": 5879 }, { "epoch": 2.7444574095682612, "grad_norm": 0.95703125, "learning_rate": 0.00016514263534555857, "loss": 0.0569, "step": 5880 }, { "epoch": 2.744924154025671, "grad_norm": 1.0546875, "learning_rate": 0.00016513150169873453, "loss": 0.0572, "step": 5881 }, { "epoch": 2.7453908984830804, "grad_norm": 0.78515625, "learning_rate": 0.00016512036664955698, "loss": 0.0499, "step": 5882 }, { "epoch": 2.74585764294049, "grad_norm": 0.82421875, "learning_rate": 0.0001651092301982657, "loss": 0.0608, "step": 5883 }, { "epoch": 2.7463243873978995, "grad_norm": 0.91796875, "learning_rate": 0.00016509809234510042, "loss": 0.0564, "step": 5884 }, { "epoch": 2.7467911318553093, "grad_norm": 0.91796875, "learning_rate": 0.00016508695309030102, "loss": 0.0456, "step": 5885 }, { "epoch": 2.7472578763127187, "grad_norm": 0.6640625, "learning_rate": 0.0001650758124341073, "loss": 0.0406, "step": 5886 }, { "epoch": 2.7477246207701285, "grad_norm": 0.83984375, "learning_rate": 0.00016506467037675913, "loss": 0.0447, "step": 5887 }, { "epoch": 2.748191365227538, "grad_norm": 0.7109375, "learning_rate": 0.00016505352691849643, "loss": 0.0414, "step": 5888 }, { "epoch": 2.7486581096849476, "grad_norm": 0.671875, "learning_rate": 0.00016504238205955908, "loss": 0.034, "step": 5889 }, { "epoch": 2.749124854142357, "grad_norm": 0.7890625, "learning_rate": 0.0001650312358001871, "loss": 0.0575, "step": 5890 }, { "epoch": 2.7495915985997668, "grad_norm": 1.015625, "learning_rate": 0.00016502008814062044, "loss": 0.0603, "step": 5891 }, { "epoch": 2.750058343057176, "grad_norm": 0.80859375, "learning_rate": 0.00016500893908109917, "loss": 0.0479, "step": 5892 }, { "epoch": 2.750525087514586, "grad_norm": 0.75, "learning_rate": 0.0001649977886218633, "loss": 0.0382, "step": 5893 }, { "epoch": 2.7509918319719953, "grad_norm": 0.78515625, "learning_rate": 0.00016498663676315294, "loss": 0.0496, "step": 5894 }, { "epoch": 2.751458576429405, "grad_norm": 0.6015625, "learning_rate": 0.0001649754835052082, "loss": 0.0374, "step": 5895 }, { "epoch": 2.7519253208868144, "grad_norm": 0.79296875, "learning_rate": 0.00016496432884826915, "loss": 0.0604, "step": 5896 }, { "epoch": 2.7519253208868144, "eval_loss": 1.6690407991409302, "eval_runtime": 94.473, "eval_samples_per_second": 19.095, "eval_steps_per_second": 2.392, "step": 5896 }, { "epoch": 2.752392065344224, "grad_norm": 0.828125, "learning_rate": 0.00016495317279257608, "loss": 0.0607, "step": 5897 }, { "epoch": 2.7528588098016336, "grad_norm": 1.015625, "learning_rate": 0.0001649420153383691, "loss": 0.0687, "step": 5898 }, { "epoch": 2.7533255542590434, "grad_norm": 0.80859375, "learning_rate": 0.00016493085648588846, "loss": 0.0367, "step": 5899 }, { "epoch": 2.7537922987164527, "grad_norm": 0.9453125, "learning_rate": 0.00016491969623537447, "loss": 0.0696, "step": 5900 }, { "epoch": 2.754259043173862, "grad_norm": 0.83203125, "learning_rate": 0.0001649085345870674, "loss": 0.0457, "step": 5901 }, { "epoch": 2.754725787631272, "grad_norm": 0.93359375, "learning_rate": 0.00016489737154120753, "loss": 0.0609, "step": 5902 }, { "epoch": 2.7551925320886816, "grad_norm": 0.953125, "learning_rate": 0.00016488620709803527, "loss": 0.0587, "step": 5903 }, { "epoch": 2.755659276546091, "grad_norm": 0.87109375, "learning_rate": 0.00016487504125779098, "loss": 0.0489, "step": 5904 }, { "epoch": 2.7561260210035003, "grad_norm": 0.9140625, "learning_rate": 0.00016486387402071503, "loss": 0.0575, "step": 5905 }, { "epoch": 2.75659276546091, "grad_norm": 0.80859375, "learning_rate": 0.00016485270538704795, "loss": 0.0476, "step": 5906 }, { "epoch": 2.75705950991832, "grad_norm": 0.87109375, "learning_rate": 0.00016484153535703015, "loss": 0.0516, "step": 5907 }, { "epoch": 2.7575262543757293, "grad_norm": 0.63671875, "learning_rate": 0.00016483036393090216, "loss": 0.0407, "step": 5908 }, { "epoch": 2.7579929988331386, "grad_norm": 0.90234375, "learning_rate": 0.0001648191911089045, "loss": 0.0505, "step": 5909 }, { "epoch": 2.7584597432905484, "grad_norm": 0.76953125, "learning_rate": 0.00016480801689127775, "loss": 0.0542, "step": 5910 }, { "epoch": 2.7589264877479582, "grad_norm": 1.0859375, "learning_rate": 0.00016479684127826248, "loss": 0.0603, "step": 5911 }, { "epoch": 2.7593932322053676, "grad_norm": 0.953125, "learning_rate": 0.0001647856642700993, "loss": 0.0573, "step": 5912 }, { "epoch": 2.759859976662777, "grad_norm": 0.765625, "learning_rate": 0.0001647744858670289, "loss": 0.043, "step": 5913 }, { "epoch": 2.7603267211201867, "grad_norm": 0.9140625, "learning_rate": 0.00016476330606929196, "loss": 0.0533, "step": 5914 }, { "epoch": 2.7607934655775965, "grad_norm": 0.75, "learning_rate": 0.0001647521248771292, "loss": 0.0417, "step": 5915 }, { "epoch": 2.761260210035006, "grad_norm": 0.73046875, "learning_rate": 0.0001647409422907813, "loss": 0.0409, "step": 5916 }, { "epoch": 2.7617269544924152, "grad_norm": 0.77734375, "learning_rate": 0.00016472975831048912, "loss": 0.0498, "step": 5917 }, { "epoch": 2.762193698949825, "grad_norm": 0.69921875, "learning_rate": 0.00016471857293649342, "loss": 0.0475, "step": 5918 }, { "epoch": 2.7626604434072344, "grad_norm": 0.88671875, "learning_rate": 0.00016470738616903503, "loss": 0.08, "step": 5919 }, { "epoch": 2.763127187864644, "grad_norm": 0.75390625, "learning_rate": 0.00016469619800835484, "loss": 0.043, "step": 5920 }, { "epoch": 2.7635939323220535, "grad_norm": 0.765625, "learning_rate": 0.00016468500845469368, "loss": 0.0513, "step": 5921 }, { "epoch": 2.7640606767794633, "grad_norm": 0.88671875, "learning_rate": 0.00016467381750829256, "loss": 0.0445, "step": 5922 }, { "epoch": 2.7645274212368727, "grad_norm": 0.6328125, "learning_rate": 0.00016466262516939237, "loss": 0.037, "step": 5923 }, { "epoch": 2.7649941656942825, "grad_norm": 0.703125, "learning_rate": 0.00016465143143823413, "loss": 0.0442, "step": 5924 }, { "epoch": 2.765460910151692, "grad_norm": 0.74609375, "learning_rate": 0.00016464023631505885, "loss": 0.0421, "step": 5925 }, { "epoch": 2.7659276546091016, "grad_norm": 0.7109375, "learning_rate": 0.00016462903980010756, "loss": 0.0462, "step": 5926 }, { "epoch": 2.766394399066511, "grad_norm": 0.7109375, "learning_rate": 0.00016461784189362132, "loss": 0.0454, "step": 5927 }, { "epoch": 2.7668611435239208, "grad_norm": 0.77734375, "learning_rate": 0.00016460664259584123, "loss": 0.0459, "step": 5928 }, { "epoch": 2.76732788798133, "grad_norm": 0.69921875, "learning_rate": 0.00016459544190700848, "loss": 0.0352, "step": 5929 }, { "epoch": 2.76779463243874, "grad_norm": 0.94921875, "learning_rate": 0.00016458423982736417, "loss": 0.046, "step": 5930 }, { "epoch": 2.7682613768961493, "grad_norm": 0.80078125, "learning_rate": 0.00016457303635714955, "loss": 0.0544, "step": 5931 }, { "epoch": 2.768728121353559, "grad_norm": 0.80859375, "learning_rate": 0.0001645618314966058, "loss": 0.0621, "step": 5932 }, { "epoch": 2.7691948658109684, "grad_norm": 0.72265625, "learning_rate": 0.00016455062524597418, "loss": 0.0486, "step": 5933 }, { "epoch": 2.769661610268378, "grad_norm": 0.91015625, "learning_rate": 0.00016453941760549595, "loss": 0.0445, "step": 5934 }, { "epoch": 2.7701283547257876, "grad_norm": 0.69921875, "learning_rate": 0.0001645282085754125, "loss": 0.0393, "step": 5935 }, { "epoch": 2.7705950991831974, "grad_norm": 0.92578125, "learning_rate": 0.0001645169981559651, "loss": 0.0632, "step": 5936 }, { "epoch": 2.7710618436406067, "grad_norm": 0.99609375, "learning_rate": 0.00016450578634739514, "loss": 0.0792, "step": 5937 }, { "epoch": 2.7715285880980165, "grad_norm": 0.796875, "learning_rate": 0.00016449457314994406, "loss": 0.046, "step": 5938 }, { "epoch": 2.771995332555426, "grad_norm": 0.78515625, "learning_rate": 0.00016448335856385321, "loss": 0.0462, "step": 5939 }, { "epoch": 2.772462077012835, "grad_norm": 0.81640625, "learning_rate": 0.00016447214258936415, "loss": 0.0438, "step": 5940 }, { "epoch": 2.772928821470245, "grad_norm": 0.953125, "learning_rate": 0.00016446092522671833, "loss": 0.0479, "step": 5941 }, { "epoch": 2.773395565927655, "grad_norm": 0.81640625, "learning_rate": 0.00016444970647615725, "loss": 0.042, "step": 5942 }, { "epoch": 2.773862310385064, "grad_norm": 0.73828125, "learning_rate": 0.00016443848633792248, "loss": 0.0386, "step": 5943 }, { "epoch": 2.7743290548424735, "grad_norm": 0.74609375, "learning_rate": 0.0001644272648122556, "loss": 0.0447, "step": 5944 }, { "epoch": 2.7747957992998833, "grad_norm": 0.890625, "learning_rate": 0.00016441604189939822, "loss": 0.054, "step": 5945 }, { "epoch": 2.775262543757293, "grad_norm": 0.8203125, "learning_rate": 0.000164404817599592, "loss": 0.0507, "step": 5946 }, { "epoch": 2.7757292882147024, "grad_norm": 0.91796875, "learning_rate": 0.00016439359191307862, "loss": 0.0482, "step": 5947 }, { "epoch": 2.776196032672112, "grad_norm": 0.75, "learning_rate": 0.00016438236484009973, "loss": 0.0437, "step": 5948 }, { "epoch": 2.7766627771295216, "grad_norm": 0.88671875, "learning_rate": 0.0001643711363808971, "loss": 0.0553, "step": 5949 }, { "epoch": 2.7771295215869314, "grad_norm": 0.71484375, "learning_rate": 0.00016435990653571248, "loss": 0.0476, "step": 5950 }, { "epoch": 2.7775962660443407, "grad_norm": 0.7890625, "learning_rate": 0.00016434867530478768, "loss": 0.0452, "step": 5951 }, { "epoch": 2.77806301050175, "grad_norm": 0.84375, "learning_rate": 0.00016433744268836445, "loss": 0.046, "step": 5952 }, { "epoch": 2.77852975495916, "grad_norm": 0.8359375, "learning_rate": 0.00016432620868668475, "loss": 0.0622, "step": 5953 }, { "epoch": 2.7789964994165697, "grad_norm": 0.65234375, "learning_rate": 0.00016431497329999042, "loss": 0.0381, "step": 5954 }, { "epoch": 2.779463243873979, "grad_norm": 0.83984375, "learning_rate": 0.00016430373652852329, "loss": 0.0402, "step": 5955 }, { "epoch": 2.7799299883313884, "grad_norm": 0.921875, "learning_rate": 0.0001642924983725254, "loss": 0.0563, "step": 5956 }, { "epoch": 2.780396732788798, "grad_norm": 0.9609375, "learning_rate": 0.00016428125883223872, "loss": 0.0556, "step": 5957 }, { "epoch": 2.780863477246208, "grad_norm": 0.85546875, "learning_rate": 0.00016427001790790516, "loss": 0.0433, "step": 5958 }, { "epoch": 2.7813302217036173, "grad_norm": 0.828125, "learning_rate": 0.00016425877559976684, "loss": 0.0408, "step": 5959 }, { "epoch": 2.7817969661610267, "grad_norm": 0.78515625, "learning_rate": 0.00016424753190806578, "loss": 0.0279, "step": 5960 }, { "epoch": 2.7822637106184365, "grad_norm": 0.734375, "learning_rate": 0.00016423628683304408, "loss": 0.0431, "step": 5961 }, { "epoch": 2.782730455075846, "grad_norm": 0.8828125, "learning_rate": 0.00016422504037494383, "loss": 0.0491, "step": 5962 }, { "epoch": 2.7831971995332556, "grad_norm": 0.71875, "learning_rate": 0.00016421379253400722, "loss": 0.0346, "step": 5963 }, { "epoch": 2.783663943990665, "grad_norm": 0.96875, "learning_rate": 0.0001642025433104764, "loss": 0.0416, "step": 5964 }, { "epoch": 2.7841306884480748, "grad_norm": 0.90234375, "learning_rate": 0.0001641912927045936, "loss": 0.0521, "step": 5965 }, { "epoch": 2.784597432905484, "grad_norm": 0.7578125, "learning_rate": 0.00016418004071660103, "loss": 0.0472, "step": 5966 }, { "epoch": 2.785064177362894, "grad_norm": 0.90625, "learning_rate": 0.000164168787346741, "loss": 0.0544, "step": 5967 }, { "epoch": 2.7855309218203033, "grad_norm": 0.7734375, "learning_rate": 0.00016415753259525577, "loss": 0.0585, "step": 5968 }, { "epoch": 2.785997666277713, "grad_norm": 0.8515625, "learning_rate": 0.00016414627646238767, "loss": 0.0417, "step": 5969 }, { "epoch": 2.7864644107351224, "grad_norm": 0.859375, "learning_rate": 0.0001641350189483791, "loss": 0.0487, "step": 5970 }, { "epoch": 2.786931155192532, "grad_norm": 0.8203125, "learning_rate": 0.00016412376005347238, "loss": 0.0492, "step": 5971 }, { "epoch": 2.7873978996499416, "grad_norm": 0.82421875, "learning_rate": 0.00016411249977790998, "loss": 0.0459, "step": 5972 }, { "epoch": 2.7878646441073514, "grad_norm": 0.8984375, "learning_rate": 0.00016410123812193427, "loss": 0.051, "step": 5973 }, { "epoch": 2.7883313885647607, "grad_norm": 0.859375, "learning_rate": 0.00016408997508578785, "loss": 0.0437, "step": 5974 }, { "epoch": 2.7887981330221705, "grad_norm": 1.0859375, "learning_rate": 0.0001640787106697131, "loss": 0.0582, "step": 5975 }, { "epoch": 2.78926487747958, "grad_norm": 0.8046875, "learning_rate": 0.00016406744487395267, "loss": 0.0427, "step": 5976 }, { "epoch": 2.7897316219369896, "grad_norm": 0.8125, "learning_rate": 0.00016405617769874904, "loss": 0.0564, "step": 5977 }, { "epoch": 2.790198366394399, "grad_norm": 0.91015625, "learning_rate": 0.00016404490914434484, "loss": 0.0509, "step": 5978 }, { "epoch": 2.790665110851809, "grad_norm": 0.57421875, "learning_rate": 0.00016403363921098268, "loss": 0.0337, "step": 5979 }, { "epoch": 2.791131855309218, "grad_norm": 0.98046875, "learning_rate": 0.0001640223678989052, "loss": 0.0478, "step": 5980 }, { "epoch": 2.791598599766628, "grad_norm": 0.92578125, "learning_rate": 0.00016401109520835516, "loss": 0.0484, "step": 5981 }, { "epoch": 2.7920653442240373, "grad_norm": 1.0, "learning_rate": 0.00016399982113957516, "loss": 0.0569, "step": 5982 }, { "epoch": 2.7925320886814466, "grad_norm": 0.890625, "learning_rate": 0.00016398854569280803, "loss": 0.0718, "step": 5983 }, { "epoch": 2.7929988331388564, "grad_norm": 0.7890625, "learning_rate": 0.00016397726886829652, "loss": 0.0609, "step": 5984 }, { "epoch": 2.7934655775962662, "grad_norm": 0.92578125, "learning_rate": 0.00016396599066628343, "loss": 0.0578, "step": 5985 }, { "epoch": 2.7939323220536756, "grad_norm": 0.8125, "learning_rate": 0.00016395471108701158, "loss": 0.0606, "step": 5986 }, { "epoch": 2.794399066511085, "grad_norm": 0.7578125, "learning_rate": 0.00016394343013072382, "loss": 0.0394, "step": 5987 }, { "epoch": 2.7948658109684947, "grad_norm": 0.89453125, "learning_rate": 0.0001639321477976631, "loss": 0.0475, "step": 5988 }, { "epoch": 2.7953325554259045, "grad_norm": 0.89453125, "learning_rate": 0.00016392086408807227, "loss": 0.051, "step": 5989 }, { "epoch": 2.795799299883314, "grad_norm": 0.7421875, "learning_rate": 0.0001639095790021943, "loss": 0.0525, "step": 5990 }, { "epoch": 2.7962660443407232, "grad_norm": 0.84765625, "learning_rate": 0.00016389829254027225, "loss": 0.0578, "step": 5991 }, { "epoch": 2.796732788798133, "grad_norm": 0.90234375, "learning_rate": 0.00016388700470254902, "loss": 0.0523, "step": 5992 }, { "epoch": 2.797199533255543, "grad_norm": 0.7109375, "learning_rate": 0.0001638757154892677, "loss": 0.0327, "step": 5993 }, { "epoch": 2.797666277712952, "grad_norm": 0.734375, "learning_rate": 0.00016386442490067137, "loss": 0.045, "step": 5994 }, { "epoch": 2.7981330221703615, "grad_norm": 0.765625, "learning_rate": 0.0001638531329370031, "loss": 0.0533, "step": 5995 }, { "epoch": 2.7985997666277713, "grad_norm": 1.2421875, "learning_rate": 0.00016384183959850605, "loss": 0.0584, "step": 5996 }, { "epoch": 2.799066511085181, "grad_norm": 0.796875, "learning_rate": 0.00016383054488542338, "loss": 0.0549, "step": 5997 }, { "epoch": 2.7995332555425905, "grad_norm": 0.62890625, "learning_rate": 0.00016381924879799818, "loss": 0.0359, "step": 5998 }, { "epoch": 2.8, "grad_norm": 0.91796875, "learning_rate": 0.00016380795133647382, "loss": 0.0486, "step": 5999 }, { "epoch": 2.8004667444574096, "grad_norm": 0.71484375, "learning_rate": 0.00016379665250109343, "loss": 0.0416, "step": 6000 }, { "epoch": 2.8009334889148194, "grad_norm": 0.55859375, "learning_rate": 0.00016378535229210034, "loss": 0.0338, "step": 6001 }, { "epoch": 2.8014002333722288, "grad_norm": 0.98046875, "learning_rate": 0.00016377405070973784, "loss": 0.0697, "step": 6002 }, { "epoch": 2.801866977829638, "grad_norm": 0.92578125, "learning_rate": 0.00016376274775424928, "loss": 0.0559, "step": 6003 }, { "epoch": 2.802333722287048, "grad_norm": 0.75390625, "learning_rate": 0.00016375144342587798, "loss": 0.0439, "step": 6004 }, { "epoch": 2.8028004667444573, "grad_norm": 0.8984375, "learning_rate": 0.0001637401377248674, "loss": 0.0528, "step": 6005 }, { "epoch": 2.803267211201867, "grad_norm": 0.875, "learning_rate": 0.00016372883065146092, "loss": 0.0523, "step": 6006 }, { "epoch": 2.8037339556592764, "grad_norm": 0.765625, "learning_rate": 0.00016371752220590202, "loss": 0.0368, "step": 6007 }, { "epoch": 2.804200700116686, "grad_norm": 0.81640625, "learning_rate": 0.00016370621238843419, "loss": 0.0493, "step": 6008 }, { "epoch": 2.8046674445740956, "grad_norm": 0.75, "learning_rate": 0.00016369490119930088, "loss": 0.0391, "step": 6009 }, { "epoch": 2.8051341890315054, "grad_norm": 0.890625, "learning_rate": 0.0001636835886387457, "loss": 0.047, "step": 6010 }, { "epoch": 2.8056009334889147, "grad_norm": 0.8515625, "learning_rate": 0.0001636722747070122, "loss": 0.0456, "step": 6011 }, { "epoch": 2.8060676779463245, "grad_norm": 0.70703125, "learning_rate": 0.000163660959404344, "loss": 0.0423, "step": 6012 }, { "epoch": 2.806534422403734, "grad_norm": 0.734375, "learning_rate": 0.00016364964273098468, "loss": 0.0373, "step": 6013 }, { "epoch": 2.8070011668611436, "grad_norm": 0.66796875, "learning_rate": 0.00016363832468717794, "loss": 0.039, "step": 6014 }, { "epoch": 2.807467911318553, "grad_norm": 0.98828125, "learning_rate": 0.00016362700527316747, "loss": 0.0485, "step": 6015 }, { "epoch": 2.807934655775963, "grad_norm": 0.625, "learning_rate": 0.00016361568448919694, "loss": 0.0353, "step": 6016 }, { "epoch": 2.808401400233372, "grad_norm": 0.6953125, "learning_rate": 0.00016360436233551018, "loss": 0.0362, "step": 6017 }, { "epoch": 2.808868144690782, "grad_norm": 0.8984375, "learning_rate": 0.00016359303881235092, "loss": 0.0504, "step": 6018 }, { "epoch": 2.8093348891481913, "grad_norm": 0.77734375, "learning_rate": 0.000163581713919963, "loss": 0.0524, "step": 6019 }, { "epoch": 2.809801633605601, "grad_norm": 0.9453125, "learning_rate": 0.0001635703876585902, "loss": 0.0462, "step": 6020 }, { "epoch": 2.8102683780630104, "grad_norm": 0.96875, "learning_rate": 0.00016355906002847644, "loss": 0.0519, "step": 6021 }, { "epoch": 2.81073512252042, "grad_norm": 0.8046875, "learning_rate": 0.0001635477310298656, "loss": 0.0506, "step": 6022 }, { "epoch": 2.8112018669778296, "grad_norm": 0.65234375, "learning_rate": 0.00016353640066300157, "loss": 0.0497, "step": 6023 }, { "epoch": 2.8116686114352394, "grad_norm": 0.6875, "learning_rate": 0.00016352506892812837, "loss": 0.0428, "step": 6024 }, { "epoch": 2.8121353558926487, "grad_norm": 0.88671875, "learning_rate": 0.00016351373582548995, "loss": 0.058, "step": 6025 }, { "epoch": 2.812602100350058, "grad_norm": 0.640625, "learning_rate": 0.0001635024013553303, "loss": 0.0295, "step": 6026 }, { "epoch": 2.813068844807468, "grad_norm": 0.890625, "learning_rate": 0.00016349106551789353, "loss": 0.0485, "step": 6027 }, { "epoch": 2.8135355892648777, "grad_norm": 0.6640625, "learning_rate": 0.0001634797283134237, "loss": 0.0401, "step": 6028 }, { "epoch": 2.814002333722287, "grad_norm": 0.9453125, "learning_rate": 0.0001634683897421648, "loss": 0.0537, "step": 6029 }, { "epoch": 2.8144690781796964, "grad_norm": 0.88671875, "learning_rate": 0.00016345704980436115, "loss": 0.0559, "step": 6030 }, { "epoch": 2.814935822637106, "grad_norm": 0.890625, "learning_rate": 0.00016344570850025675, "loss": 0.0449, "step": 6031 }, { "epoch": 2.815402567094516, "grad_norm": 0.87109375, "learning_rate": 0.00016343436583009582, "loss": 0.0477, "step": 6032 }, { "epoch": 2.8158693115519253, "grad_norm": 0.69140625, "learning_rate": 0.00016342302179412268, "loss": 0.0419, "step": 6033 }, { "epoch": 2.8163360560093347, "grad_norm": 0.734375, "learning_rate": 0.00016341167639258148, "loss": 0.0339, "step": 6034 }, { "epoch": 2.8168028004667445, "grad_norm": 0.79296875, "learning_rate": 0.00016340032962571655, "loss": 0.0535, "step": 6035 }, { "epoch": 2.8172695449241543, "grad_norm": 0.69921875, "learning_rate": 0.00016338898149377212, "loss": 0.0428, "step": 6036 }, { "epoch": 2.8177362893815636, "grad_norm": 0.87109375, "learning_rate": 0.00016337763199699264, "loss": 0.0565, "step": 6037 }, { "epoch": 2.818203033838973, "grad_norm": 0.921875, "learning_rate": 0.00016336628113562243, "loss": 0.0521, "step": 6038 }, { "epoch": 2.8186697782963828, "grad_norm": 0.91796875, "learning_rate": 0.00016335492890990586, "loss": 0.0466, "step": 6039 }, { "epoch": 2.8191365227537926, "grad_norm": 0.5859375, "learning_rate": 0.0001633435753200874, "loss": 0.0342, "step": 6040 }, { "epoch": 2.819603267211202, "grad_norm": 0.65234375, "learning_rate": 0.00016333222036641148, "loss": 0.0322, "step": 6041 }, { "epoch": 2.8200700116686113, "grad_norm": 0.8984375, "learning_rate": 0.00016332086404912254, "loss": 0.0379, "step": 6042 }, { "epoch": 2.820536756126021, "grad_norm": 0.73828125, "learning_rate": 0.0001633095063684652, "loss": 0.0434, "step": 6043 }, { "epoch": 2.8210035005834304, "grad_norm": 0.76953125, "learning_rate": 0.00016329814732468393, "loss": 0.0434, "step": 6044 }, { "epoch": 2.82147024504084, "grad_norm": 0.72265625, "learning_rate": 0.0001632867869180233, "loss": 0.0313, "step": 6045 }, { "epoch": 2.8219369894982496, "grad_norm": 1.0234375, "learning_rate": 0.00016327542514872793, "loss": 0.0633, "step": 6046 }, { "epoch": 2.8224037339556594, "grad_norm": 0.8125, "learning_rate": 0.00016326406201704248, "loss": 0.0593, "step": 6047 }, { "epoch": 2.8228704784130687, "grad_norm": 0.92578125, "learning_rate": 0.00016325269752321154, "loss": 0.0357, "step": 6048 }, { "epoch": 2.8233372228704785, "grad_norm": 0.765625, "learning_rate": 0.00016324133166747986, "loss": 0.0378, "step": 6049 }, { "epoch": 2.823803967327888, "grad_norm": 0.734375, "learning_rate": 0.00016322996445009214, "loss": 0.0421, "step": 6050 }, { "epoch": 2.8242707117852976, "grad_norm": 0.625, "learning_rate": 0.00016321859587129317, "loss": 0.029, "step": 6051 }, { "epoch": 2.824737456242707, "grad_norm": 0.82421875, "learning_rate": 0.0001632072259313276, "loss": 0.0517, "step": 6052 }, { "epoch": 2.825204200700117, "grad_norm": 0.79296875, "learning_rate": 0.0001631958546304404, "loss": 0.0329, "step": 6053 }, { "epoch": 2.825670945157526, "grad_norm": 0.6875, "learning_rate": 0.00016318448196887632, "loss": 0.0418, "step": 6054 }, { "epoch": 2.826137689614936, "grad_norm": 0.83984375, "learning_rate": 0.00016317310794688024, "loss": 0.0446, "step": 6055 }, { "epoch": 2.8266044340723453, "grad_norm": 0.95703125, "learning_rate": 0.00016316173256469702, "loss": 0.0426, "step": 6056 }, { "epoch": 2.827071178529755, "grad_norm": 0.8203125, "learning_rate": 0.00016315035582257167, "loss": 0.0565, "step": 6057 }, { "epoch": 2.8275379229871644, "grad_norm": 0.98828125, "learning_rate": 0.00016313897772074904, "loss": 0.061, "step": 6058 }, { "epoch": 2.8280046674445742, "grad_norm": 1.1015625, "learning_rate": 0.0001631275982594742, "loss": 0.0561, "step": 6059 }, { "epoch": 2.8284714119019836, "grad_norm": 0.6484375, "learning_rate": 0.00016311621743899212, "loss": 0.0382, "step": 6060 }, { "epoch": 2.8289381563593934, "grad_norm": 0.703125, "learning_rate": 0.00016310483525954785, "loss": 0.0282, "step": 6061 }, { "epoch": 2.8294049008168027, "grad_norm": 0.80859375, "learning_rate": 0.00016309345172138646, "loss": 0.0656, "step": 6062 }, { "epoch": 2.8298716452742125, "grad_norm": 0.64453125, "learning_rate": 0.00016308206682475306, "loss": 0.0407, "step": 6063 }, { "epoch": 2.830338389731622, "grad_norm": 0.8515625, "learning_rate": 0.00016307068056989275, "loss": 0.0541, "step": 6064 }, { "epoch": 2.8308051341890312, "grad_norm": 0.7265625, "learning_rate": 0.00016305929295705075, "loss": 0.0409, "step": 6065 }, { "epoch": 2.831271878646441, "grad_norm": 0.94140625, "learning_rate": 0.00016304790398647217, "loss": 0.0506, "step": 6066 }, { "epoch": 2.831738623103851, "grad_norm": 0.75, "learning_rate": 0.00016303651365840227, "loss": 0.0417, "step": 6067 }, { "epoch": 2.83220536756126, "grad_norm": 0.6640625, "learning_rate": 0.0001630251219730863, "loss": 0.0372, "step": 6068 }, { "epoch": 2.8326721120186695, "grad_norm": 0.8359375, "learning_rate": 0.00016301372893076952, "loss": 0.0485, "step": 6069 }, { "epoch": 2.8331388564760793, "grad_norm": 0.55859375, "learning_rate": 0.00016300233453169725, "loss": 0.0398, "step": 6070 }, { "epoch": 2.833605600933489, "grad_norm": 0.84375, "learning_rate": 0.0001629909387761148, "loss": 0.0493, "step": 6071 }, { "epoch": 2.8340723453908985, "grad_norm": 0.921875, "learning_rate": 0.00016297954166426756, "loss": 0.0467, "step": 6072 }, { "epoch": 2.834539089848308, "grad_norm": 0.90234375, "learning_rate": 0.0001629681431964009, "loss": 0.0519, "step": 6073 }, { "epoch": 2.8350058343057176, "grad_norm": 0.828125, "learning_rate": 0.00016295674337276026, "loss": 0.0494, "step": 6074 }, { "epoch": 2.8354725787631274, "grad_norm": 1.09375, "learning_rate": 0.0001629453421935911, "loss": 0.0501, "step": 6075 }, { "epoch": 2.8359393232205368, "grad_norm": 0.84375, "learning_rate": 0.00016293393965913886, "loss": 0.042, "step": 6076 }, { "epoch": 2.836406067677946, "grad_norm": 0.6953125, "learning_rate": 0.00016292253576964907, "loss": 0.04, "step": 6077 }, { "epoch": 2.836872812135356, "grad_norm": 0.7890625, "learning_rate": 0.00016291113052536727, "loss": 0.04, "step": 6078 }, { "epoch": 2.8373395565927657, "grad_norm": 0.96875, "learning_rate": 0.00016289972392653906, "loss": 0.0555, "step": 6079 }, { "epoch": 2.837806301050175, "grad_norm": 1.1171875, "learning_rate": 0.00016288831597340998, "loss": 0.0645, "step": 6080 }, { "epoch": 2.8382730455075844, "grad_norm": 1.046875, "learning_rate": 0.0001628769066662257, "loss": 0.0597, "step": 6081 }, { "epoch": 2.838739789964994, "grad_norm": 0.7265625, "learning_rate": 0.00016286549600523183, "loss": 0.0455, "step": 6082 }, { "epoch": 2.839206534422404, "grad_norm": 0.765625, "learning_rate": 0.00016285408399067407, "loss": 0.0363, "step": 6083 }, { "epoch": 2.8396732788798134, "grad_norm": 0.81640625, "learning_rate": 0.0001628426706227982, "loss": 0.0447, "step": 6084 }, { "epoch": 2.8401400233372227, "grad_norm": 0.703125, "learning_rate": 0.00016283125590184984, "loss": 0.039, "step": 6085 }, { "epoch": 2.8406067677946325, "grad_norm": 1.125, "learning_rate": 0.00016281983982807487, "loss": 0.0575, "step": 6086 }, { "epoch": 2.841073512252042, "grad_norm": 0.75390625, "learning_rate": 0.00016280842240171906, "loss": 0.0467, "step": 6087 }, { "epoch": 2.8415402567094517, "grad_norm": 0.9921875, "learning_rate": 0.00016279700362302818, "loss": 0.0634, "step": 6088 }, { "epoch": 2.842007001166861, "grad_norm": 0.73046875, "learning_rate": 0.00016278558349224818, "loss": 0.0458, "step": 6089 }, { "epoch": 2.842473745624271, "grad_norm": 0.7421875, "learning_rate": 0.0001627741620096249, "loss": 0.0663, "step": 6090 }, { "epoch": 2.84294049008168, "grad_norm": 0.8984375, "learning_rate": 0.00016276273917540423, "loss": 0.045, "step": 6091 }, { "epoch": 2.84340723453909, "grad_norm": 0.828125, "learning_rate": 0.0001627513149898322, "loss": 0.0505, "step": 6092 }, { "epoch": 2.8438739789964993, "grad_norm": 0.9453125, "learning_rate": 0.0001627398894531547, "loss": 0.0537, "step": 6093 }, { "epoch": 2.844340723453909, "grad_norm": 0.73828125, "learning_rate": 0.00016272846256561779, "loss": 0.0461, "step": 6094 }, { "epoch": 2.8448074679113184, "grad_norm": 0.77734375, "learning_rate": 0.00016271703432746746, "loss": 0.0429, "step": 6095 }, { "epoch": 2.8452742123687282, "grad_norm": 0.85546875, "learning_rate": 0.0001627056047389498, "loss": 0.0422, "step": 6096 }, { "epoch": 2.8457409568261376, "grad_norm": 0.84765625, "learning_rate": 0.00016269417380031088, "loss": 0.0587, "step": 6097 }, { "epoch": 2.8462077012835474, "grad_norm": 0.5859375, "learning_rate": 0.00016268274151179688, "loss": 0.0315, "step": 6098 }, { "epoch": 2.8466744457409567, "grad_norm": 0.7578125, "learning_rate": 0.00016267130787365387, "loss": 0.0375, "step": 6099 }, { "epoch": 2.8471411901983665, "grad_norm": 1.015625, "learning_rate": 0.0001626598728861281, "loss": 0.0541, "step": 6100 }, { "epoch": 2.847607934655776, "grad_norm": 0.8515625, "learning_rate": 0.00016264843654946573, "loss": 0.0509, "step": 6101 }, { "epoch": 2.8480746791131857, "grad_norm": 0.6640625, "learning_rate": 0.00016263699886391302, "loss": 0.0442, "step": 6102 }, { "epoch": 2.848541423570595, "grad_norm": 0.72265625, "learning_rate": 0.0001626255598297162, "loss": 0.0413, "step": 6103 }, { "epoch": 2.849008168028005, "grad_norm": 0.73828125, "learning_rate": 0.00016261411944712163, "loss": 0.0432, "step": 6104 }, { "epoch": 2.849474912485414, "grad_norm": 0.984375, "learning_rate": 0.00016260267771637556, "loss": 0.0496, "step": 6105 }, { "epoch": 2.849941656942824, "grad_norm": 0.953125, "learning_rate": 0.00016259123463772439, "loss": 0.0519, "step": 6106 }, { "epoch": 2.8504084014002333, "grad_norm": 0.84375, "learning_rate": 0.00016257979021141447, "loss": 0.0419, "step": 6107 }, { "epoch": 2.8508751458576427, "grad_norm": 0.72265625, "learning_rate": 0.0001625683444376923, "loss": 0.0363, "step": 6108 }, { "epoch": 2.8513418903150525, "grad_norm": 0.72265625, "learning_rate": 0.0001625568973168042, "loss": 0.0282, "step": 6109 }, { "epoch": 2.8518086347724623, "grad_norm": 0.76953125, "learning_rate": 0.00016254544884899673, "loss": 0.0449, "step": 6110 }, { "epoch": 2.8522753792298716, "grad_norm": 0.671875, "learning_rate": 0.00016253399903451632, "loss": 0.0342, "step": 6111 }, { "epoch": 2.852742123687281, "grad_norm": 0.84765625, "learning_rate": 0.00016252254787360953, "loss": 0.0421, "step": 6112 }, { "epoch": 2.8532088681446908, "grad_norm": 0.78515625, "learning_rate": 0.0001625110953665229, "loss": 0.042, "step": 6113 }, { "epoch": 2.8536756126021006, "grad_norm": 0.9296875, "learning_rate": 0.00016249964151350305, "loss": 0.0431, "step": 6114 }, { "epoch": 2.85414235705951, "grad_norm": 0.8671875, "learning_rate": 0.0001624881863147966, "loss": 0.0433, "step": 6115 }, { "epoch": 2.8546091015169193, "grad_norm": 0.75390625, "learning_rate": 0.00016247672977065013, "loss": 0.0379, "step": 6116 }, { "epoch": 2.855075845974329, "grad_norm": 0.82421875, "learning_rate": 0.00016246527188131036, "loss": 0.0548, "step": 6117 }, { "epoch": 2.855542590431739, "grad_norm": 0.6171875, "learning_rate": 0.00016245381264702398, "loss": 0.0384, "step": 6118 }, { "epoch": 2.856009334889148, "grad_norm": 0.8671875, "learning_rate": 0.00016244235206803773, "loss": 0.0688, "step": 6119 }, { "epoch": 2.8564760793465576, "grad_norm": 0.9296875, "learning_rate": 0.00016243089014459833, "loss": 0.0643, "step": 6120 }, { "epoch": 2.8569428238039674, "grad_norm": 0.88671875, "learning_rate": 0.00016241942687695261, "loss": 0.0369, "step": 6121 }, { "epoch": 2.857409568261377, "grad_norm": 0.796875, "learning_rate": 0.0001624079622653474, "loss": 0.0323, "step": 6122 }, { "epoch": 2.8578763127187865, "grad_norm": 0.72265625, "learning_rate": 0.00016239649631002946, "loss": 0.0331, "step": 6123 }, { "epoch": 2.858343057176196, "grad_norm": 0.8046875, "learning_rate": 0.0001623850290112458, "loss": 0.0314, "step": 6124 }, { "epoch": 2.8588098016336057, "grad_norm": 0.99609375, "learning_rate": 0.0001623735603692432, "loss": 0.0503, "step": 6125 }, { "epoch": 2.859276546091015, "grad_norm": 0.77734375, "learning_rate": 0.00016236209038426863, "loss": 0.0458, "step": 6126 }, { "epoch": 2.859743290548425, "grad_norm": 0.66796875, "learning_rate": 0.0001623506190565691, "loss": 0.0375, "step": 6127 }, { "epoch": 2.860210035005834, "grad_norm": 0.78125, "learning_rate": 0.00016233914638639154, "loss": 0.0614, "step": 6128 }, { "epoch": 2.860676779463244, "grad_norm": 0.53515625, "learning_rate": 0.00016232767237398302, "loss": 0.0401, "step": 6129 }, { "epoch": 2.8611435239206533, "grad_norm": 0.8203125, "learning_rate": 0.0001623161970195905, "loss": 0.0561, "step": 6130 }, { "epoch": 2.861610268378063, "grad_norm": 0.70703125, "learning_rate": 0.00016230472032346117, "loss": 0.0326, "step": 6131 }, { "epoch": 2.8620770128354724, "grad_norm": 0.78125, "learning_rate": 0.0001622932422858421, "loss": 0.0353, "step": 6132 }, { "epoch": 2.8625437572928822, "grad_norm": 0.71484375, "learning_rate": 0.00016228176290698033, "loss": 0.0363, "step": 6133 }, { "epoch": 2.8630105017502916, "grad_norm": 0.85546875, "learning_rate": 0.00016227028218712317, "loss": 0.0501, "step": 6134 }, { "epoch": 2.8634772462077014, "grad_norm": 0.75390625, "learning_rate": 0.00016225880012651773, "loss": 0.0486, "step": 6135 }, { "epoch": 2.8639439906651107, "grad_norm": 0.69140625, "learning_rate": 0.00016224731672541122, "loss": 0.0364, "step": 6136 }, { "epoch": 2.8644107351225205, "grad_norm": 0.671875, "learning_rate": 0.00016223583198405096, "loss": 0.0288, "step": 6137 }, { "epoch": 2.86487747957993, "grad_norm": 0.73828125, "learning_rate": 0.00016222434590268416, "loss": 0.0416, "step": 6138 }, { "epoch": 2.8653442240373397, "grad_norm": 0.72265625, "learning_rate": 0.00016221285848155815, "loss": 0.038, "step": 6139 }, { "epoch": 2.865810968494749, "grad_norm": 0.8203125, "learning_rate": 0.00016220136972092028, "loss": 0.0407, "step": 6140 }, { "epoch": 2.866277712952159, "grad_norm": 0.80859375, "learning_rate": 0.0001621898796210179, "loss": 0.0362, "step": 6141 }, { "epoch": 2.866744457409568, "grad_norm": 0.8203125, "learning_rate": 0.0001621783881820984, "loss": 0.0569, "step": 6142 }, { "epoch": 2.867211201866978, "grad_norm": 0.8125, "learning_rate": 0.00016216689540440925, "loss": 0.0462, "step": 6143 }, { "epoch": 2.8676779463243873, "grad_norm": 0.82421875, "learning_rate": 0.00016215540128819783, "loss": 0.0415, "step": 6144 }, { "epoch": 2.868144690781797, "grad_norm": 0.875, "learning_rate": 0.00016214390583371165, "loss": 0.0441, "step": 6145 }, { "epoch": 2.8686114352392065, "grad_norm": 0.515625, "learning_rate": 0.00016213240904119824, "loss": 0.0371, "step": 6146 }, { "epoch": 2.869078179696616, "grad_norm": 0.85546875, "learning_rate": 0.00016212091091090516, "loss": 0.0334, "step": 6147 }, { "epoch": 2.8695449241540256, "grad_norm": 1.015625, "learning_rate": 0.0001621094114430799, "loss": 0.0472, "step": 6148 }, { "epoch": 2.8700116686114354, "grad_norm": 0.93359375, "learning_rate": 0.00016209791063797013, "loss": 0.0454, "step": 6149 }, { "epoch": 2.8704784130688448, "grad_norm": 0.765625, "learning_rate": 0.00016208640849582344, "loss": 0.0429, "step": 6150 }, { "epoch": 2.870945157526254, "grad_norm": 0.73828125, "learning_rate": 0.00016207490501688747, "loss": 0.0396, "step": 6151 }, { "epoch": 2.871411901983664, "grad_norm": 0.6328125, "learning_rate": 0.00016206340020140993, "loss": 0.034, "step": 6152 }, { "epoch": 2.8718786464410737, "grad_norm": 1.2578125, "learning_rate": 0.00016205189404963852, "loss": 0.0622, "step": 6153 }, { "epoch": 2.872345390898483, "grad_norm": 1.0, "learning_rate": 0.000162040386561821, "loss": 0.0352, "step": 6154 }, { "epoch": 2.8728121353558924, "grad_norm": 0.69921875, "learning_rate": 0.00016202887773820513, "loss": 0.0431, "step": 6155 }, { "epoch": 2.873278879813302, "grad_norm": 0.85546875, "learning_rate": 0.00016201736757903866, "loss": 0.0403, "step": 6156 }, { "epoch": 2.873745624270712, "grad_norm": 0.84375, "learning_rate": 0.00016200585608456948, "loss": 0.0498, "step": 6157 }, { "epoch": 2.8742123687281214, "grad_norm": 0.80859375, "learning_rate": 0.00016199434325504542, "loss": 0.0361, "step": 6158 }, { "epoch": 2.8746791131855307, "grad_norm": 0.92578125, "learning_rate": 0.00016198282909071437, "loss": 0.0565, "step": 6159 }, { "epoch": 2.8751458576429405, "grad_norm": 0.91796875, "learning_rate": 0.00016197131359182423, "loss": 0.0547, "step": 6160 }, { "epoch": 2.8756126021003503, "grad_norm": 0.81640625, "learning_rate": 0.00016195979675862296, "loss": 0.0428, "step": 6161 }, { "epoch": 2.8760793465577597, "grad_norm": 0.84765625, "learning_rate": 0.0001619482785913585, "loss": 0.0468, "step": 6162 }, { "epoch": 2.876546091015169, "grad_norm": 0.7265625, "learning_rate": 0.0001619367590902789, "loss": 0.0389, "step": 6163 }, { "epoch": 2.877012835472579, "grad_norm": 0.91796875, "learning_rate": 0.00016192523825563215, "loss": 0.0536, "step": 6164 }, { "epoch": 2.8774795799299886, "grad_norm": 0.92578125, "learning_rate": 0.0001619137160876663, "loss": 0.0507, "step": 6165 }, { "epoch": 2.877946324387398, "grad_norm": 0.71484375, "learning_rate": 0.00016190219258662943, "loss": 0.0324, "step": 6166 }, { "epoch": 2.8784130688448073, "grad_norm": 0.8125, "learning_rate": 0.0001618906677527697, "loss": 0.0392, "step": 6167 }, { "epoch": 2.878879813302217, "grad_norm": 0.7734375, "learning_rate": 0.00016187914158633524, "loss": 0.0391, "step": 6168 }, { "epoch": 2.8793465577596264, "grad_norm": 0.83203125, "learning_rate": 0.0001618676140875742, "loss": 0.0396, "step": 6169 }, { "epoch": 2.8798133022170362, "grad_norm": 0.70703125, "learning_rate": 0.00016185608525673474, "loss": 0.0319, "step": 6170 }, { "epoch": 2.8802800466744456, "grad_norm": 0.64453125, "learning_rate": 0.00016184455509406519, "loss": 0.0355, "step": 6171 }, { "epoch": 2.8807467911318554, "grad_norm": 0.75, "learning_rate": 0.0001618330235998137, "loss": 0.0338, "step": 6172 }, { "epoch": 2.8812135355892647, "grad_norm": 0.76953125, "learning_rate": 0.00016182149077422864, "loss": 0.0485, "step": 6173 }, { "epoch": 2.8816802800466745, "grad_norm": 1.1015625, "learning_rate": 0.0001618099566175583, "loss": 0.0652, "step": 6174 }, { "epoch": 2.882147024504084, "grad_norm": 0.8828125, "learning_rate": 0.000161798421130051, "loss": 0.0497, "step": 6175 }, { "epoch": 2.8826137689614937, "grad_norm": 0.96484375, "learning_rate": 0.0001617868843119551, "loss": 0.0559, "step": 6176 }, { "epoch": 2.883080513418903, "grad_norm": 0.76171875, "learning_rate": 0.00016177534616351906, "loss": 0.0521, "step": 6177 }, { "epoch": 2.883547257876313, "grad_norm": 0.69921875, "learning_rate": 0.00016176380668499128, "loss": 0.0382, "step": 6178 }, { "epoch": 2.884014002333722, "grad_norm": 0.9296875, "learning_rate": 0.00016175226587662018, "loss": 0.0471, "step": 6179 }, { "epoch": 2.884480746791132, "grad_norm": 0.71484375, "learning_rate": 0.0001617407237386543, "loss": 0.0428, "step": 6180 }, { "epoch": 2.8849474912485413, "grad_norm": 0.76953125, "learning_rate": 0.00016172918027134215, "loss": 0.0465, "step": 6181 }, { "epoch": 2.885414235705951, "grad_norm": 0.85546875, "learning_rate": 0.00016171763547493222, "loss": 0.0374, "step": 6182 }, { "epoch": 2.8858809801633605, "grad_norm": 0.6640625, "learning_rate": 0.00016170608934967318, "loss": 0.0425, "step": 6183 }, { "epoch": 2.8863477246207703, "grad_norm": 0.90625, "learning_rate": 0.00016169454189581353, "loss": 0.0369, "step": 6184 }, { "epoch": 2.8868144690781796, "grad_norm": 0.5234375, "learning_rate": 0.00016168299311360195, "loss": 0.0342, "step": 6185 }, { "epoch": 2.8872812135355894, "grad_norm": 0.60546875, "learning_rate": 0.00016167144300328708, "loss": 0.0401, "step": 6186 }, { "epoch": 2.8877479579929988, "grad_norm": 0.63671875, "learning_rate": 0.00016165989156511764, "loss": 0.0451, "step": 6187 }, { "epoch": 2.8882147024504086, "grad_norm": 0.515625, "learning_rate": 0.0001616483387993423, "loss": 0.0262, "step": 6188 }, { "epoch": 2.888681446907818, "grad_norm": 0.640625, "learning_rate": 0.00016163678470620986, "loss": 0.0326, "step": 6189 }, { "epoch": 2.8891481913652273, "grad_norm": 0.78515625, "learning_rate": 0.00016162522928596902, "loss": 0.0466, "step": 6190 }, { "epoch": 2.889614935822637, "grad_norm": 0.765625, "learning_rate": 0.00016161367253886863, "loss": 0.0364, "step": 6191 }, { "epoch": 2.890081680280047, "grad_norm": 0.90625, "learning_rate": 0.00016160211446515752, "loss": 0.0482, "step": 6192 }, { "epoch": 2.890548424737456, "grad_norm": 0.79296875, "learning_rate": 0.0001615905550650845, "loss": 0.0333, "step": 6193 }, { "epoch": 2.8910151691948656, "grad_norm": 1.046875, "learning_rate": 0.00016157899433889855, "loss": 0.0413, "step": 6194 }, { "epoch": 2.8914819136522754, "grad_norm": 0.65234375, "learning_rate": 0.00016156743228684848, "loss": 0.0415, "step": 6195 }, { "epoch": 2.891948658109685, "grad_norm": 0.93359375, "learning_rate": 0.00016155586890918331, "loss": 0.0472, "step": 6196 }, { "epoch": 2.8924154025670945, "grad_norm": 0.79296875, "learning_rate": 0.00016154430420615198, "loss": 0.0375, "step": 6197 }, { "epoch": 2.892882147024504, "grad_norm": 0.76171875, "learning_rate": 0.0001615327381780035, "loss": 0.0394, "step": 6198 }, { "epoch": 2.8933488914819137, "grad_norm": 0.79296875, "learning_rate": 0.0001615211708249869, "loss": 0.0421, "step": 6199 }, { "epoch": 2.8938156359393234, "grad_norm": 0.75, "learning_rate": 0.00016150960214735122, "loss": 0.0425, "step": 6200 }, { "epoch": 2.894282380396733, "grad_norm": 0.6796875, "learning_rate": 0.00016149803214534557, "loss": 0.0313, "step": 6201 }, { "epoch": 2.894749124854142, "grad_norm": 0.7578125, "learning_rate": 0.00016148646081921904, "loss": 0.0403, "step": 6202 }, { "epoch": 2.895215869311552, "grad_norm": 0.81640625, "learning_rate": 0.00016147488816922078, "loss": 0.0382, "step": 6203 }, { "epoch": 2.8956826137689617, "grad_norm": 0.81640625, "learning_rate": 0.0001614633141956, "loss": 0.0291, "step": 6204 }, { "epoch": 2.896149358226371, "grad_norm": 0.625, "learning_rate": 0.00016145173889860583, "loss": 0.0258, "step": 6205 }, { "epoch": 2.8966161026837804, "grad_norm": 0.76171875, "learning_rate": 0.00016144016227848758, "loss": 0.0326, "step": 6206 }, { "epoch": 2.8970828471411902, "grad_norm": 0.9765625, "learning_rate": 0.00016142858433549443, "loss": 0.035, "step": 6207 }, { "epoch": 2.8975495915986, "grad_norm": 0.6015625, "learning_rate": 0.00016141700506987574, "loss": 0.0234, "step": 6208 }, { "epoch": 2.8980163360560094, "grad_norm": 0.87890625, "learning_rate": 0.0001614054244818808, "loss": 0.0505, "step": 6209 }, { "epoch": 2.8984830805134187, "grad_norm": 0.83203125, "learning_rate": 0.00016139384257175888, "loss": 0.0347, "step": 6210 }, { "epoch": 2.8989498249708285, "grad_norm": 0.74609375, "learning_rate": 0.00016138225933975945, "loss": 0.0362, "step": 6211 }, { "epoch": 2.899416569428238, "grad_norm": 0.93359375, "learning_rate": 0.00016137067478613187, "loss": 0.0453, "step": 6212 }, { "epoch": 2.8998833138856477, "grad_norm": 0.88671875, "learning_rate": 0.00016135908891112556, "loss": 0.0367, "step": 6213 }, { "epoch": 2.900350058343057, "grad_norm": 0.95703125, "learning_rate": 0.00016134750171499, "loss": 0.0464, "step": 6214 }, { "epoch": 2.900816802800467, "grad_norm": 0.6640625, "learning_rate": 0.00016133591319797468, "loss": 0.0433, "step": 6215 }, { "epoch": 2.901283547257876, "grad_norm": 0.99609375, "learning_rate": 0.00016132432336032906, "loss": 0.048, "step": 6216 }, { "epoch": 2.901750291715286, "grad_norm": 0.796875, "learning_rate": 0.00016131273220230274, "loss": 0.0439, "step": 6217 }, { "epoch": 2.9022170361726953, "grad_norm": 0.89453125, "learning_rate": 0.00016130113972414524, "loss": 0.0373, "step": 6218 }, { "epoch": 2.902683780630105, "grad_norm": 0.9140625, "learning_rate": 0.00016128954592610622, "loss": 0.0444, "step": 6219 }, { "epoch": 2.9031505250875145, "grad_norm": 0.8359375, "learning_rate": 0.00016127795080843525, "loss": 0.0362, "step": 6220 }, { "epoch": 2.9036172695449243, "grad_norm": 0.65234375, "learning_rate": 0.000161266354371382, "loss": 0.0312, "step": 6221 }, { "epoch": 2.9040840140023336, "grad_norm": 0.76171875, "learning_rate": 0.0001612547566151962, "loss": 0.0326, "step": 6222 }, { "epoch": 2.9045507584597434, "grad_norm": 0.5859375, "learning_rate": 0.0001612431575401275, "loss": 0.0331, "step": 6223 }, { "epoch": 2.9050175029171528, "grad_norm": 0.99609375, "learning_rate": 0.00016123155714642566, "loss": 0.0567, "step": 6224 }, { "epoch": 2.9054842473745626, "grad_norm": 0.78125, "learning_rate": 0.0001612199554343405, "loss": 0.0475, "step": 6225 }, { "epoch": 2.905950991831972, "grad_norm": 0.73828125, "learning_rate": 0.00016120835240412173, "loss": 0.0377, "step": 6226 }, { "epoch": 2.9064177362893817, "grad_norm": 0.58984375, "learning_rate": 0.00016119674805601926, "loss": 0.0335, "step": 6227 }, { "epoch": 2.906884480746791, "grad_norm": 0.99609375, "learning_rate": 0.00016118514239028286, "loss": 0.0427, "step": 6228 }, { "epoch": 2.907351225204201, "grad_norm": 0.87109375, "learning_rate": 0.0001611735354071625, "loss": 0.0457, "step": 6229 }, { "epoch": 2.90781796966161, "grad_norm": 0.7890625, "learning_rate": 0.00016116192710690803, "loss": 0.0472, "step": 6230 }, { "epoch": 2.90828471411902, "grad_norm": 0.62890625, "learning_rate": 0.0001611503174897694, "loss": 0.0339, "step": 6231 }, { "epoch": 2.9087514585764294, "grad_norm": 0.76953125, "learning_rate": 0.0001611387065559966, "loss": 0.0485, "step": 6232 }, { "epoch": 2.9092182030338387, "grad_norm": 0.921875, "learning_rate": 0.00016112709430583962, "loss": 0.0457, "step": 6233 }, { "epoch": 2.9096849474912485, "grad_norm": 0.65625, "learning_rate": 0.00016111548073954846, "loss": 0.0335, "step": 6234 }, { "epoch": 2.9101516919486583, "grad_norm": 0.83984375, "learning_rate": 0.00016110386585737322, "loss": 0.046, "step": 6235 }, { "epoch": 2.9106184364060677, "grad_norm": 0.7734375, "learning_rate": 0.00016109224965956393, "loss": 0.0432, "step": 6236 }, { "epoch": 2.911085180863477, "grad_norm": 0.9140625, "learning_rate": 0.0001610806321463707, "loss": 0.031, "step": 6237 }, { "epoch": 2.911551925320887, "grad_norm": 0.8671875, "learning_rate": 0.00016106901331804373, "loss": 0.0492, "step": 6238 }, { "epoch": 2.9120186697782966, "grad_norm": 0.73046875, "learning_rate": 0.00016105739317483312, "loss": 0.0518, "step": 6239 }, { "epoch": 2.912485414235706, "grad_norm": 0.79296875, "learning_rate": 0.0001610457717169891, "loss": 0.0398, "step": 6240 }, { "epoch": 2.9129521586931153, "grad_norm": 0.76171875, "learning_rate": 0.0001610341489447619, "loss": 0.0396, "step": 6241 }, { "epoch": 2.913418903150525, "grad_norm": 0.78515625, "learning_rate": 0.00016102252485840173, "loss": 0.0465, "step": 6242 }, { "epoch": 2.913885647607935, "grad_norm": 0.765625, "learning_rate": 0.0001610108994581589, "loss": 0.0551, "step": 6243 }, { "epoch": 2.9143523920653442, "grad_norm": 0.9375, "learning_rate": 0.00016099927274428372, "loss": 0.059, "step": 6244 }, { "epoch": 2.9148191365227536, "grad_norm": 0.90234375, "learning_rate": 0.00016098764471702653, "loss": 0.0506, "step": 6245 }, { "epoch": 2.9152858809801634, "grad_norm": 0.68359375, "learning_rate": 0.00016097601537663764, "loss": 0.0356, "step": 6246 }, { "epoch": 2.915752625437573, "grad_norm": 0.5859375, "learning_rate": 0.0001609643847233675, "loss": 0.0336, "step": 6247 }, { "epoch": 2.9162193698949825, "grad_norm": 0.828125, "learning_rate": 0.00016095275275746654, "loss": 0.0559, "step": 6248 }, { "epoch": 2.916686114352392, "grad_norm": 0.875, "learning_rate": 0.00016094111947918517, "loss": 0.0441, "step": 6249 }, { "epoch": 2.9171528588098017, "grad_norm": 0.81640625, "learning_rate": 0.00016092948488877388, "loss": 0.0342, "step": 6250 }, { "epoch": 2.917619603267211, "grad_norm": 0.7578125, "learning_rate": 0.0001609178489864832, "loss": 0.0345, "step": 6251 }, { "epoch": 2.918086347724621, "grad_norm": 0.69140625, "learning_rate": 0.00016090621177256362, "loss": 0.0549, "step": 6252 }, { "epoch": 2.91855309218203, "grad_norm": 0.9140625, "learning_rate": 0.0001608945732472657, "loss": 0.0459, "step": 6253 }, { "epoch": 2.91901983663944, "grad_norm": 0.86328125, "learning_rate": 0.0001608829334108401, "loss": 0.0393, "step": 6254 }, { "epoch": 2.9194865810968493, "grad_norm": 0.8203125, "learning_rate": 0.00016087129226353736, "loss": 0.0513, "step": 6255 }, { "epoch": 2.919953325554259, "grad_norm": 0.67578125, "learning_rate": 0.00016085964980560818, "loss": 0.0358, "step": 6256 }, { "epoch": 2.9204200700116685, "grad_norm": 1.1796875, "learning_rate": 0.0001608480060373032, "loss": 0.0471, "step": 6257 }, { "epoch": 2.9208868144690783, "grad_norm": 0.8203125, "learning_rate": 0.00016083636095887315, "loss": 0.0364, "step": 6258 }, { "epoch": 2.9213535589264876, "grad_norm": 0.953125, "learning_rate": 0.00016082471457056876, "loss": 0.0448, "step": 6259 }, { "epoch": 2.9218203033838974, "grad_norm": 0.859375, "learning_rate": 0.00016081306687264077, "loss": 0.0618, "step": 6260 }, { "epoch": 2.9222870478413068, "grad_norm": 0.71484375, "learning_rate": 0.00016080141786533995, "loss": 0.0443, "step": 6261 }, { "epoch": 2.9227537922987166, "grad_norm": 0.6328125, "learning_rate": 0.00016078976754891716, "loss": 0.0355, "step": 6262 }, { "epoch": 2.923220536756126, "grad_norm": 0.8046875, "learning_rate": 0.00016077811592362325, "loss": 0.0477, "step": 6263 }, { "epoch": 2.9236872812135357, "grad_norm": 0.80859375, "learning_rate": 0.00016076646298970903, "loss": 0.0445, "step": 6264 }, { "epoch": 2.924154025670945, "grad_norm": 0.859375, "learning_rate": 0.0001607548087474255, "loss": 0.0447, "step": 6265 }, { "epoch": 2.924620770128355, "grad_norm": 0.63671875, "learning_rate": 0.00016074315319702346, "loss": 0.0392, "step": 6266 }, { "epoch": 2.925087514585764, "grad_norm": 0.67578125, "learning_rate": 0.00016073149633875398, "loss": 0.0396, "step": 6267 }, { "epoch": 2.925554259043174, "grad_norm": 0.640625, "learning_rate": 0.00016071983817286798, "loss": 0.0324, "step": 6268 }, { "epoch": 2.9260210035005834, "grad_norm": 0.83203125, "learning_rate": 0.0001607081786996165, "loss": 0.048, "step": 6269 }, { "epoch": 2.926487747957993, "grad_norm": 0.67578125, "learning_rate": 0.00016069651791925059, "loss": 0.0465, "step": 6270 }, { "epoch": 2.9269544924154025, "grad_norm": 0.87109375, "learning_rate": 0.0001606848558320213, "loss": 0.0492, "step": 6271 }, { "epoch": 2.927421236872812, "grad_norm": 0.435546875, "learning_rate": 0.00016067319243817972, "loss": 0.0228, "step": 6272 }, { "epoch": 2.9278879813302217, "grad_norm": 0.8203125, "learning_rate": 0.000160661527737977, "loss": 0.0409, "step": 6273 }, { "epoch": 2.9283547257876315, "grad_norm": 0.703125, "learning_rate": 0.00016064986173166426, "loss": 0.0319, "step": 6274 }, { "epoch": 2.928821470245041, "grad_norm": 0.7421875, "learning_rate": 0.00016063819441949271, "loss": 0.0407, "step": 6275 }, { "epoch": 2.92928821470245, "grad_norm": 0.7578125, "learning_rate": 0.0001606265258017136, "loss": 0.0447, "step": 6276 }, { "epoch": 2.92975495915986, "grad_norm": 0.70703125, "learning_rate": 0.00016061485587857806, "loss": 0.0401, "step": 6277 }, { "epoch": 2.9302217036172697, "grad_norm": 0.9375, "learning_rate": 0.00016060318465033747, "loss": 0.0515, "step": 6278 }, { "epoch": 2.930688448074679, "grad_norm": 0.91796875, "learning_rate": 0.00016059151211724303, "loss": 0.0621, "step": 6279 }, { "epoch": 2.9311551925320884, "grad_norm": 0.95703125, "learning_rate": 0.00016057983827954612, "loss": 0.0515, "step": 6280 }, { "epoch": 2.9316219369894982, "grad_norm": 0.84375, "learning_rate": 0.0001605681631374981, "loss": 0.0612, "step": 6281 }, { "epoch": 2.932088681446908, "grad_norm": 0.8125, "learning_rate": 0.0001605564866913503, "loss": 0.0538, "step": 6282 }, { "epoch": 2.9325554259043174, "grad_norm": 0.87109375, "learning_rate": 0.0001605448089413542, "loss": 0.0487, "step": 6283 }, { "epoch": 2.9330221703617267, "grad_norm": 0.91015625, "learning_rate": 0.00016053312988776117, "loss": 0.0381, "step": 6284 }, { "epoch": 2.9334889148191365, "grad_norm": 0.58984375, "learning_rate": 0.00016052144953082267, "loss": 0.0286, "step": 6285 }, { "epoch": 2.9339556592765463, "grad_norm": 0.79296875, "learning_rate": 0.0001605097678707902, "loss": 0.039, "step": 6286 }, { "epoch": 2.9344224037339557, "grad_norm": 0.76953125, "learning_rate": 0.0001604980849079153, "loss": 0.0393, "step": 6287 }, { "epoch": 2.934889148191365, "grad_norm": 0.79296875, "learning_rate": 0.00016048640064244952, "loss": 0.0372, "step": 6288 }, { "epoch": 2.935355892648775, "grad_norm": 0.640625, "learning_rate": 0.00016047471507464443, "loss": 0.0378, "step": 6289 }, { "epoch": 2.9358226371061846, "grad_norm": 0.69140625, "learning_rate": 0.00016046302820475164, "loss": 0.0501, "step": 6290 }, { "epoch": 2.936289381563594, "grad_norm": 0.6640625, "learning_rate": 0.00016045134003302272, "loss": 0.0395, "step": 6291 }, { "epoch": 2.9367561260210033, "grad_norm": 0.75, "learning_rate": 0.0001604396505597094, "loss": 0.0455, "step": 6292 }, { "epoch": 2.937222870478413, "grad_norm": 0.462890625, "learning_rate": 0.00016042795978506334, "loss": 0.0247, "step": 6293 }, { "epoch": 2.9376896149358225, "grad_norm": 0.81640625, "learning_rate": 0.00016041626770933628, "loss": 0.0381, "step": 6294 }, { "epoch": 2.9381563593932323, "grad_norm": 0.83203125, "learning_rate": 0.0001604045743327799, "loss": 0.0556, "step": 6295 }, { "epoch": 2.9386231038506416, "grad_norm": 0.7421875, "learning_rate": 0.00016039287965564607, "loss": 0.0535, "step": 6296 }, { "epoch": 2.9390898483080514, "grad_norm": 0.8828125, "learning_rate": 0.0001603811836781865, "loss": 0.0481, "step": 6297 }, { "epoch": 2.9395565927654608, "grad_norm": 0.7109375, "learning_rate": 0.00016036948640065307, "loss": 0.0367, "step": 6298 }, { "epoch": 2.9400233372228706, "grad_norm": 0.7734375, "learning_rate": 0.0001603577878232976, "loss": 0.0484, "step": 6299 }, { "epoch": 2.94049008168028, "grad_norm": 0.80078125, "learning_rate": 0.00016034608794637199, "loss": 0.0342, "step": 6300 }, { "epoch": 2.9409568261376897, "grad_norm": 0.703125, "learning_rate": 0.00016033438677012817, "loss": 0.0362, "step": 6301 }, { "epoch": 2.941423570595099, "grad_norm": 0.91015625, "learning_rate": 0.00016032268429481804, "loss": 0.0472, "step": 6302 }, { "epoch": 2.941890315052509, "grad_norm": 0.578125, "learning_rate": 0.0001603109805206936, "loss": 0.035, "step": 6303 }, { "epoch": 2.942357059509918, "grad_norm": 1.015625, "learning_rate": 0.00016029927544800683, "loss": 0.0471, "step": 6304 }, { "epoch": 2.942823803967328, "grad_norm": 0.75, "learning_rate": 0.00016028756907700975, "loss": 0.0452, "step": 6305 }, { "epoch": 2.9432905484247374, "grad_norm": 0.77734375, "learning_rate": 0.00016027586140795444, "loss": 0.0305, "step": 6306 }, { "epoch": 2.943757292882147, "grad_norm": 0.7890625, "learning_rate": 0.00016026415244109292, "loss": 0.0422, "step": 6307 }, { "epoch": 2.9442240373395565, "grad_norm": 0.95703125, "learning_rate": 0.00016025244217667738, "loss": 0.0515, "step": 6308 }, { "epoch": 2.9446907817969663, "grad_norm": 0.86328125, "learning_rate": 0.00016024073061495986, "loss": 0.045, "step": 6309 }, { "epoch": 2.9451575262543757, "grad_norm": 0.65625, "learning_rate": 0.0001602290177561926, "loss": 0.0386, "step": 6310 }, { "epoch": 2.9456242707117855, "grad_norm": 0.69140625, "learning_rate": 0.00016021730360062778, "loss": 0.0391, "step": 6311 }, { "epoch": 2.946091015169195, "grad_norm": 0.8359375, "learning_rate": 0.00016020558814851756, "loss": 0.0462, "step": 6312 }, { "epoch": 2.9465577596266046, "grad_norm": 0.77734375, "learning_rate": 0.00016019387140011425, "loss": 0.0324, "step": 6313 }, { "epoch": 2.947024504084014, "grad_norm": 0.74609375, "learning_rate": 0.00016018215335567007, "loss": 0.0353, "step": 6314 }, { "epoch": 2.9474912485414233, "grad_norm": 0.828125, "learning_rate": 0.00016017043401543742, "loss": 0.0374, "step": 6315 }, { "epoch": 2.947957992998833, "grad_norm": 0.55078125, "learning_rate": 0.00016015871337966852, "loss": 0.032, "step": 6316 }, { "epoch": 2.948424737456243, "grad_norm": 1.1015625, "learning_rate": 0.0001601469914486158, "loss": 0.0499, "step": 6317 }, { "epoch": 2.9488914819136522, "grad_norm": 0.7578125, "learning_rate": 0.00016013526822253159, "loss": 0.041, "step": 6318 }, { "epoch": 2.9493582263710616, "grad_norm": 0.83984375, "learning_rate": 0.00016012354370166832, "loss": 0.0414, "step": 6319 }, { "epoch": 2.9498249708284714, "grad_norm": 0.78515625, "learning_rate": 0.00016011181788627848, "loss": 0.0444, "step": 6320 }, { "epoch": 2.950291715285881, "grad_norm": 0.75390625, "learning_rate": 0.00016010009077661449, "loss": 0.0432, "step": 6321 }, { "epoch": 2.9507584597432905, "grad_norm": 0.7578125, "learning_rate": 0.00016008836237292888, "loss": 0.0254, "step": 6322 }, { "epoch": 2.9512252042007, "grad_norm": 0.78125, "learning_rate": 0.00016007663267547415, "loss": 0.0288, "step": 6323 }, { "epoch": 2.9516919486581097, "grad_norm": 0.765625, "learning_rate": 0.00016006490168450285, "loss": 0.0448, "step": 6324 }, { "epoch": 2.9521586931155195, "grad_norm": 0.73828125, "learning_rate": 0.0001600531694002676, "loss": 0.05, "step": 6325 }, { "epoch": 2.952625437572929, "grad_norm": 0.92578125, "learning_rate": 0.00016004143582302096, "loss": 0.0324, "step": 6326 }, { "epoch": 2.953092182030338, "grad_norm": 0.7734375, "learning_rate": 0.0001600297009530156, "loss": 0.0331, "step": 6327 }, { "epoch": 2.953558926487748, "grad_norm": 0.98828125, "learning_rate": 0.00016001796479050417, "loss": 0.0556, "step": 6328 }, { "epoch": 2.9540256709451578, "grad_norm": 1.0546875, "learning_rate": 0.00016000622733573934, "loss": 0.0485, "step": 6329 }, { "epoch": 2.954492415402567, "grad_norm": 0.68359375, "learning_rate": 0.0001599944885889739, "loss": 0.0312, "step": 6330 }, { "epoch": 2.9549591598599765, "grad_norm": 0.859375, "learning_rate": 0.00015998274855046058, "loss": 0.0481, "step": 6331 }, { "epoch": 2.9554259043173863, "grad_norm": 0.76953125, "learning_rate": 0.0001599710072204521, "loss": 0.0259, "step": 6332 }, { "epoch": 2.955892648774796, "grad_norm": 0.83203125, "learning_rate": 0.00015995926459920126, "loss": 0.0483, "step": 6333 }, { "epoch": 2.9563593932322054, "grad_norm": 0.73828125, "learning_rate": 0.00015994752068696096, "loss": 0.0487, "step": 6334 }, { "epoch": 2.9568261376896148, "grad_norm": 0.91015625, "learning_rate": 0.00015993577548398404, "loss": 0.049, "step": 6335 }, { "epoch": 2.9572928821470246, "grad_norm": 0.7578125, "learning_rate": 0.00015992402899052334, "loss": 0.0377, "step": 6336 }, { "epoch": 2.957759626604434, "grad_norm": 0.8046875, "learning_rate": 0.00015991228120683184, "loss": 0.0303, "step": 6337 }, { "epoch": 2.9582263710618437, "grad_norm": 0.98828125, "learning_rate": 0.00015990053213316244, "loss": 0.0465, "step": 6338 }, { "epoch": 2.958693115519253, "grad_norm": 0.796875, "learning_rate": 0.00015988878176976813, "loss": 0.0297, "step": 6339 }, { "epoch": 2.959159859976663, "grad_norm": 0.6796875, "learning_rate": 0.00015987703011690187, "loss": 0.0374, "step": 6340 }, { "epoch": 2.959626604434072, "grad_norm": 0.69140625, "learning_rate": 0.00015986527717481674, "loss": 0.0391, "step": 6341 }, { "epoch": 2.960093348891482, "grad_norm": 0.7578125, "learning_rate": 0.00015985352294376577, "loss": 0.0393, "step": 6342 }, { "epoch": 2.9605600933488914, "grad_norm": 0.7265625, "learning_rate": 0.00015984176742400203, "loss": 0.0409, "step": 6343 }, { "epoch": 2.961026837806301, "grad_norm": 0.87109375, "learning_rate": 0.00015983001061577868, "loss": 0.0458, "step": 6344 }, { "epoch": 2.9614935822637105, "grad_norm": 1.109375, "learning_rate": 0.00015981825251934877, "loss": 0.0505, "step": 6345 }, { "epoch": 2.9619603267211203, "grad_norm": 0.7421875, "learning_rate": 0.00015980649313496553, "loss": 0.0355, "step": 6346 }, { "epoch": 2.9624270711785297, "grad_norm": 0.86328125, "learning_rate": 0.00015979473246288214, "loss": 0.0356, "step": 6347 }, { "epoch": 2.9628938156359395, "grad_norm": 0.90234375, "learning_rate": 0.00015978297050335183, "loss": 0.0476, "step": 6348 }, { "epoch": 2.963360560093349, "grad_norm": 0.796875, "learning_rate": 0.0001597712072566278, "loss": 0.0451, "step": 6349 }, { "epoch": 2.9638273045507586, "grad_norm": 0.63671875, "learning_rate": 0.0001597594427229634, "loss": 0.0349, "step": 6350 }, { "epoch": 2.964294049008168, "grad_norm": 0.74609375, "learning_rate": 0.00015974767690261188, "loss": 0.0389, "step": 6351 }, { "epoch": 2.9647607934655777, "grad_norm": 0.7109375, "learning_rate": 0.00015973590979582658, "loss": 0.036, "step": 6352 }, { "epoch": 2.965227537922987, "grad_norm": 0.90625, "learning_rate": 0.00015972414140286086, "loss": 0.0485, "step": 6353 }, { "epoch": 2.965694282380397, "grad_norm": 0.89453125, "learning_rate": 0.00015971237172396813, "loss": 0.0428, "step": 6354 }, { "epoch": 2.9661610268378062, "grad_norm": 0.68359375, "learning_rate": 0.00015970060075940179, "loss": 0.0364, "step": 6355 }, { "epoch": 2.966627771295216, "grad_norm": 0.56640625, "learning_rate": 0.00015968882850941524, "loss": 0.0164, "step": 6356 }, { "epoch": 2.9670945157526254, "grad_norm": 0.8359375, "learning_rate": 0.000159677054974262, "loss": 0.0385, "step": 6357 }, { "epoch": 2.9675612602100347, "grad_norm": 0.875, "learning_rate": 0.00015966528015419557, "loss": 0.047, "step": 6358 }, { "epoch": 2.9680280046674445, "grad_norm": 0.80859375, "learning_rate": 0.00015965350404946945, "loss": 0.0409, "step": 6359 }, { "epoch": 2.9684947491248543, "grad_norm": 0.71875, "learning_rate": 0.0001596417266603372, "loss": 0.0471, "step": 6360 }, { "epoch": 2.9689614935822637, "grad_norm": 0.87890625, "learning_rate": 0.0001596299479870524, "loss": 0.0387, "step": 6361 }, { "epoch": 2.969428238039673, "grad_norm": 0.89453125, "learning_rate": 0.00015961816802986865, "loss": 0.0549, "step": 6362 }, { "epoch": 2.969894982497083, "grad_norm": 0.8359375, "learning_rate": 0.00015960638678903962, "loss": 0.0474, "step": 6363 }, { "epoch": 2.9703617269544926, "grad_norm": 0.8515625, "learning_rate": 0.00015959460426481892, "loss": 0.0386, "step": 6364 }, { "epoch": 2.970828471411902, "grad_norm": 0.76171875, "learning_rate": 0.00015958282045746026, "loss": 0.0283, "step": 6365 }, { "epoch": 2.9712952158693113, "grad_norm": 1.0390625, "learning_rate": 0.0001595710353672174, "loss": 0.0505, "step": 6366 }, { "epoch": 2.971761960326721, "grad_norm": 0.7578125, "learning_rate": 0.00015955924899434406, "loss": 0.0603, "step": 6367 }, { "epoch": 2.972228704784131, "grad_norm": 0.91796875, "learning_rate": 0.00015954746133909397, "loss": 0.0466, "step": 6368 }, { "epoch": 2.9726954492415403, "grad_norm": 0.79296875, "learning_rate": 0.000159535672401721, "loss": 0.0427, "step": 6369 }, { "epoch": 2.9731621936989496, "grad_norm": 0.70703125, "learning_rate": 0.00015952388218247893, "loss": 0.0315, "step": 6370 }, { "epoch": 2.9736289381563594, "grad_norm": 0.69140625, "learning_rate": 0.00015951209068162166, "loss": 0.0326, "step": 6371 }, { "epoch": 2.974095682613769, "grad_norm": 0.61328125, "learning_rate": 0.00015950029789940298, "loss": 0.0318, "step": 6372 }, { "epoch": 2.9745624270711786, "grad_norm": 0.71875, "learning_rate": 0.00015948850383607695, "loss": 0.0409, "step": 6373 }, { "epoch": 2.975029171528588, "grad_norm": 0.75, "learning_rate": 0.00015947670849189737, "loss": 0.0404, "step": 6374 }, { "epoch": 2.9754959159859977, "grad_norm": 0.76953125, "learning_rate": 0.0001594649118671183, "loss": 0.0433, "step": 6375 }, { "epoch": 2.975962660443407, "grad_norm": 0.80078125, "learning_rate": 0.0001594531139619937, "loss": 0.0553, "step": 6376 }, { "epoch": 2.976429404900817, "grad_norm": 0.890625, "learning_rate": 0.00015944131477677758, "loss": 0.0416, "step": 6377 }, { "epoch": 2.976896149358226, "grad_norm": 0.5390625, "learning_rate": 0.00015942951431172398, "loss": 0.0253, "step": 6378 }, { "epoch": 2.977362893815636, "grad_norm": 0.671875, "learning_rate": 0.00015941771256708704, "loss": 0.0282, "step": 6379 }, { "epoch": 2.9778296382730454, "grad_norm": 0.76171875, "learning_rate": 0.0001594059095431208, "loss": 0.0562, "step": 6380 }, { "epoch": 2.978296382730455, "grad_norm": 0.60546875, "learning_rate": 0.00015939410524007944, "loss": 0.0335, "step": 6381 }, { "epoch": 2.9787631271878645, "grad_norm": 0.8203125, "learning_rate": 0.00015938229965821704, "loss": 0.0421, "step": 6382 }, { "epoch": 2.9792298716452743, "grad_norm": 0.828125, "learning_rate": 0.0001593704927977879, "loss": 0.0546, "step": 6383 }, { "epoch": 2.9796966161026837, "grad_norm": 0.82421875, "learning_rate": 0.00015935868465904616, "loss": 0.0417, "step": 6384 }, { "epoch": 2.9801633605600935, "grad_norm": 0.53515625, "learning_rate": 0.00015934687524224607, "loss": 0.0323, "step": 6385 }, { "epoch": 2.980630105017503, "grad_norm": 0.828125, "learning_rate": 0.00015933506454764192, "loss": 0.0424, "step": 6386 }, { "epoch": 2.9810968494749126, "grad_norm": 0.8828125, "learning_rate": 0.00015932325257548798, "loss": 0.048, "step": 6387 }, { "epoch": 2.981563593932322, "grad_norm": 0.71484375, "learning_rate": 0.0001593114393260386, "loss": 0.035, "step": 6388 }, { "epoch": 2.9820303383897317, "grad_norm": 0.87890625, "learning_rate": 0.00015929962479954815, "loss": 0.0538, "step": 6389 }, { "epoch": 2.982497082847141, "grad_norm": 0.7890625, "learning_rate": 0.00015928780899627097, "loss": 0.0348, "step": 6390 }, { "epoch": 2.982963827304551, "grad_norm": 0.9140625, "learning_rate": 0.00015927599191646144, "loss": 0.042, "step": 6391 }, { "epoch": 2.9834305717619602, "grad_norm": 0.73828125, "learning_rate": 0.00015926417356037408, "loss": 0.0442, "step": 6392 }, { "epoch": 2.98389731621937, "grad_norm": 0.75, "learning_rate": 0.0001592523539282633, "loss": 0.0461, "step": 6393 }, { "epoch": 2.9843640606767794, "grad_norm": 0.82421875, "learning_rate": 0.00015924053302038358, "loss": 0.0442, "step": 6394 }, { "epoch": 2.984830805134189, "grad_norm": 0.66015625, "learning_rate": 0.00015922871083698948, "loss": 0.0298, "step": 6395 }, { "epoch": 2.9852975495915985, "grad_norm": 0.88671875, "learning_rate": 0.0001592168873783355, "loss": 0.0611, "step": 6396 }, { "epoch": 2.985764294049008, "grad_norm": 0.640625, "learning_rate": 0.00015920506264467625, "loss": 0.0385, "step": 6397 }, { "epoch": 2.9862310385064177, "grad_norm": 0.6953125, "learning_rate": 0.00015919323663626632, "loss": 0.0307, "step": 6398 }, { "epoch": 2.9866977829638275, "grad_norm": 0.92578125, "learning_rate": 0.0001591814093533603, "loss": 0.0416, "step": 6399 }, { "epoch": 2.987164527421237, "grad_norm": 0.953125, "learning_rate": 0.0001591695807962129, "loss": 0.0446, "step": 6400 }, { "epoch": 2.987631271878646, "grad_norm": 0.79296875, "learning_rate": 0.00015915775096507874, "loss": 0.0431, "step": 6401 }, { "epoch": 2.988098016336056, "grad_norm": 0.81640625, "learning_rate": 0.00015914591986021263, "loss": 0.0493, "step": 6402 }, { "epoch": 2.988564760793466, "grad_norm": 0.75, "learning_rate": 0.0001591340874818692, "loss": 0.0398, "step": 6403 }, { "epoch": 2.989031505250875, "grad_norm": 0.9140625, "learning_rate": 0.00015912225383030325, "loss": 0.0386, "step": 6404 }, { "epoch": 2.9894982497082845, "grad_norm": 0.68359375, "learning_rate": 0.0001591104189057696, "loss": 0.0306, "step": 6405 }, { "epoch": 2.9899649941656943, "grad_norm": 0.8125, "learning_rate": 0.00015909858270852302, "loss": 0.044, "step": 6406 }, { "epoch": 2.990431738623104, "grad_norm": 0.5859375, "learning_rate": 0.0001590867452388184, "loss": 0.0242, "step": 6407 }, { "epoch": 2.9908984830805134, "grad_norm": 0.96484375, "learning_rate": 0.0001590749064969106, "loss": 0.0474, "step": 6408 }, { "epoch": 2.9913652275379228, "grad_norm": 0.84765625, "learning_rate": 0.0001590630664830545, "loss": 0.0436, "step": 6409 }, { "epoch": 2.9918319719953326, "grad_norm": 0.63671875, "learning_rate": 0.00015905122519750507, "loss": 0.0323, "step": 6410 }, { "epoch": 2.9922987164527424, "grad_norm": 0.8359375, "learning_rate": 0.0001590393826405172, "loss": 0.0384, "step": 6411 }, { "epoch": 2.9927654609101517, "grad_norm": 0.7890625, "learning_rate": 0.00015902753881234598, "loss": 0.0256, "step": 6412 }, { "epoch": 2.993232205367561, "grad_norm": 0.578125, "learning_rate": 0.0001590156937132463, "loss": 0.0312, "step": 6413 }, { "epoch": 2.993698949824971, "grad_norm": 0.85546875, "learning_rate": 0.00015900384734347328, "loss": 0.0376, "step": 6414 }, { "epoch": 2.9941656942823807, "grad_norm": 0.93359375, "learning_rate": 0.00015899199970328194, "loss": 0.0414, "step": 6415 }, { "epoch": 2.99463243873979, "grad_norm": 0.77734375, "learning_rate": 0.00015898015079292746, "loss": 0.0431, "step": 6416 }, { "epoch": 2.9950991831971994, "grad_norm": 0.9609375, "learning_rate": 0.00015896830061266483, "loss": 0.0403, "step": 6417 }, { "epoch": 2.995565927654609, "grad_norm": 0.7109375, "learning_rate": 0.00015895644916274929, "loss": 0.0296, "step": 6418 }, { "epoch": 2.9960326721120185, "grad_norm": 0.93359375, "learning_rate": 0.00015894459644343596, "loss": 0.0495, "step": 6419 }, { "epoch": 2.9964994165694283, "grad_norm": 1.0390625, "learning_rate": 0.0001589327424549801, "loss": 0.0718, "step": 6420 }, { "epoch": 2.9969661610268377, "grad_norm": 0.75, "learning_rate": 0.0001589208871976369, "loss": 0.0417, "step": 6421 }, { "epoch": 2.9974329054842475, "grad_norm": 0.81640625, "learning_rate": 0.00015890903067166157, "loss": 0.048, "step": 6422 }, { "epoch": 2.997899649941657, "grad_norm": 0.859375, "learning_rate": 0.00015889717287730952, "loss": 0.0491, "step": 6423 }, { "epoch": 2.9983663943990666, "grad_norm": 0.61328125, "learning_rate": 0.00015888531381483598, "loss": 0.0301, "step": 6424 }, { "epoch": 2.998833138856476, "grad_norm": 0.80859375, "learning_rate": 0.00015887345348449626, "loss": 0.0449, "step": 6425 }, { "epoch": 2.9992998833138858, "grad_norm": 0.7265625, "learning_rate": 0.00015886159188654585, "loss": 0.0429, "step": 6426 }, { "epoch": 2.999766627771295, "grad_norm": 0.73828125, "learning_rate": 0.00015884972902124, "loss": 0.0344, "step": 6427 }, { "epoch": 3.000233372228705, "grad_norm": 0.8046875, "learning_rate": 0.00015883786488883416, "loss": 0.0389, "step": 6428 }, { "epoch": 3.0007001166861142, "grad_norm": 0.7890625, "learning_rate": 0.00015882599948958384, "loss": 0.0385, "step": 6429 }, { "epoch": 3.001166861143524, "grad_norm": 0.60546875, "learning_rate": 0.00015881413282374448, "loss": 0.0257, "step": 6430 }, { "epoch": 3.0016336056009334, "grad_norm": 0.71484375, "learning_rate": 0.00015880226489157158, "loss": 0.0439, "step": 6431 }, { "epoch": 3.002100350058343, "grad_norm": 0.61328125, "learning_rate": 0.00015879039569332068, "loss": 0.0352, "step": 6432 }, { "epoch": 3.002100350058343, "eval_loss": 1.6984552145004272, "eval_runtime": 56.6516, "eval_samples_per_second": 31.844, "eval_steps_per_second": 3.989, "step": 6432 }, { "epoch": 3.0025670945157525, "grad_norm": 0.6640625, "learning_rate": 0.00015877852522924732, "loss": 0.0355, "step": 6433 }, { "epoch": 3.0030338389731623, "grad_norm": 0.75, "learning_rate": 0.00015876665349960712, "loss": 0.0413, "step": 6434 }, { "epoch": 3.0035005834305717, "grad_norm": 0.7421875, "learning_rate": 0.00015875478050465564, "loss": 0.0377, "step": 6435 }, { "epoch": 3.0039673278879815, "grad_norm": 0.625, "learning_rate": 0.00015874290624464857, "loss": 0.0338, "step": 6436 }, { "epoch": 3.004434072345391, "grad_norm": 0.66015625, "learning_rate": 0.00015873103071984153, "loss": 0.032, "step": 6437 }, { "epoch": 3.0049008168028006, "grad_norm": 0.7734375, "learning_rate": 0.00015871915393049024, "loss": 0.0349, "step": 6438 }, { "epoch": 3.00536756126021, "grad_norm": 0.734375, "learning_rate": 0.00015870727587685043, "loss": 0.0259, "step": 6439 }, { "epoch": 3.00583430571762, "grad_norm": 0.765625, "learning_rate": 0.0001586953965591778, "loss": 0.0251, "step": 6440 }, { "epoch": 3.006301050175029, "grad_norm": 1.1484375, "learning_rate": 0.00015868351597772822, "loss": 0.0528, "step": 6441 }, { "epoch": 3.006767794632439, "grad_norm": 0.71484375, "learning_rate": 0.00015867163413275738, "loss": 0.0298, "step": 6442 }, { "epoch": 3.0072345390898483, "grad_norm": 0.81640625, "learning_rate": 0.00015865975102452118, "loss": 0.04, "step": 6443 }, { "epoch": 3.007701283547258, "grad_norm": 0.63671875, "learning_rate": 0.00015864786665327544, "loss": 0.028, "step": 6444 }, { "epoch": 3.0081680280046674, "grad_norm": 0.7109375, "learning_rate": 0.00015863598101927608, "loss": 0.0247, "step": 6445 }, { "epoch": 3.0086347724620772, "grad_norm": 0.86328125, "learning_rate": 0.000158624094122779, "loss": 0.0329, "step": 6446 }, { "epoch": 3.0091015169194866, "grad_norm": 0.69140625, "learning_rate": 0.0001586122059640401, "loss": 0.0345, "step": 6447 }, { "epoch": 3.0095682613768964, "grad_norm": 0.8125, "learning_rate": 0.00015860031654331538, "loss": 0.0317, "step": 6448 }, { "epoch": 3.0100350058343057, "grad_norm": 0.828125, "learning_rate": 0.00015858842586086085, "loss": 0.0252, "step": 6449 }, { "epoch": 3.010501750291715, "grad_norm": 0.87890625, "learning_rate": 0.0001585765339169325, "loss": 0.0352, "step": 6450 }, { "epoch": 3.010968494749125, "grad_norm": 0.69921875, "learning_rate": 0.00015856464071178637, "loss": 0.0356, "step": 6451 }, { "epoch": 3.011435239206534, "grad_norm": 0.7890625, "learning_rate": 0.00015855274624567854, "loss": 0.0306, "step": 6452 }, { "epoch": 3.011901983663944, "grad_norm": 0.6171875, "learning_rate": 0.00015854085051886512, "loss": 0.0237, "step": 6453 }, { "epoch": 3.0123687281213534, "grad_norm": 1.359375, "learning_rate": 0.00015852895353160224, "loss": 0.0491, "step": 6454 }, { "epoch": 3.012835472578763, "grad_norm": 0.7578125, "learning_rate": 0.00015851705528414605, "loss": 0.0448, "step": 6455 }, { "epoch": 3.0133022170361725, "grad_norm": 0.734375, "learning_rate": 0.00015850515577675275, "loss": 0.0415, "step": 6456 }, { "epoch": 3.0137689614935823, "grad_norm": 0.6875, "learning_rate": 0.00015849325500967852, "loss": 0.0258, "step": 6457 }, { "epoch": 3.0142357059509917, "grad_norm": 0.67578125, "learning_rate": 0.0001584813529831796, "loss": 0.0319, "step": 6458 }, { "epoch": 3.0147024504084015, "grad_norm": 0.609375, "learning_rate": 0.00015846944969751228, "loss": 0.0236, "step": 6459 }, { "epoch": 3.015169194865811, "grad_norm": 0.83984375, "learning_rate": 0.00015845754515293284, "loss": 0.0324, "step": 6460 }, { "epoch": 3.0156359393232206, "grad_norm": 0.70703125, "learning_rate": 0.0001584456393496976, "loss": 0.0339, "step": 6461 }, { "epoch": 3.01610268378063, "grad_norm": 0.87109375, "learning_rate": 0.00015843373228806288, "loss": 0.0329, "step": 6462 }, { "epoch": 3.0165694282380398, "grad_norm": 0.7734375, "learning_rate": 0.00015842182396828505, "loss": 0.0302, "step": 6463 }, { "epoch": 3.017036172695449, "grad_norm": 0.6953125, "learning_rate": 0.00015840991439062055, "loss": 0.0253, "step": 6464 }, { "epoch": 3.017502917152859, "grad_norm": 0.8046875, "learning_rate": 0.0001583980035553258, "loss": 0.0436, "step": 6465 }, { "epoch": 3.0179696616102683, "grad_norm": 0.51171875, "learning_rate": 0.00015838609146265722, "loss": 0.0263, "step": 6466 }, { "epoch": 3.018436406067678, "grad_norm": 0.890625, "learning_rate": 0.00015837417811287132, "loss": 0.0462, "step": 6467 }, { "epoch": 3.0189031505250874, "grad_norm": 0.76953125, "learning_rate": 0.00015836226350622457, "loss": 0.0424, "step": 6468 }, { "epoch": 3.019369894982497, "grad_norm": 0.62890625, "learning_rate": 0.00015835034764297357, "loss": 0.0291, "step": 6469 }, { "epoch": 3.0198366394399065, "grad_norm": 0.796875, "learning_rate": 0.00015833843052337482, "loss": 0.0348, "step": 6470 }, { "epoch": 3.0203033838973163, "grad_norm": 0.859375, "learning_rate": 0.00015832651214768497, "loss": 0.031, "step": 6471 }, { "epoch": 3.0207701283547257, "grad_norm": 0.5625, "learning_rate": 0.00015831459251616057, "loss": 0.0251, "step": 6472 }, { "epoch": 3.0212368728121355, "grad_norm": 0.76953125, "learning_rate": 0.0001583026716290583, "loss": 0.0294, "step": 6473 }, { "epoch": 3.021703617269545, "grad_norm": 0.84765625, "learning_rate": 0.00015829074948663484, "loss": 0.0346, "step": 6474 }, { "epoch": 3.0221703617269546, "grad_norm": 0.7265625, "learning_rate": 0.00015827882608914683, "loss": 0.0346, "step": 6475 }, { "epoch": 3.022637106184364, "grad_norm": 0.80859375, "learning_rate": 0.00015826690143685106, "loss": 0.0376, "step": 6476 }, { "epoch": 3.023103850641774, "grad_norm": 0.703125, "learning_rate": 0.00015825497553000422, "loss": 0.0276, "step": 6477 }, { "epoch": 3.023570595099183, "grad_norm": 1.1875, "learning_rate": 0.0001582430483688632, "loss": 0.0324, "step": 6478 }, { "epoch": 3.024037339556593, "grad_norm": 0.75, "learning_rate": 0.00015823111995368465, "loss": 0.0325, "step": 6479 }, { "epoch": 3.0245040840140023, "grad_norm": 0.734375, "learning_rate": 0.0001582191902847255, "loss": 0.0307, "step": 6480 }, { "epoch": 3.024970828471412, "grad_norm": 0.61328125, "learning_rate": 0.0001582072593622426, "loss": 0.0273, "step": 6481 }, { "epoch": 3.0254375729288214, "grad_norm": 0.66015625, "learning_rate": 0.0001581953271864928, "loss": 0.0226, "step": 6482 }, { "epoch": 3.0259043173862312, "grad_norm": 0.71875, "learning_rate": 0.00015818339375773304, "loss": 0.0284, "step": 6483 }, { "epoch": 3.0263710618436406, "grad_norm": 0.82421875, "learning_rate": 0.0001581714590762203, "loss": 0.0378, "step": 6484 }, { "epoch": 3.0268378063010504, "grad_norm": 0.69921875, "learning_rate": 0.00015815952314221146, "loss": 0.0286, "step": 6485 }, { "epoch": 3.0273045507584597, "grad_norm": 0.703125, "learning_rate": 0.00015814758595596358, "loss": 0.02, "step": 6486 }, { "epoch": 3.0277712952158695, "grad_norm": 0.83984375, "learning_rate": 0.0001581356475177336, "loss": 0.0397, "step": 6487 }, { "epoch": 3.028238039673279, "grad_norm": 0.81640625, "learning_rate": 0.00015812370782777873, "loss": 0.0346, "step": 6488 }, { "epoch": 3.0287047841306887, "grad_norm": 0.81640625, "learning_rate": 0.00015811176688635588, "loss": 0.0311, "step": 6489 }, { "epoch": 3.029171528588098, "grad_norm": 0.51953125, "learning_rate": 0.0001580998246937222, "loss": 0.0233, "step": 6490 }, { "epoch": 3.0296382730455074, "grad_norm": 0.6640625, "learning_rate": 0.00015808788125013486, "loss": 0.0241, "step": 6491 }, { "epoch": 3.030105017502917, "grad_norm": 0.7578125, "learning_rate": 0.00015807593655585095, "loss": 0.0367, "step": 6492 }, { "epoch": 3.0305717619603265, "grad_norm": 0.8828125, "learning_rate": 0.00015806399061112772, "loss": 0.0278, "step": 6493 }, { "epoch": 3.0310385064177363, "grad_norm": 0.59375, "learning_rate": 0.00015805204341622236, "loss": 0.0311, "step": 6494 }, { "epoch": 3.0315052508751457, "grad_norm": 0.59375, "learning_rate": 0.0001580400949713921, "loss": 0.0253, "step": 6495 }, { "epoch": 3.0319719953325555, "grad_norm": 0.8125, "learning_rate": 0.00015802814527689416, "loss": 0.0313, "step": 6496 }, { "epoch": 3.032438739789965, "grad_norm": 0.76171875, "learning_rate": 0.00015801619433298587, "loss": 0.0267, "step": 6497 }, { "epoch": 3.0329054842473746, "grad_norm": 0.9375, "learning_rate": 0.0001580042421399246, "loss": 0.0408, "step": 6498 }, { "epoch": 3.033372228704784, "grad_norm": 0.8125, "learning_rate": 0.00015799228869796758, "loss": 0.039, "step": 6499 }, { "epoch": 3.0338389731621938, "grad_norm": 1.0078125, "learning_rate": 0.00015798033400737227, "loss": 0.0667, "step": 6500 }, { "epoch": 3.034305717619603, "grad_norm": 0.71875, "learning_rate": 0.00015796837806839604, "loss": 0.0361, "step": 6501 }, { "epoch": 3.034772462077013, "grad_norm": 0.80859375, "learning_rate": 0.0001579564208812963, "loss": 0.0272, "step": 6502 }, { "epoch": 3.0352392065344223, "grad_norm": 0.9609375, "learning_rate": 0.00015794446244633053, "loss": 0.0399, "step": 6503 }, { "epoch": 3.035705950991832, "grad_norm": 0.94921875, "learning_rate": 0.0001579325027637562, "loss": 0.0434, "step": 6504 }, { "epoch": 3.0361726954492414, "grad_norm": 0.69921875, "learning_rate": 0.00015792054183383077, "loss": 0.0328, "step": 6505 }, { "epoch": 3.036639439906651, "grad_norm": 0.77734375, "learning_rate": 0.00015790857965681184, "loss": 0.031, "step": 6506 }, { "epoch": 3.0371061843640605, "grad_norm": 0.859375, "learning_rate": 0.00015789661623295692, "loss": 0.0359, "step": 6507 }, { "epoch": 3.0375729288214703, "grad_norm": 0.69140625, "learning_rate": 0.00015788465156252364, "loss": 0.0356, "step": 6508 }, { "epoch": 3.0380396732788797, "grad_norm": 0.734375, "learning_rate": 0.00015787268564576955, "loss": 0.0352, "step": 6509 }, { "epoch": 3.0385064177362895, "grad_norm": 0.81640625, "learning_rate": 0.00015786071848295237, "loss": 0.0266, "step": 6510 }, { "epoch": 3.038973162193699, "grad_norm": 0.58203125, "learning_rate": 0.0001578487500743297, "loss": 0.036, "step": 6511 }, { "epoch": 3.0394399066511086, "grad_norm": 0.984375, "learning_rate": 0.00015783678042015925, "loss": 0.0465, "step": 6512 }, { "epoch": 3.039906651108518, "grad_norm": 0.94921875, "learning_rate": 0.00015782480952069878, "loss": 0.0358, "step": 6513 }, { "epoch": 3.040373395565928, "grad_norm": 0.7109375, "learning_rate": 0.00015781283737620599, "loss": 0.0343, "step": 6514 }, { "epoch": 3.040840140023337, "grad_norm": 0.6640625, "learning_rate": 0.00015780086398693865, "loss": 0.0302, "step": 6515 }, { "epoch": 3.041306884480747, "grad_norm": 0.71875, "learning_rate": 0.00015778888935315458, "loss": 0.0388, "step": 6516 }, { "epoch": 3.0417736289381563, "grad_norm": 0.75, "learning_rate": 0.00015777691347511162, "loss": 0.0356, "step": 6517 }, { "epoch": 3.042240373395566, "grad_norm": 0.8125, "learning_rate": 0.00015776493635306758, "loss": 0.0294, "step": 6518 }, { "epoch": 3.0427071178529754, "grad_norm": 0.6640625, "learning_rate": 0.0001577529579872804, "loss": 0.0322, "step": 6519 }, { "epoch": 3.0431738623103852, "grad_norm": 0.90234375, "learning_rate": 0.00015774097837800794, "loss": 0.0398, "step": 6520 }, { "epoch": 3.0436406067677946, "grad_norm": 0.71484375, "learning_rate": 0.00015772899752550815, "loss": 0.0395, "step": 6521 }, { "epoch": 3.0441073512252044, "grad_norm": 0.78125, "learning_rate": 0.00015771701543003904, "loss": 0.0322, "step": 6522 }, { "epoch": 3.0445740956826137, "grad_norm": 0.6328125, "learning_rate": 0.0001577050320918585, "loss": 0.0267, "step": 6523 }, { "epoch": 3.0450408401400235, "grad_norm": 0.5625, "learning_rate": 0.0001576930475112246, "loss": 0.0235, "step": 6524 }, { "epoch": 3.045507584597433, "grad_norm": 0.62890625, "learning_rate": 0.00015768106168839537, "loss": 0.0221, "step": 6525 }, { "epoch": 3.0459743290548427, "grad_norm": 0.99609375, "learning_rate": 0.0001576690746236289, "loss": 0.0489, "step": 6526 }, { "epoch": 3.046441073512252, "grad_norm": 0.71484375, "learning_rate": 0.00015765708631718326, "loss": 0.034, "step": 6527 }, { "epoch": 3.046907817969662, "grad_norm": 0.77734375, "learning_rate": 0.00015764509676931656, "loss": 0.0399, "step": 6528 }, { "epoch": 3.047374562427071, "grad_norm": 0.74609375, "learning_rate": 0.000157633105980287, "loss": 0.0326, "step": 6529 }, { "epoch": 3.047841306884481, "grad_norm": 0.72265625, "learning_rate": 0.0001576211139503527, "loss": 0.0295, "step": 6530 }, { "epoch": 3.0483080513418903, "grad_norm": 0.76953125, "learning_rate": 0.00015760912067977188, "loss": 0.0352, "step": 6531 }, { "epoch": 3.0487747957993, "grad_norm": 0.84375, "learning_rate": 0.0001575971261688028, "loss": 0.0436, "step": 6532 }, { "epoch": 3.0492415402567095, "grad_norm": 1.1484375, "learning_rate": 0.00015758513041770367, "loss": 0.0388, "step": 6533 }, { "epoch": 3.049708284714119, "grad_norm": 0.73046875, "learning_rate": 0.0001575731334267328, "loss": 0.0333, "step": 6534 }, { "epoch": 3.0501750291715286, "grad_norm": 0.83984375, "learning_rate": 0.00015756113519614848, "loss": 0.0358, "step": 6535 }, { "epoch": 3.050641773628938, "grad_norm": 0.953125, "learning_rate": 0.00015754913572620907, "loss": 0.0464, "step": 6536 }, { "epoch": 3.0511085180863478, "grad_norm": 0.625, "learning_rate": 0.00015753713501717292, "loss": 0.0251, "step": 6537 }, { "epoch": 3.051575262543757, "grad_norm": 0.796875, "learning_rate": 0.00015752513306929839, "loss": 0.038, "step": 6538 }, { "epoch": 3.052042007001167, "grad_norm": 0.96484375, "learning_rate": 0.00015751312988284392, "loss": 0.0416, "step": 6539 }, { "epoch": 3.0525087514585763, "grad_norm": 0.88671875, "learning_rate": 0.00015750112545806797, "loss": 0.0425, "step": 6540 }, { "epoch": 3.052975495915986, "grad_norm": 0.73828125, "learning_rate": 0.000157489119795229, "loss": 0.0455, "step": 6541 }, { "epoch": 3.0534422403733954, "grad_norm": 0.90625, "learning_rate": 0.00015747711289458547, "loss": 0.0404, "step": 6542 }, { "epoch": 3.053908984830805, "grad_norm": 0.78125, "learning_rate": 0.00015746510475639594, "loss": 0.0385, "step": 6543 }, { "epoch": 3.0543757292882145, "grad_norm": 0.78515625, "learning_rate": 0.00015745309538091893, "loss": 0.0349, "step": 6544 }, { "epoch": 3.0548424737456243, "grad_norm": 0.62890625, "learning_rate": 0.00015744108476841305, "loss": 0.0338, "step": 6545 }, { "epoch": 3.0553092182030337, "grad_norm": 0.61328125, "learning_rate": 0.00015742907291913687, "loss": 0.029, "step": 6546 }, { "epoch": 3.0557759626604435, "grad_norm": 0.74609375, "learning_rate": 0.00015741705983334905, "loss": 0.0283, "step": 6547 }, { "epoch": 3.056242707117853, "grad_norm": 0.69140625, "learning_rate": 0.0001574050455113082, "loss": 0.0291, "step": 6548 }, { "epoch": 3.0567094515752626, "grad_norm": 0.66015625, "learning_rate": 0.00015739302995327305, "loss": 0.0321, "step": 6549 }, { "epoch": 3.057176196032672, "grad_norm": 0.5859375, "learning_rate": 0.00015738101315950227, "loss": 0.0307, "step": 6550 }, { "epoch": 3.057642940490082, "grad_norm": 0.63671875, "learning_rate": 0.0001573689951302546, "loss": 0.0275, "step": 6551 }, { "epoch": 3.058109684947491, "grad_norm": 0.96875, "learning_rate": 0.00015735697586578885, "loss": 0.0413, "step": 6552 }, { "epoch": 3.058576429404901, "grad_norm": 0.8203125, "learning_rate": 0.00015734495536636375, "loss": 0.0341, "step": 6553 }, { "epoch": 3.0590431738623103, "grad_norm": 0.67578125, "learning_rate": 0.00015733293363223813, "loss": 0.0334, "step": 6554 }, { "epoch": 3.05950991831972, "grad_norm": 1.0234375, "learning_rate": 0.00015732091066367085, "loss": 0.0612, "step": 6555 }, { "epoch": 3.0599766627771294, "grad_norm": 1.0390625, "learning_rate": 0.00015730888646092077, "loss": 0.0426, "step": 6556 }, { "epoch": 3.0604434072345392, "grad_norm": 0.671875, "learning_rate": 0.00015729686102424677, "loss": 0.0302, "step": 6557 }, { "epoch": 3.0609101516919486, "grad_norm": 0.84765625, "learning_rate": 0.00015728483435390776, "loss": 0.0351, "step": 6558 }, { "epoch": 3.0613768961493584, "grad_norm": 0.70703125, "learning_rate": 0.00015727280645016275, "loss": 0.0359, "step": 6559 }, { "epoch": 3.0618436406067677, "grad_norm": 0.62890625, "learning_rate": 0.00015726077731327066, "loss": 0.033, "step": 6560 }, { "epoch": 3.0623103850641775, "grad_norm": 0.734375, "learning_rate": 0.00015724874694349045, "loss": 0.0295, "step": 6561 }, { "epoch": 3.062777129521587, "grad_norm": 0.5625, "learning_rate": 0.00015723671534108123, "loss": 0.0346, "step": 6562 }, { "epoch": 3.0632438739789967, "grad_norm": 0.8203125, "learning_rate": 0.00015722468250630204, "loss": 0.0348, "step": 6563 }, { "epoch": 3.063710618436406, "grad_norm": 0.75390625, "learning_rate": 0.00015721264843941195, "loss": 0.0331, "step": 6564 }, { "epoch": 3.064177362893816, "grad_norm": 0.8671875, "learning_rate": 0.00015720061314067007, "loss": 0.0474, "step": 6565 }, { "epoch": 3.064644107351225, "grad_norm": 0.5546875, "learning_rate": 0.00015718857661033551, "loss": 0.0193, "step": 6566 }, { "epoch": 3.065110851808635, "grad_norm": 0.609375, "learning_rate": 0.0001571765388486674, "loss": 0.0254, "step": 6567 }, { "epoch": 3.0655775962660443, "grad_norm": 0.640625, "learning_rate": 0.00015716449985592502, "loss": 0.0327, "step": 6568 }, { "epoch": 3.066044340723454, "grad_norm": 0.6796875, "learning_rate": 0.00015715245963236752, "loss": 0.0299, "step": 6569 }, { "epoch": 3.0665110851808635, "grad_norm": 0.7578125, "learning_rate": 0.00015714041817825418, "loss": 0.032, "step": 6570 }, { "epoch": 3.0669778296382733, "grad_norm": 0.83984375, "learning_rate": 0.0001571283754938442, "loss": 0.0343, "step": 6571 }, { "epoch": 3.0674445740956826, "grad_norm": 0.74609375, "learning_rate": 0.00015711633157939692, "loss": 0.0488, "step": 6572 }, { "epoch": 3.067911318553092, "grad_norm": 0.70703125, "learning_rate": 0.00015710428643517162, "loss": 0.0354, "step": 6573 }, { "epoch": 3.0683780630105018, "grad_norm": 0.7265625, "learning_rate": 0.00015709224006142772, "loss": 0.033, "step": 6574 }, { "epoch": 3.068844807467911, "grad_norm": 0.8125, "learning_rate": 0.00015708019245842453, "loss": 0.0313, "step": 6575 }, { "epoch": 3.069311551925321, "grad_norm": 0.9609375, "learning_rate": 0.00015706814362642147, "loss": 0.0283, "step": 6576 }, { "epoch": 3.0697782963827303, "grad_norm": 0.71875, "learning_rate": 0.00015705609356567797, "loss": 0.0339, "step": 6577 }, { "epoch": 3.07024504084014, "grad_norm": 0.76171875, "learning_rate": 0.00015704404227645345, "loss": 0.0308, "step": 6578 }, { "epoch": 3.0707117852975494, "grad_norm": 0.875, "learning_rate": 0.0001570319897590074, "loss": 0.0275, "step": 6579 }, { "epoch": 3.071178529754959, "grad_norm": 0.6640625, "learning_rate": 0.00015701993601359936, "loss": 0.0316, "step": 6580 }, { "epoch": 3.0716452742123685, "grad_norm": 0.5390625, "learning_rate": 0.00015700788104048882, "loss": 0.02, "step": 6581 }, { "epoch": 3.0721120186697783, "grad_norm": 0.82421875, "learning_rate": 0.00015699582483993534, "loss": 0.0315, "step": 6582 }, { "epoch": 3.0725787631271877, "grad_norm": 0.88671875, "learning_rate": 0.00015698376741219854, "loss": 0.0459, "step": 6583 }, { "epoch": 3.0730455075845975, "grad_norm": 0.84375, "learning_rate": 0.00015697170875753798, "loss": 0.0312, "step": 6584 }, { "epoch": 3.073512252042007, "grad_norm": 0.73828125, "learning_rate": 0.00015695964887621334, "loss": 0.0312, "step": 6585 }, { "epoch": 3.0739789964994166, "grad_norm": 0.7421875, "learning_rate": 0.00015694758776848425, "loss": 0.0299, "step": 6586 }, { "epoch": 3.074445740956826, "grad_norm": 0.82421875, "learning_rate": 0.0001569355254346104, "loss": 0.0335, "step": 6587 }, { "epoch": 3.074912485414236, "grad_norm": 0.90625, "learning_rate": 0.0001569234618748515, "loss": 0.0365, "step": 6588 }, { "epoch": 3.075379229871645, "grad_norm": 0.7890625, "learning_rate": 0.00015691139708946735, "loss": 0.0455, "step": 6589 }, { "epoch": 3.075845974329055, "grad_norm": 0.6875, "learning_rate": 0.00015689933107871767, "loss": 0.0441, "step": 6590 }, { "epoch": 3.0763127187864643, "grad_norm": 1.0078125, "learning_rate": 0.00015688726384286222, "loss": 0.0511, "step": 6591 }, { "epoch": 3.076779463243874, "grad_norm": 0.8359375, "learning_rate": 0.00015687519538216092, "loss": 0.0299, "step": 6592 }, { "epoch": 3.0772462077012834, "grad_norm": 0.84375, "learning_rate": 0.00015686312569687352, "loss": 0.0271, "step": 6593 }, { "epoch": 3.0777129521586932, "grad_norm": 0.6484375, "learning_rate": 0.00015685105478725997, "loss": 0.0254, "step": 6594 }, { "epoch": 3.0781796966161026, "grad_norm": 0.6796875, "learning_rate": 0.0001568389826535801, "loss": 0.0287, "step": 6595 }, { "epoch": 3.0786464410735124, "grad_norm": 0.91796875, "learning_rate": 0.00015682690929609387, "loss": 0.0428, "step": 6596 }, { "epoch": 3.0791131855309217, "grad_norm": 0.75, "learning_rate": 0.00015681483471506125, "loss": 0.034, "step": 6597 }, { "epoch": 3.0795799299883315, "grad_norm": 0.8203125, "learning_rate": 0.00015680275891074219, "loss": 0.0479, "step": 6598 }, { "epoch": 3.080046674445741, "grad_norm": 0.6484375, "learning_rate": 0.0001567906818833967, "loss": 0.0206, "step": 6599 }, { "epoch": 3.0805134189031507, "grad_norm": 0.890625, "learning_rate": 0.00015677860363328482, "loss": 0.0438, "step": 6600 }, { "epoch": 3.08098016336056, "grad_norm": 0.578125, "learning_rate": 0.0001567665241606666, "loss": 0.0271, "step": 6601 }, { "epoch": 3.08144690781797, "grad_norm": 0.74609375, "learning_rate": 0.00015675444346580216, "loss": 0.0398, "step": 6602 }, { "epoch": 3.081913652275379, "grad_norm": 0.70703125, "learning_rate": 0.00015674236154895156, "loss": 0.0232, "step": 6603 }, { "epoch": 3.082380396732789, "grad_norm": 1.3125, "learning_rate": 0.00015673027841037497, "loss": 0.0515, "step": 6604 }, { "epoch": 3.0828471411901983, "grad_norm": 0.84375, "learning_rate": 0.00015671819405033255, "loss": 0.032, "step": 6605 }, { "epoch": 3.083313885647608, "grad_norm": 0.74609375, "learning_rate": 0.00015670610846908444, "loss": 0.0335, "step": 6606 }, { "epoch": 3.0837806301050175, "grad_norm": 0.8203125, "learning_rate": 0.00015669402166689097, "loss": 0.0334, "step": 6607 }, { "epoch": 3.0842473745624273, "grad_norm": 0.83984375, "learning_rate": 0.00015668193364401224, "loss": 0.0344, "step": 6608 }, { "epoch": 3.0847141190198366, "grad_norm": 0.86328125, "learning_rate": 0.00015666984440070863, "loss": 0.0283, "step": 6609 }, { "epoch": 3.0851808634772464, "grad_norm": 0.828125, "learning_rate": 0.00015665775393724037, "loss": 0.0319, "step": 6610 }, { "epoch": 3.0856476079346558, "grad_norm": 0.70703125, "learning_rate": 0.0001566456622538678, "loss": 0.044, "step": 6611 }, { "epoch": 3.0861143523920656, "grad_norm": 0.75, "learning_rate": 0.0001566335693508513, "loss": 0.0278, "step": 6612 }, { "epoch": 3.086581096849475, "grad_norm": 0.84375, "learning_rate": 0.0001566214752284512, "loss": 0.0374, "step": 6613 }, { "epoch": 3.0870478413068847, "grad_norm": 0.74609375, "learning_rate": 0.00015660937988692795, "loss": 0.0303, "step": 6614 }, { "epoch": 3.087514585764294, "grad_norm": 0.72265625, "learning_rate": 0.00015659728332654188, "loss": 0.0309, "step": 6615 }, { "epoch": 3.0879813302217034, "grad_norm": 0.80078125, "learning_rate": 0.00015658518554755355, "loss": 0.0331, "step": 6616 }, { "epoch": 3.088448074679113, "grad_norm": 0.6484375, "learning_rate": 0.0001565730865502234, "loss": 0.0287, "step": 6617 }, { "epoch": 3.0889148191365225, "grad_norm": 0.78125, "learning_rate": 0.0001565609863348119, "loss": 0.0288, "step": 6618 }, { "epoch": 3.0893815635939323, "grad_norm": 0.6171875, "learning_rate": 0.00015654888490157962, "loss": 0.0265, "step": 6619 }, { "epoch": 3.0898483080513417, "grad_norm": 0.83203125, "learning_rate": 0.0001565367822507871, "loss": 0.0389, "step": 6620 }, { "epoch": 3.0903150525087515, "grad_norm": 0.74609375, "learning_rate": 0.00015652467838269494, "loss": 0.0287, "step": 6621 }, { "epoch": 3.090781796966161, "grad_norm": 0.75390625, "learning_rate": 0.00015651257329756373, "loss": 0.0319, "step": 6622 }, { "epoch": 3.0912485414235706, "grad_norm": 0.62109375, "learning_rate": 0.0001565004669956541, "loss": 0.0398, "step": 6623 }, { "epoch": 3.09171528588098, "grad_norm": 0.6796875, "learning_rate": 0.00015648835947722675, "loss": 0.0382, "step": 6624 }, { "epoch": 3.09218203033839, "grad_norm": 0.796875, "learning_rate": 0.00015647625074254235, "loss": 0.0365, "step": 6625 }, { "epoch": 3.092648774795799, "grad_norm": 0.86328125, "learning_rate": 0.00015646414079186158, "loss": 0.0333, "step": 6626 }, { "epoch": 3.093115519253209, "grad_norm": 0.61328125, "learning_rate": 0.00015645202962544523, "loss": 0.0256, "step": 6627 }, { "epoch": 3.0935822637106183, "grad_norm": 0.83203125, "learning_rate": 0.00015643991724355406, "loss": 0.0376, "step": 6628 }, { "epoch": 3.094049008168028, "grad_norm": 0.578125, "learning_rate": 0.0001564278036464488, "loss": 0.0252, "step": 6629 }, { "epoch": 3.0945157526254374, "grad_norm": 0.69140625, "learning_rate": 0.0001564156888343904, "loss": 0.0288, "step": 6630 }, { "epoch": 3.0949824970828472, "grad_norm": 0.953125, "learning_rate": 0.00015640357280763956, "loss": 0.0363, "step": 6631 }, { "epoch": 3.0954492415402566, "grad_norm": 0.65625, "learning_rate": 0.00015639145556645725, "loss": 0.0361, "step": 6632 }, { "epoch": 3.0959159859976664, "grad_norm": 0.76953125, "learning_rate": 0.00015637933711110433, "loss": 0.0332, "step": 6633 }, { "epoch": 3.0963827304550757, "grad_norm": 0.609375, "learning_rate": 0.00015636721744184172, "loss": 0.0299, "step": 6634 }, { "epoch": 3.0968494749124855, "grad_norm": 0.78515625, "learning_rate": 0.00015635509655893037, "loss": 0.03, "step": 6635 }, { "epoch": 3.097316219369895, "grad_norm": 0.7890625, "learning_rate": 0.00015634297446263126, "loss": 0.038, "step": 6636 }, { "epoch": 3.0977829638273047, "grad_norm": 0.79296875, "learning_rate": 0.00015633085115320541, "loss": 0.0375, "step": 6637 }, { "epoch": 3.098249708284714, "grad_norm": 0.7265625, "learning_rate": 0.00015631872663091385, "loss": 0.0338, "step": 6638 }, { "epoch": 3.098716452742124, "grad_norm": 1.0390625, "learning_rate": 0.00015630660089601759, "loss": 0.0359, "step": 6639 }, { "epoch": 3.099183197199533, "grad_norm": 0.546875, "learning_rate": 0.00015629447394877776, "loss": 0.0157, "step": 6640 }, { "epoch": 3.099649941656943, "grad_norm": 0.7265625, "learning_rate": 0.00015628234578945547, "loss": 0.0459, "step": 6641 }, { "epoch": 3.1001166861143523, "grad_norm": 0.76171875, "learning_rate": 0.00015627021641831176, "loss": 0.0333, "step": 6642 }, { "epoch": 3.100583430571762, "grad_norm": 0.82421875, "learning_rate": 0.0001562580858356079, "loss": 0.0376, "step": 6643 }, { "epoch": 3.1010501750291715, "grad_norm": 0.640625, "learning_rate": 0.00015624595404160503, "loss": 0.0347, "step": 6644 }, { "epoch": 3.1015169194865813, "grad_norm": 0.58984375, "learning_rate": 0.00015623382103656437, "loss": 0.0256, "step": 6645 }, { "epoch": 3.1019836639439906, "grad_norm": 0.84375, "learning_rate": 0.00015622168682074715, "loss": 0.0416, "step": 6646 }, { "epoch": 3.1024504084014004, "grad_norm": 0.70703125, "learning_rate": 0.00015620955139441462, "loss": 0.0339, "step": 6647 }, { "epoch": 3.1029171528588098, "grad_norm": 0.6640625, "learning_rate": 0.00015619741475782808, "loss": 0.025, "step": 6648 }, { "epoch": 3.1033838973162196, "grad_norm": 0.82421875, "learning_rate": 0.00015618527691124885, "loss": 0.0334, "step": 6649 }, { "epoch": 3.103850641773629, "grad_norm": 0.82421875, "learning_rate": 0.00015617313785493828, "loss": 0.0373, "step": 6650 }, { "epoch": 3.1043173862310387, "grad_norm": 0.51953125, "learning_rate": 0.00015616099758915773, "loss": 0.0164, "step": 6651 }, { "epoch": 3.104784130688448, "grad_norm": 0.59375, "learning_rate": 0.0001561488561141686, "loss": 0.0266, "step": 6652 }, { "epoch": 3.105250875145858, "grad_norm": 0.55078125, "learning_rate": 0.00015613671343023223, "loss": 0.0293, "step": 6653 }, { "epoch": 3.105717619603267, "grad_norm": 0.60546875, "learning_rate": 0.00015612456953761021, "loss": 0.0309, "step": 6654 }, { "epoch": 3.106184364060677, "grad_norm": 0.78125, "learning_rate": 0.0001561124244365639, "loss": 0.0382, "step": 6655 }, { "epoch": 3.1066511085180863, "grad_norm": 0.76953125, "learning_rate": 0.00015610027812735486, "loss": 0.0322, "step": 6656 }, { "epoch": 3.107117852975496, "grad_norm": 0.828125, "learning_rate": 0.0001560881306102446, "loss": 0.0414, "step": 6657 }, { "epoch": 3.1075845974329055, "grad_norm": 0.52734375, "learning_rate": 0.0001560759818854946, "loss": 0.0245, "step": 6658 }, { "epoch": 3.108051341890315, "grad_norm": 0.65234375, "learning_rate": 0.00015606383195336654, "loss": 0.0294, "step": 6659 }, { "epoch": 3.1085180863477246, "grad_norm": 0.76953125, "learning_rate": 0.00015605168081412197, "loss": 0.0353, "step": 6660 }, { "epoch": 3.108984830805134, "grad_norm": 0.66796875, "learning_rate": 0.0001560395284680225, "loss": 0.0309, "step": 6661 }, { "epoch": 3.109451575262544, "grad_norm": 0.53125, "learning_rate": 0.0001560273749153298, "loss": 0.0273, "step": 6662 }, { "epoch": 3.109918319719953, "grad_norm": 0.625, "learning_rate": 0.00015601522015630558, "loss": 0.0238, "step": 6663 }, { "epoch": 3.110385064177363, "grad_norm": 0.77734375, "learning_rate": 0.0001560030641912115, "loss": 0.036, "step": 6664 }, { "epoch": 3.1108518086347723, "grad_norm": 0.7109375, "learning_rate": 0.00015599090702030934, "loss": 0.0277, "step": 6665 }, { "epoch": 3.111318553092182, "grad_norm": 0.734375, "learning_rate": 0.00015597874864386083, "loss": 0.0278, "step": 6666 }, { "epoch": 3.1117852975495914, "grad_norm": 0.9765625, "learning_rate": 0.00015596658906212772, "loss": 0.0519, "step": 6667 }, { "epoch": 3.1122520420070012, "grad_norm": 0.67578125, "learning_rate": 0.00015595442827537185, "loss": 0.0307, "step": 6668 }, { "epoch": 3.1127187864644106, "grad_norm": 0.73046875, "learning_rate": 0.00015594226628385508, "loss": 0.0342, "step": 6669 }, { "epoch": 3.1131855309218204, "grad_norm": 0.8046875, "learning_rate": 0.00015593010308783924, "loss": 0.0335, "step": 6670 }, { "epoch": 3.1136522753792297, "grad_norm": 0.80078125, "learning_rate": 0.00015591793868758623, "loss": 0.0347, "step": 6671 }, { "epoch": 3.1141190198366395, "grad_norm": 0.640625, "learning_rate": 0.00015590577308335796, "loss": 0.0336, "step": 6672 }, { "epoch": 3.114585764294049, "grad_norm": 0.63671875, "learning_rate": 0.00015589360627541634, "loss": 0.0363, "step": 6673 }, { "epoch": 3.1150525087514587, "grad_norm": 0.60546875, "learning_rate": 0.0001558814382640234, "loss": 0.0223, "step": 6674 }, { "epoch": 3.115519253208868, "grad_norm": 0.67578125, "learning_rate": 0.00015586926904944106, "loss": 0.0359, "step": 6675 }, { "epoch": 3.115985997666278, "grad_norm": 0.65234375, "learning_rate": 0.0001558570986319314, "loss": 0.0303, "step": 6676 }, { "epoch": 3.116452742123687, "grad_norm": 0.81640625, "learning_rate": 0.0001558449270117564, "loss": 0.0362, "step": 6677 }, { "epoch": 3.116919486581097, "grad_norm": 0.6875, "learning_rate": 0.00015583275418917819, "loss": 0.0416, "step": 6678 }, { "epoch": 3.1173862310385063, "grad_norm": 0.6875, "learning_rate": 0.00015582058016445883, "loss": 0.0282, "step": 6679 }, { "epoch": 3.117852975495916, "grad_norm": 0.69140625, "learning_rate": 0.0001558084049378604, "loss": 0.0311, "step": 6680 }, { "epoch": 3.1183197199533255, "grad_norm": 0.953125, "learning_rate": 0.00015579622850964514, "loss": 0.0397, "step": 6681 }, { "epoch": 3.1187864644107353, "grad_norm": 0.640625, "learning_rate": 0.00015578405088007515, "loss": 0.0284, "step": 6682 }, { "epoch": 3.1192532088681446, "grad_norm": 0.671875, "learning_rate": 0.00015577187204941266, "loss": 0.0404, "step": 6683 }, { "epoch": 3.1197199533255544, "grad_norm": 0.78515625, "learning_rate": 0.00015575969201791988, "loss": 0.038, "step": 6684 }, { "epoch": 3.1201866977829638, "grad_norm": 0.64453125, "learning_rate": 0.00015574751078585908, "loss": 0.0269, "step": 6685 }, { "epoch": 3.1206534422403736, "grad_norm": 0.7734375, "learning_rate": 0.0001557353283534925, "loss": 0.0368, "step": 6686 }, { "epoch": 3.121120186697783, "grad_norm": 0.65625, "learning_rate": 0.00015572314472108248, "loss": 0.0308, "step": 6687 }, { "epoch": 3.1215869311551927, "grad_norm": 0.81640625, "learning_rate": 0.00015571095988889134, "loss": 0.0352, "step": 6688 }, { "epoch": 3.122053675612602, "grad_norm": 0.78515625, "learning_rate": 0.0001556987738571814, "loss": 0.0278, "step": 6689 }, { "epoch": 3.122520420070012, "grad_norm": 0.8046875, "learning_rate": 0.00015568658662621507, "loss": 0.0451, "step": 6690 }, { "epoch": 3.122987164527421, "grad_norm": 0.73828125, "learning_rate": 0.00015567439819625474, "loss": 0.0338, "step": 6691 }, { "epoch": 3.123453908984831, "grad_norm": 0.76953125, "learning_rate": 0.0001556622085675628, "loss": 0.0347, "step": 6692 }, { "epoch": 3.1239206534422403, "grad_norm": 0.77734375, "learning_rate": 0.0001556500177404018, "loss": 0.0306, "step": 6693 }, { "epoch": 3.12438739789965, "grad_norm": 0.5703125, "learning_rate": 0.00015563782571503416, "loss": 0.0239, "step": 6694 }, { "epoch": 3.1248541423570595, "grad_norm": 0.7890625, "learning_rate": 0.0001556256324917224, "loss": 0.0364, "step": 6695 }, { "epoch": 3.1253208868144693, "grad_norm": 0.8046875, "learning_rate": 0.00015561343807072908, "loss": 0.0284, "step": 6696 }, { "epoch": 3.1257876312718786, "grad_norm": 1.0390625, "learning_rate": 0.00015560124245231672, "loss": 0.039, "step": 6697 }, { "epoch": 3.126254375729288, "grad_norm": 0.88671875, "learning_rate": 0.0001555890456367479, "loss": 0.0365, "step": 6698 }, { "epoch": 3.126721120186698, "grad_norm": 0.57421875, "learning_rate": 0.00015557684762428528, "loss": 0.0231, "step": 6699 }, { "epoch": 3.1271878646441076, "grad_norm": 0.6875, "learning_rate": 0.00015556464841519146, "loss": 0.0264, "step": 6700 }, { "epoch": 3.127654609101517, "grad_norm": 0.8203125, "learning_rate": 0.0001555524480097291, "loss": 0.0271, "step": 6701 }, { "epoch": 3.1281213535589263, "grad_norm": 0.703125, "learning_rate": 0.00015554024640816088, "loss": 0.0257, "step": 6702 }, { "epoch": 3.128588098016336, "grad_norm": 0.65234375, "learning_rate": 0.00015552804361074955, "loss": 0.0268, "step": 6703 }, { "epoch": 3.1290548424737454, "grad_norm": 0.6328125, "learning_rate": 0.00015551583961775784, "loss": 0.0238, "step": 6704 }, { "epoch": 3.1295215869311552, "grad_norm": 0.8984375, "learning_rate": 0.0001555036344294485, "loss": 0.0269, "step": 6705 }, { "epoch": 3.1299883313885646, "grad_norm": 0.87109375, "learning_rate": 0.00015549142804608431, "loss": 0.0393, "step": 6706 }, { "epoch": 3.1304550758459744, "grad_norm": 0.76171875, "learning_rate": 0.00015547922046792813, "loss": 0.0293, "step": 6707 }, { "epoch": 3.1309218203033837, "grad_norm": 0.69140625, "learning_rate": 0.0001554670116952428, "loss": 0.0255, "step": 6708 }, { "epoch": 3.1313885647607935, "grad_norm": 0.79296875, "learning_rate": 0.00015545480172829112, "loss": 0.0377, "step": 6709 }, { "epoch": 3.131855309218203, "grad_norm": 0.7578125, "learning_rate": 0.00015544259056733605, "loss": 0.0366, "step": 6710 }, { "epoch": 3.1323220536756127, "grad_norm": 0.9609375, "learning_rate": 0.00015543037821264048, "loss": 0.0331, "step": 6711 }, { "epoch": 3.132788798133022, "grad_norm": 0.75, "learning_rate": 0.00015541816466446736, "loss": 0.0361, "step": 6712 }, { "epoch": 3.133255542590432, "grad_norm": 0.58984375, "learning_rate": 0.0001554059499230797, "loss": 0.0194, "step": 6713 }, { "epoch": 3.133722287047841, "grad_norm": 0.94140625, "learning_rate": 0.00015539373398874045, "loss": 0.0339, "step": 6714 }, { "epoch": 3.134189031505251, "grad_norm": 0.81640625, "learning_rate": 0.00015538151686171264, "loss": 0.0265, "step": 6715 }, { "epoch": 3.1346557759626603, "grad_norm": 0.8828125, "learning_rate": 0.00015536929854225934, "loss": 0.0374, "step": 6716 }, { "epoch": 3.13512252042007, "grad_norm": 0.7265625, "learning_rate": 0.0001553570790306436, "loss": 0.0345, "step": 6717 }, { "epoch": 3.1355892648774795, "grad_norm": 0.58203125, "learning_rate": 0.0001553448583271285, "loss": 0.0403, "step": 6718 }, { "epoch": 3.1360560093348893, "grad_norm": 0.8828125, "learning_rate": 0.00015533263643197724, "loss": 0.0448, "step": 6719 }, { "epoch": 3.1365227537922986, "grad_norm": 0.62109375, "learning_rate": 0.00015532041334545291, "loss": 0.0251, "step": 6720 }, { "epoch": 3.1369894982497084, "grad_norm": 0.7578125, "learning_rate": 0.0001553081890678187, "loss": 0.0312, "step": 6721 }, { "epoch": 3.1374562427071178, "grad_norm": 0.68359375, "learning_rate": 0.0001552959635993378, "loss": 0.0316, "step": 6722 }, { "epoch": 3.1379229871645276, "grad_norm": 0.6953125, "learning_rate": 0.00015528373694027344, "loss": 0.025, "step": 6723 }, { "epoch": 3.138389731621937, "grad_norm": 0.71484375, "learning_rate": 0.00015527150909088895, "loss": 0.0287, "step": 6724 }, { "epoch": 3.1388564760793467, "grad_norm": 0.65625, "learning_rate": 0.00015525928005144748, "loss": 0.0252, "step": 6725 }, { "epoch": 3.139323220536756, "grad_norm": 0.55078125, "learning_rate": 0.00015524704982221242, "loss": 0.0214, "step": 6726 }, { "epoch": 3.139789964994166, "grad_norm": 0.890625, "learning_rate": 0.0001552348184034471, "loss": 0.0411, "step": 6727 }, { "epoch": 3.140256709451575, "grad_norm": 0.73828125, "learning_rate": 0.00015522258579541483, "loss": 0.0365, "step": 6728 }, { "epoch": 3.140723453908985, "grad_norm": 0.7734375, "learning_rate": 0.000155210351998379, "loss": 0.0366, "step": 6729 }, { "epoch": 3.1411901983663943, "grad_norm": 0.76171875, "learning_rate": 0.00015519811701260309, "loss": 0.0309, "step": 6730 }, { "epoch": 3.141656942823804, "grad_norm": 0.828125, "learning_rate": 0.00015518588083835043, "loss": 0.0284, "step": 6731 }, { "epoch": 3.1421236872812135, "grad_norm": 0.86328125, "learning_rate": 0.00015517364347588454, "loss": 0.0408, "step": 6732 }, { "epoch": 3.1425904317386233, "grad_norm": 0.91015625, "learning_rate": 0.0001551614049254689, "loss": 0.0385, "step": 6733 }, { "epoch": 3.1430571761960326, "grad_norm": 0.77734375, "learning_rate": 0.000155149165187367, "loss": 0.0287, "step": 6734 }, { "epoch": 3.1435239206534424, "grad_norm": 0.7421875, "learning_rate": 0.00015513692426184238, "loss": 0.0343, "step": 6735 }, { "epoch": 3.143990665110852, "grad_norm": 0.62109375, "learning_rate": 0.00015512468214915864, "loss": 0.0261, "step": 6736 }, { "epoch": 3.144457409568261, "grad_norm": 0.76171875, "learning_rate": 0.0001551124388495793, "loss": 0.0337, "step": 6737 }, { "epoch": 3.144924154025671, "grad_norm": 0.546875, "learning_rate": 0.000155100194363368, "loss": 0.0308, "step": 6738 }, { "epoch": 3.1453908984830807, "grad_norm": 0.6875, "learning_rate": 0.0001550879486907884, "loss": 0.0213, "step": 6739 }, { "epoch": 3.14585764294049, "grad_norm": 0.9375, "learning_rate": 0.0001550757018321041, "loss": 0.0352, "step": 6740 }, { "epoch": 3.1463243873978994, "grad_norm": 0.6953125, "learning_rate": 0.00015506345378757885, "loss": 0.0257, "step": 6741 }, { "epoch": 3.1467911318553092, "grad_norm": 0.8828125, "learning_rate": 0.0001550512045574764, "loss": 0.03, "step": 6742 }, { "epoch": 3.1472578763127186, "grad_norm": 0.81640625, "learning_rate": 0.00015503895414206037, "loss": 0.0299, "step": 6743 }, { "epoch": 3.1477246207701284, "grad_norm": 0.7265625, "learning_rate": 0.00015502670254159462, "loss": 0.0261, "step": 6744 }, { "epoch": 3.1481913652275377, "grad_norm": 0.77734375, "learning_rate": 0.0001550144497563429, "loss": 0.0365, "step": 6745 }, { "epoch": 3.1486581096849475, "grad_norm": 0.62109375, "learning_rate": 0.00015500219578656904, "loss": 0.0284, "step": 6746 }, { "epoch": 3.149124854142357, "grad_norm": 0.90234375, "learning_rate": 0.00015498994063253688, "loss": 0.0327, "step": 6747 }, { "epoch": 3.1495915985997667, "grad_norm": 0.71484375, "learning_rate": 0.00015497768429451026, "loss": 0.0284, "step": 6748 }, { "epoch": 3.150058343057176, "grad_norm": 0.87109375, "learning_rate": 0.00015496542677275317, "loss": 0.0391, "step": 6749 }, { "epoch": 3.150525087514586, "grad_norm": 0.76171875, "learning_rate": 0.0001549531680675294, "loss": 0.0267, "step": 6750 }, { "epoch": 3.150991831971995, "grad_norm": 0.86328125, "learning_rate": 0.00015494090817910294, "loss": 0.0377, "step": 6751 }, { "epoch": 3.151458576429405, "grad_norm": 0.734375, "learning_rate": 0.0001549286471077378, "loss": 0.0311, "step": 6752 }, { "epoch": 3.1519253208868143, "grad_norm": 0.7734375, "learning_rate": 0.00015491638485369793, "loss": 0.0336, "step": 6753 }, { "epoch": 3.152392065344224, "grad_norm": 0.875, "learning_rate": 0.00015490412141724738, "loss": 0.0467, "step": 6754 }, { "epoch": 3.1528588098016335, "grad_norm": 0.6953125, "learning_rate": 0.00015489185679865014, "loss": 0.0297, "step": 6755 }, { "epoch": 3.1533255542590433, "grad_norm": 1.0859375, "learning_rate": 0.00015487959099817034, "loss": 0.0499, "step": 6756 }, { "epoch": 3.1537922987164526, "grad_norm": 0.80078125, "learning_rate": 0.00015486732401607208, "loss": 0.0281, "step": 6757 }, { "epoch": 3.1542590431738624, "grad_norm": 0.62109375, "learning_rate": 0.00015485505585261943, "loss": 0.0369, "step": 6758 }, { "epoch": 3.1547257876312718, "grad_norm": 0.90625, "learning_rate": 0.00015484278650807658, "loss": 0.0334, "step": 6759 }, { "epoch": 3.1551925320886816, "grad_norm": 0.87890625, "learning_rate": 0.00015483051598270768, "loss": 0.0387, "step": 6760 }, { "epoch": 3.155659276546091, "grad_norm": 0.73046875, "learning_rate": 0.00015481824427677693, "loss": 0.0272, "step": 6761 }, { "epoch": 3.1561260210035007, "grad_norm": 0.55859375, "learning_rate": 0.00015480597139054858, "loss": 0.0167, "step": 6762 }, { "epoch": 3.15659276546091, "grad_norm": 1.0546875, "learning_rate": 0.00015479369732428685, "loss": 0.0432, "step": 6763 }, { "epoch": 3.15705950991832, "grad_norm": 0.82421875, "learning_rate": 0.00015478142207825602, "loss": 0.0337, "step": 6764 }, { "epoch": 3.157526254375729, "grad_norm": 0.81640625, "learning_rate": 0.00015476914565272038, "loss": 0.026, "step": 6765 }, { "epoch": 3.157992998833139, "grad_norm": 0.93359375, "learning_rate": 0.00015475686804794427, "loss": 0.0403, "step": 6766 }, { "epoch": 3.1584597432905483, "grad_norm": 0.78515625, "learning_rate": 0.00015474458926419203, "loss": 0.0349, "step": 6767 }, { "epoch": 3.158926487747958, "grad_norm": 0.9140625, "learning_rate": 0.00015473230930172806, "loss": 0.0383, "step": 6768 }, { "epoch": 3.1593932322053675, "grad_norm": 0.5625, "learning_rate": 0.00015472002816081673, "loss": 0.0248, "step": 6769 }, { "epoch": 3.1598599766627773, "grad_norm": 0.69921875, "learning_rate": 0.0001547077458417225, "loss": 0.0309, "step": 6770 }, { "epoch": 3.1603267211201866, "grad_norm": 0.59765625, "learning_rate": 0.0001546954623447098, "loss": 0.0288, "step": 6771 }, { "epoch": 3.1607934655775964, "grad_norm": 0.6953125, "learning_rate": 0.0001546831776700431, "loss": 0.0323, "step": 6772 }, { "epoch": 3.161260210035006, "grad_norm": 0.9296875, "learning_rate": 0.00015467089181798687, "loss": 0.0519, "step": 6773 }, { "epoch": 3.1617269544924156, "grad_norm": 0.765625, "learning_rate": 0.00015465860478880577, "loss": 0.0367, "step": 6774 }, { "epoch": 3.162193698949825, "grad_norm": 0.71875, "learning_rate": 0.00015464631658276422, "loss": 0.0387, "step": 6775 }, { "epoch": 3.1626604434072347, "grad_norm": 1.2109375, "learning_rate": 0.00015463402720012682, "loss": 0.031, "step": 6776 }, { "epoch": 3.163127187864644, "grad_norm": 0.7890625, "learning_rate": 0.0001546217366411582, "loss": 0.0364, "step": 6777 }, { "epoch": 3.163593932322054, "grad_norm": 0.875, "learning_rate": 0.000154609444906123, "loss": 0.0436, "step": 6778 }, { "epoch": 3.1640606767794632, "grad_norm": 0.68359375, "learning_rate": 0.00015459715199528588, "loss": 0.0266, "step": 6779 }, { "epoch": 3.1645274212368726, "grad_norm": 0.7734375, "learning_rate": 0.00015458485790891148, "loss": 0.0434, "step": 6780 }, { "epoch": 3.1649941656942824, "grad_norm": 0.7578125, "learning_rate": 0.00015457256264726454, "loss": 0.0232, "step": 6781 }, { "epoch": 3.165460910151692, "grad_norm": 0.87109375, "learning_rate": 0.00015456026621060977, "loss": 0.0419, "step": 6782 }, { "epoch": 3.1659276546091015, "grad_norm": 0.703125, "learning_rate": 0.00015454796859921195, "loss": 0.0324, "step": 6783 }, { "epoch": 3.166394399066511, "grad_norm": 0.6640625, "learning_rate": 0.00015453566981333582, "loss": 0.032, "step": 6784 }, { "epoch": 3.1668611435239207, "grad_norm": 0.74609375, "learning_rate": 0.00015452336985324623, "loss": 0.0438, "step": 6785 }, { "epoch": 3.16732788798133, "grad_norm": 0.671875, "learning_rate": 0.00015451106871920795, "loss": 0.0325, "step": 6786 }, { "epoch": 3.16779463243874, "grad_norm": 0.65625, "learning_rate": 0.0001544987664114859, "loss": 0.0282, "step": 6787 }, { "epoch": 3.168261376896149, "grad_norm": 0.55859375, "learning_rate": 0.00015448646293034498, "loss": 0.0178, "step": 6788 }, { "epoch": 3.168728121353559, "grad_norm": 0.76171875, "learning_rate": 0.00015447415827605002, "loss": 0.0393, "step": 6789 }, { "epoch": 3.1691948658109683, "grad_norm": 0.76171875, "learning_rate": 0.000154461852448866, "loss": 0.03, "step": 6790 }, { "epoch": 3.169661610268378, "grad_norm": 0.69921875, "learning_rate": 0.00015444954544905788, "loss": 0.0284, "step": 6791 }, { "epoch": 3.1701283547257875, "grad_norm": 0.78515625, "learning_rate": 0.0001544372372768906, "loss": 0.0293, "step": 6792 }, { "epoch": 3.1705950991831973, "grad_norm": 0.5625, "learning_rate": 0.00015442492793262927, "loss": 0.0236, "step": 6793 }, { "epoch": 3.1710618436406066, "grad_norm": 1.046875, "learning_rate": 0.00015441261741653878, "loss": 0.0371, "step": 6794 }, { "epoch": 3.1715285880980164, "grad_norm": 0.7109375, "learning_rate": 0.0001544003057288843, "loss": 0.036, "step": 6795 }, { "epoch": 3.1719953325554258, "grad_norm": 0.6953125, "learning_rate": 0.00015438799286993088, "loss": 0.0239, "step": 6796 }, { "epoch": 3.1724620770128356, "grad_norm": 0.7734375, "learning_rate": 0.00015437567883994362, "loss": 0.0343, "step": 6797 }, { "epoch": 3.172928821470245, "grad_norm": 0.796875, "learning_rate": 0.0001543633636391877, "loss": 0.0254, "step": 6798 }, { "epoch": 3.1733955659276547, "grad_norm": 0.71484375, "learning_rate": 0.0001543510472679282, "loss": 0.0278, "step": 6799 }, { "epoch": 3.173862310385064, "grad_norm": 0.703125, "learning_rate": 0.00015433872972643036, "loss": 0.0295, "step": 6800 }, { "epoch": 3.174329054842474, "grad_norm": 0.80859375, "learning_rate": 0.00015432641101495936, "loss": 0.0273, "step": 6801 }, { "epoch": 3.174795799299883, "grad_norm": 0.6953125, "learning_rate": 0.0001543140911337805, "loss": 0.0285, "step": 6802 }, { "epoch": 3.175262543757293, "grad_norm": 0.62890625, "learning_rate": 0.00015430177008315897, "loss": 0.0242, "step": 6803 }, { "epoch": 3.1757292882147024, "grad_norm": 0.75, "learning_rate": 0.00015428944786336009, "loss": 0.0359, "step": 6804 }, { "epoch": 3.176196032672112, "grad_norm": 0.78515625, "learning_rate": 0.00015427712447464915, "loss": 0.0373, "step": 6805 }, { "epoch": 3.1766627771295215, "grad_norm": 1.3828125, "learning_rate": 0.00015426479991729152, "loss": 0.0595, "step": 6806 }, { "epoch": 3.1771295215869313, "grad_norm": 0.8046875, "learning_rate": 0.00015425247419155252, "loss": 0.0335, "step": 6807 }, { "epoch": 3.1775962660443406, "grad_norm": 0.8984375, "learning_rate": 0.00015424014729769754, "loss": 0.0501, "step": 6808 }, { "epoch": 3.1780630105017504, "grad_norm": 1.0, "learning_rate": 0.00015422781923599206, "loss": 0.0329, "step": 6809 }, { "epoch": 3.17852975495916, "grad_norm": 0.69140625, "learning_rate": 0.00015421549000670145, "loss": 0.0261, "step": 6810 }, { "epoch": 3.1789964994165696, "grad_norm": 0.609375, "learning_rate": 0.00015420315961009115, "loss": 0.0137, "step": 6811 }, { "epoch": 3.179463243873979, "grad_norm": 0.8515625, "learning_rate": 0.00015419082804642671, "loss": 0.0358, "step": 6812 }, { "epoch": 3.1799299883313887, "grad_norm": 0.671875, "learning_rate": 0.00015417849531597358, "loss": 0.0205, "step": 6813 }, { "epoch": 3.180396732788798, "grad_norm": 1.1015625, "learning_rate": 0.0001541661614189974, "loss": 0.0326, "step": 6814 }, { "epoch": 3.180863477246208, "grad_norm": 0.74609375, "learning_rate": 0.00015415382635576363, "loss": 0.0282, "step": 6815 }, { "epoch": 3.1813302217036172, "grad_norm": 0.91796875, "learning_rate": 0.00015414149012653788, "loss": 0.0382, "step": 6816 }, { "epoch": 3.181796966161027, "grad_norm": 0.609375, "learning_rate": 0.0001541291527315858, "loss": 0.0233, "step": 6817 }, { "epoch": 3.1822637106184364, "grad_norm": 0.57421875, "learning_rate": 0.00015411681417117298, "loss": 0.0328, "step": 6818 }, { "epoch": 3.182730455075846, "grad_norm": 0.5703125, "learning_rate": 0.00015410447444556515, "loss": 0.0306, "step": 6819 }, { "epoch": 3.1831971995332555, "grad_norm": 0.83203125, "learning_rate": 0.00015409213355502792, "loss": 0.0413, "step": 6820 }, { "epoch": 3.1836639439906653, "grad_norm": 0.625, "learning_rate": 0.00015407979149982705, "loss": 0.0275, "step": 6821 }, { "epoch": 3.1841306884480747, "grad_norm": 0.609375, "learning_rate": 0.0001540674482802283, "loss": 0.034, "step": 6822 }, { "epoch": 3.184597432905484, "grad_norm": 0.609375, "learning_rate": 0.00015405510389649734, "loss": 0.0259, "step": 6823 }, { "epoch": 3.185064177362894, "grad_norm": 0.58984375, "learning_rate": 0.00015404275834890002, "loss": 0.0306, "step": 6824 }, { "epoch": 3.1855309218203036, "grad_norm": 0.8828125, "learning_rate": 0.00015403041163770218, "loss": 0.0345, "step": 6825 }, { "epoch": 3.185997666277713, "grad_norm": 0.8359375, "learning_rate": 0.00015401806376316962, "loss": 0.0353, "step": 6826 }, { "epoch": 3.1864644107351223, "grad_norm": 0.8359375, "learning_rate": 0.00015400571472556823, "loss": 0.0363, "step": 6827 }, { "epoch": 3.186931155192532, "grad_norm": 0.77734375, "learning_rate": 0.00015399336452516385, "loss": 0.0273, "step": 6828 }, { "epoch": 3.1873978996499415, "grad_norm": 0.94140625, "learning_rate": 0.00015398101316222245, "loss": 0.0369, "step": 6829 }, { "epoch": 3.1878646441073513, "grad_norm": 0.52734375, "learning_rate": 0.00015396866063700995, "loss": 0.0256, "step": 6830 }, { "epoch": 3.1883313885647606, "grad_norm": 0.6484375, "learning_rate": 0.00015395630694979229, "loss": 0.0363, "step": 6831 }, { "epoch": 3.1887981330221704, "grad_norm": 0.7734375, "learning_rate": 0.00015394395210083545, "loss": 0.0331, "step": 6832 }, { "epoch": 3.1892648774795798, "grad_norm": 0.4609375, "learning_rate": 0.00015393159609040552, "loss": 0.0205, "step": 6833 }, { "epoch": 3.1897316219369896, "grad_norm": 0.84765625, "learning_rate": 0.00015391923891876848, "loss": 0.029, "step": 6834 }, { "epoch": 3.190198366394399, "grad_norm": 0.8203125, "learning_rate": 0.00015390688058619038, "loss": 0.037, "step": 6835 }, { "epoch": 3.1906651108518087, "grad_norm": 0.671875, "learning_rate": 0.00015389452109293737, "loss": 0.0297, "step": 6836 }, { "epoch": 3.191131855309218, "grad_norm": 0.66796875, "learning_rate": 0.00015388216043927546, "loss": 0.038, "step": 6837 }, { "epoch": 3.191598599766628, "grad_norm": 0.71484375, "learning_rate": 0.00015386979862547088, "loss": 0.0257, "step": 6838 }, { "epoch": 3.192065344224037, "grad_norm": 0.76953125, "learning_rate": 0.0001538574356517898, "loss": 0.0301, "step": 6839 }, { "epoch": 3.192532088681447, "grad_norm": 0.51171875, "learning_rate": 0.00015384507151849832, "loss": 0.0238, "step": 6840 }, { "epoch": 3.1929988331388564, "grad_norm": 0.75390625, "learning_rate": 0.00015383270622586274, "loss": 0.0422, "step": 6841 }, { "epoch": 3.193465577596266, "grad_norm": 0.484375, "learning_rate": 0.00015382033977414923, "loss": 0.0194, "step": 6842 }, { "epoch": 3.1939323220536755, "grad_norm": 0.63671875, "learning_rate": 0.00015380797216362412, "loss": 0.0233, "step": 6843 }, { "epoch": 3.1943990665110853, "grad_norm": 0.7578125, "learning_rate": 0.0001537956033945537, "loss": 0.029, "step": 6844 }, { "epoch": 3.1948658109684946, "grad_norm": 0.9296875, "learning_rate": 0.0001537832334672042, "loss": 0.0354, "step": 6845 }, { "epoch": 3.1953325554259044, "grad_norm": 0.6875, "learning_rate": 0.00015377086238184203, "loss": 0.0281, "step": 6846 }, { "epoch": 3.195799299883314, "grad_norm": 0.828125, "learning_rate": 0.00015375849013873354, "loss": 0.0361, "step": 6847 }, { "epoch": 3.1962660443407236, "grad_norm": 0.74609375, "learning_rate": 0.0001537461167381451, "loss": 0.0397, "step": 6848 }, { "epoch": 3.196732788798133, "grad_norm": 0.69921875, "learning_rate": 0.00015373374218034314, "loss": 0.0293, "step": 6849 }, { "epoch": 3.1971995332555427, "grad_norm": 0.86328125, "learning_rate": 0.0001537213664655941, "loss": 0.041, "step": 6850 }, { "epoch": 3.197666277712952, "grad_norm": 0.7421875, "learning_rate": 0.0001537089895941644, "loss": 0.0336, "step": 6851 }, { "epoch": 3.198133022170362, "grad_norm": 0.9765625, "learning_rate": 0.00015369661156632063, "loss": 0.0281, "step": 6852 }, { "epoch": 3.1985997666277712, "grad_norm": 0.92578125, "learning_rate": 0.0001536842323823292, "loss": 0.0322, "step": 6853 }, { "epoch": 3.199066511085181, "grad_norm": 0.66796875, "learning_rate": 0.00015367185204245666, "loss": 0.0276, "step": 6854 }, { "epoch": 3.1995332555425904, "grad_norm": 0.58203125, "learning_rate": 0.0001536594705469696, "loss": 0.0249, "step": 6855 }, { "epoch": 3.2, "grad_norm": 0.671875, "learning_rate": 0.00015364708789613465, "loss": 0.0242, "step": 6856 }, { "epoch": 3.2004667444574095, "grad_norm": 0.8046875, "learning_rate": 0.00015363470409021833, "loss": 0.0406, "step": 6857 }, { "epoch": 3.2009334889148193, "grad_norm": 0.7890625, "learning_rate": 0.00015362231912948734, "loss": 0.0315, "step": 6858 }, { "epoch": 3.2014002333722287, "grad_norm": 0.97265625, "learning_rate": 0.0001536099330142083, "loss": 0.0415, "step": 6859 }, { "epoch": 3.2018669778296385, "grad_norm": 0.74609375, "learning_rate": 0.00015359754574464796, "loss": 0.0292, "step": 6860 }, { "epoch": 3.202333722287048, "grad_norm": 0.69921875, "learning_rate": 0.00015358515732107297, "loss": 0.0357, "step": 6861 }, { "epoch": 3.202800466744457, "grad_norm": 0.66015625, "learning_rate": 0.0001535727677437501, "loss": 0.0252, "step": 6862 }, { "epoch": 3.203267211201867, "grad_norm": 0.703125, "learning_rate": 0.00015356037701294608, "loss": 0.0335, "step": 6863 }, { "epoch": 3.2037339556592768, "grad_norm": 0.7109375, "learning_rate": 0.0001535479851289277, "loss": 0.0319, "step": 6864 }, { "epoch": 3.204200700116686, "grad_norm": 0.8125, "learning_rate": 0.00015353559209196183, "loss": 0.028, "step": 6865 }, { "epoch": 3.2046674445740955, "grad_norm": 0.7890625, "learning_rate": 0.00015352319790231525, "loss": 0.0204, "step": 6866 }, { "epoch": 3.2051341890315053, "grad_norm": 0.625, "learning_rate": 0.00015351080256025484, "loss": 0.0162, "step": 6867 }, { "epoch": 3.2056009334889146, "grad_norm": 0.83984375, "learning_rate": 0.00015349840606604747, "loss": 0.0387, "step": 6868 }, { "epoch": 3.2060676779463244, "grad_norm": 0.82421875, "learning_rate": 0.00015348600841996003, "loss": 0.0337, "step": 6869 }, { "epoch": 3.2065344224037338, "grad_norm": 0.64453125, "learning_rate": 0.00015347360962225955, "loss": 0.0235, "step": 6870 }, { "epoch": 3.2070011668611436, "grad_norm": 0.76953125, "learning_rate": 0.00015346120967321287, "loss": 0.0335, "step": 6871 }, { "epoch": 3.207467911318553, "grad_norm": 0.828125, "learning_rate": 0.00015344880857308705, "loss": 0.0297, "step": 6872 }, { "epoch": 3.2079346557759627, "grad_norm": 0.86328125, "learning_rate": 0.0001534364063221491, "loss": 0.0371, "step": 6873 }, { "epoch": 3.208401400233372, "grad_norm": 0.54296875, "learning_rate": 0.000153424002920666, "loss": 0.0158, "step": 6874 }, { "epoch": 3.208868144690782, "grad_norm": 0.8359375, "learning_rate": 0.00015341159836890485, "loss": 0.0283, "step": 6875 }, { "epoch": 3.209334889148191, "grad_norm": 0.7109375, "learning_rate": 0.0001533991926671327, "loss": 0.0248, "step": 6876 }, { "epoch": 3.209801633605601, "grad_norm": 0.64453125, "learning_rate": 0.00015338678581561674, "loss": 0.0312, "step": 6877 }, { "epoch": 3.2102683780630104, "grad_norm": 0.63671875, "learning_rate": 0.00015337437781462405, "loss": 0.0212, "step": 6878 }, { "epoch": 3.21073512252042, "grad_norm": 0.796875, "learning_rate": 0.00015336196866442176, "loss": 0.035, "step": 6879 }, { "epoch": 3.2112018669778295, "grad_norm": 0.640625, "learning_rate": 0.00015334955836527708, "loss": 0.0265, "step": 6880 }, { "epoch": 3.2116686114352393, "grad_norm": 0.6171875, "learning_rate": 0.00015333714691745723, "loss": 0.0396, "step": 6881 }, { "epoch": 3.2121353558926486, "grad_norm": 0.66796875, "learning_rate": 0.0001533247343212294, "loss": 0.0231, "step": 6882 }, { "epoch": 3.2126021003500584, "grad_norm": 0.6953125, "learning_rate": 0.00015331232057686086, "loss": 0.0342, "step": 6883 }, { "epoch": 3.213068844807468, "grad_norm": 0.84765625, "learning_rate": 0.00015329990568461896, "loss": 0.0318, "step": 6884 }, { "epoch": 3.2135355892648776, "grad_norm": 0.57421875, "learning_rate": 0.00015328748964477093, "loss": 0.0194, "step": 6885 }, { "epoch": 3.214002333722287, "grad_norm": 0.78515625, "learning_rate": 0.00015327507245758412, "loss": 0.0247, "step": 6886 }, { "epoch": 3.2144690781796967, "grad_norm": 0.9296875, "learning_rate": 0.00015326265412332588, "loss": 0.042, "step": 6887 }, { "epoch": 3.214935822637106, "grad_norm": 0.796875, "learning_rate": 0.00015325023464226364, "loss": 0.0366, "step": 6888 }, { "epoch": 3.215402567094516, "grad_norm": 0.80078125, "learning_rate": 0.00015323781401466473, "loss": 0.0373, "step": 6889 }, { "epoch": 3.2158693115519252, "grad_norm": 0.625, "learning_rate": 0.00015322539224079662, "loss": 0.0317, "step": 6890 }, { "epoch": 3.216336056009335, "grad_norm": 0.82421875, "learning_rate": 0.00015321296932092676, "loss": 0.0341, "step": 6891 }, { "epoch": 3.2168028004667444, "grad_norm": 0.9140625, "learning_rate": 0.00015320054525532265, "loss": 0.0417, "step": 6892 }, { "epoch": 3.217269544924154, "grad_norm": 0.87890625, "learning_rate": 0.00015318812004425176, "loss": 0.0361, "step": 6893 }, { "epoch": 3.2177362893815635, "grad_norm": 0.6484375, "learning_rate": 0.00015317569368798162, "loss": 0.0278, "step": 6894 }, { "epoch": 3.2182030338389733, "grad_norm": 0.7734375, "learning_rate": 0.0001531632661867798, "loss": 0.027, "step": 6895 }, { "epoch": 3.2186697782963827, "grad_norm": 0.79296875, "learning_rate": 0.00015315083754091388, "loss": 0.0283, "step": 6896 }, { "epoch": 3.2191365227537925, "grad_norm": 0.7109375, "learning_rate": 0.00015313840775065148, "loss": 0.0295, "step": 6897 }, { "epoch": 3.219603267211202, "grad_norm": 0.95703125, "learning_rate": 0.00015312597681626014, "loss": 0.0293, "step": 6898 }, { "epoch": 3.2200700116686116, "grad_norm": 0.83203125, "learning_rate": 0.00015311354473800763, "loss": 0.0386, "step": 6899 }, { "epoch": 3.220536756126021, "grad_norm": 0.671875, "learning_rate": 0.00015310111151616156, "loss": 0.0213, "step": 6900 }, { "epoch": 3.2210035005834308, "grad_norm": 0.75, "learning_rate": 0.00015308867715098961, "loss": 0.0274, "step": 6901 }, { "epoch": 3.22147024504084, "grad_norm": 1.015625, "learning_rate": 0.00015307624164275958, "loss": 0.0396, "step": 6902 }, { "epoch": 3.22193698949825, "grad_norm": 0.83984375, "learning_rate": 0.00015306380499173917, "loss": 0.0266, "step": 6903 }, { "epoch": 3.2224037339556593, "grad_norm": 0.6796875, "learning_rate": 0.00015305136719819616, "loss": 0.0273, "step": 6904 }, { "epoch": 3.2228704784130686, "grad_norm": 0.7890625, "learning_rate": 0.00015303892826239835, "loss": 0.0277, "step": 6905 }, { "epoch": 3.2233372228704784, "grad_norm": 0.61328125, "learning_rate": 0.0001530264881846136, "loss": 0.0257, "step": 6906 }, { "epoch": 3.223803967327888, "grad_norm": 0.80078125, "learning_rate": 0.00015301404696510968, "loss": 0.0344, "step": 6907 }, { "epoch": 3.2242707117852976, "grad_norm": 0.77734375, "learning_rate": 0.00015300160460415452, "loss": 0.0362, "step": 6908 }, { "epoch": 3.224737456242707, "grad_norm": 0.703125, "learning_rate": 0.000152989161102016, "loss": 0.0317, "step": 6909 }, { "epoch": 3.2252042007001167, "grad_norm": 0.95703125, "learning_rate": 0.00015297671645896204, "loss": 0.0481, "step": 6910 }, { "epoch": 3.225670945157526, "grad_norm": 0.66796875, "learning_rate": 0.00015296427067526061, "loss": 0.0233, "step": 6911 }, { "epoch": 3.226137689614936, "grad_norm": 1.0625, "learning_rate": 0.0001529518237511797, "loss": 0.046, "step": 6912 }, { "epoch": 3.226604434072345, "grad_norm": 0.7734375, "learning_rate": 0.00015293937568698724, "loss": 0.0379, "step": 6913 }, { "epoch": 3.227071178529755, "grad_norm": 0.890625, "learning_rate": 0.0001529269264829513, "loss": 0.0376, "step": 6914 }, { "epoch": 3.2275379229871644, "grad_norm": 0.7109375, "learning_rate": 0.00015291447613933988, "loss": 0.0304, "step": 6915 }, { "epoch": 3.228004667444574, "grad_norm": 0.73046875, "learning_rate": 0.0001529020246564211, "loss": 0.0286, "step": 6916 }, { "epoch": 3.2284714119019835, "grad_norm": 0.62109375, "learning_rate": 0.000152889572034463, "loss": 0.0346, "step": 6917 }, { "epoch": 3.2289381563593933, "grad_norm": 0.80078125, "learning_rate": 0.00015287711827373376, "loss": 0.0358, "step": 6918 }, { "epoch": 3.2294049008168026, "grad_norm": 0.703125, "learning_rate": 0.00015286466337450152, "loss": 0.0332, "step": 6919 }, { "epoch": 3.2298716452742124, "grad_norm": 0.75390625, "learning_rate": 0.0001528522073370344, "loss": 0.0418, "step": 6920 }, { "epoch": 3.230338389731622, "grad_norm": 0.67578125, "learning_rate": 0.0001528397501616006, "loss": 0.0257, "step": 6921 }, { "epoch": 3.2308051341890316, "grad_norm": 0.671875, "learning_rate": 0.00015282729184846837, "loss": 0.0307, "step": 6922 }, { "epoch": 3.231271878646441, "grad_norm": 0.86328125, "learning_rate": 0.00015281483239790595, "loss": 0.0431, "step": 6923 }, { "epoch": 3.2317386231038507, "grad_norm": 0.7890625, "learning_rate": 0.00015280237181018155, "loss": 0.0453, "step": 6924 }, { "epoch": 3.23220536756126, "grad_norm": 0.84375, "learning_rate": 0.00015278991008556351, "loss": 0.0278, "step": 6925 }, { "epoch": 3.23267211201867, "grad_norm": 0.7265625, "learning_rate": 0.00015277744722432018, "loss": 0.0315, "step": 6926 }, { "epoch": 3.2331388564760792, "grad_norm": 0.7890625, "learning_rate": 0.00015276498322671983, "loss": 0.0278, "step": 6927 }, { "epoch": 3.233605600933489, "grad_norm": 0.71484375, "learning_rate": 0.00015275251809303083, "loss": 0.015, "step": 6928 }, { "epoch": 3.2340723453908984, "grad_norm": 0.54296875, "learning_rate": 0.00015274005182352158, "loss": 0.0215, "step": 6929 }, { "epoch": 3.234539089848308, "grad_norm": 0.69140625, "learning_rate": 0.00015272758441846053, "loss": 0.0322, "step": 6930 }, { "epoch": 3.2350058343057175, "grad_norm": 1.2265625, "learning_rate": 0.00015271511587811604, "loss": 0.0346, "step": 6931 }, { "epoch": 3.2354725787631273, "grad_norm": 0.87109375, "learning_rate": 0.00015270264620275664, "loss": 0.0522, "step": 6932 }, { "epoch": 3.2359393232205367, "grad_norm": 0.8046875, "learning_rate": 0.0001526901753926508, "loss": 0.0267, "step": 6933 }, { "epoch": 3.2364060676779465, "grad_norm": 0.69921875, "learning_rate": 0.00015267770344806704, "loss": 0.0327, "step": 6934 }, { "epoch": 3.236872812135356, "grad_norm": 0.796875, "learning_rate": 0.00015266523036927386, "loss": 0.0286, "step": 6935 }, { "epoch": 3.2373395565927656, "grad_norm": 0.60546875, "learning_rate": 0.0001526527561565398, "loss": 0.0189, "step": 6936 }, { "epoch": 3.237806301050175, "grad_norm": 0.78515625, "learning_rate": 0.00015264028081013353, "loss": 0.0333, "step": 6937 }, { "epoch": 3.2382730455075848, "grad_norm": 0.83203125, "learning_rate": 0.00015262780433032358, "loss": 0.027, "step": 6938 }, { "epoch": 3.238739789964994, "grad_norm": 0.58203125, "learning_rate": 0.00015261532671737862, "loss": 0.0246, "step": 6939 }, { "epoch": 3.239206534422404, "grad_norm": 1.046875, "learning_rate": 0.0001526028479715673, "loss": 0.037, "step": 6940 }, { "epoch": 3.2396732788798133, "grad_norm": 0.6484375, "learning_rate": 0.0001525903680931583, "loss": 0.0217, "step": 6941 }, { "epoch": 3.240140023337223, "grad_norm": 0.703125, "learning_rate": 0.00015257788708242034, "loss": 0.0461, "step": 6942 }, { "epoch": 3.2406067677946324, "grad_norm": 0.8046875, "learning_rate": 0.00015256540493962213, "loss": 0.0309, "step": 6943 }, { "epoch": 3.241073512252042, "grad_norm": 0.828125, "learning_rate": 0.0001525529216650324, "loss": 0.0403, "step": 6944 }, { "epoch": 3.2415402567094516, "grad_norm": 0.96484375, "learning_rate": 0.00015254043725892, "loss": 0.0324, "step": 6945 }, { "epoch": 3.2420070011668614, "grad_norm": 0.734375, "learning_rate": 0.00015252795172155366, "loss": 0.0301, "step": 6946 }, { "epoch": 3.2424737456242707, "grad_norm": 0.6875, "learning_rate": 0.00015251546505320223, "loss": 0.0283, "step": 6947 }, { "epoch": 3.24294049008168, "grad_norm": 0.65625, "learning_rate": 0.00015250297725413457, "loss": 0.0245, "step": 6948 }, { "epoch": 3.24340723453909, "grad_norm": 0.78515625, "learning_rate": 0.00015249048832461962, "loss": 0.0313, "step": 6949 }, { "epoch": 3.2438739789964997, "grad_norm": 0.65625, "learning_rate": 0.00015247799826492614, "loss": 0.0339, "step": 6950 }, { "epoch": 3.244340723453909, "grad_norm": 0.65625, "learning_rate": 0.00015246550707532318, "loss": 0.0282, "step": 6951 }, { "epoch": 3.2448074679113184, "grad_norm": 0.8359375, "learning_rate": 0.00015245301475607962, "loss": 0.04, "step": 6952 }, { "epoch": 3.245274212368728, "grad_norm": 0.875, "learning_rate": 0.00015244052130746448, "loss": 0.0445, "step": 6953 }, { "epoch": 3.2457409568261375, "grad_norm": 0.85546875, "learning_rate": 0.0001524280267297467, "loss": 0.0306, "step": 6954 }, { "epoch": 3.2462077012835473, "grad_norm": 0.7421875, "learning_rate": 0.00015241553102319534, "loss": 0.0358, "step": 6955 }, { "epoch": 3.2466744457409566, "grad_norm": 0.7890625, "learning_rate": 0.00015240303418807946, "loss": 0.0289, "step": 6956 }, { "epoch": 3.2471411901983664, "grad_norm": 0.828125, "learning_rate": 0.00015239053622466807, "loss": 0.0464, "step": 6957 }, { "epoch": 3.247607934655776, "grad_norm": 0.859375, "learning_rate": 0.00015237803713323035, "loss": 0.0442, "step": 6958 }, { "epoch": 3.2480746791131856, "grad_norm": 0.75, "learning_rate": 0.00015236553691403538, "loss": 0.0414, "step": 6959 }, { "epoch": 3.248541423570595, "grad_norm": 1.0625, "learning_rate": 0.00015235303556735227, "loss": 0.0383, "step": 6960 }, { "epoch": 3.2490081680280047, "grad_norm": 0.8125, "learning_rate": 0.00015234053309345024, "loss": 0.0406, "step": 6961 }, { "epoch": 3.249474912485414, "grad_norm": 1.0703125, "learning_rate": 0.00015232802949259844, "loss": 0.0595, "step": 6962 }, { "epoch": 3.249941656942824, "grad_norm": 0.76953125, "learning_rate": 0.00015231552476506613, "loss": 0.0276, "step": 6963 }, { "epoch": 3.2504084014002332, "grad_norm": 0.87109375, "learning_rate": 0.0001523030189111225, "loss": 0.0523, "step": 6964 }, { "epoch": 3.250875145857643, "grad_norm": 0.83203125, "learning_rate": 0.00015229051193103686, "loss": 0.0475, "step": 6965 }, { "epoch": 3.2513418903150524, "grad_norm": 0.578125, "learning_rate": 0.00015227800382507849, "loss": 0.0348, "step": 6966 }, { "epoch": 3.251808634772462, "grad_norm": 0.875, "learning_rate": 0.00015226549459351665, "loss": 0.0337, "step": 6967 }, { "epoch": 3.2522753792298715, "grad_norm": 1.015625, "learning_rate": 0.00015225298423662074, "loss": 0.0296, "step": 6968 }, { "epoch": 3.2522753792298715, "eval_loss": 1.7366282939910889, "eval_runtime": 56.0103, "eval_samples_per_second": 32.208, "eval_steps_per_second": 4.035, "step": 6968 }, { "epoch": 3.2527421236872813, "grad_norm": 0.6015625, "learning_rate": 0.00015224047275466015, "loss": 0.0262, "step": 6969 }, { "epoch": 3.2532088681446907, "grad_norm": 0.56640625, "learning_rate": 0.00015222796014790417, "loss": 0.0315, "step": 6970 }, { "epoch": 3.2536756126021005, "grad_norm": 0.71875, "learning_rate": 0.00015221544641662226, "loss": 0.0311, "step": 6971 }, { "epoch": 3.25414235705951, "grad_norm": 0.82421875, "learning_rate": 0.00015220293156108388, "loss": 0.0401, "step": 6972 }, { "epoch": 3.2546091015169196, "grad_norm": 0.62890625, "learning_rate": 0.00015219041558155847, "loss": 0.0163, "step": 6973 }, { "epoch": 3.255075845974329, "grad_norm": 0.74609375, "learning_rate": 0.00015217789847831546, "loss": 0.0422, "step": 6974 }, { "epoch": 3.2555425904317388, "grad_norm": 0.5234375, "learning_rate": 0.00015216538025162444, "loss": 0.021, "step": 6975 }, { "epoch": 3.256009334889148, "grad_norm": 0.63671875, "learning_rate": 0.0001521528609017549, "loss": 0.0266, "step": 6976 }, { "epoch": 3.256476079346558, "grad_norm": 0.90625, "learning_rate": 0.0001521403404289764, "loss": 0.0387, "step": 6977 }, { "epoch": 3.2569428238039673, "grad_norm": 0.609375, "learning_rate": 0.00015212781883355852, "loss": 0.0241, "step": 6978 }, { "epoch": 3.257409568261377, "grad_norm": 0.7421875, "learning_rate": 0.00015211529611577088, "loss": 0.0251, "step": 6979 }, { "epoch": 3.2578763127187864, "grad_norm": 0.98828125, "learning_rate": 0.00015210277227588304, "loss": 0.0424, "step": 6980 }, { "epoch": 3.258343057176196, "grad_norm": 0.78125, "learning_rate": 0.00015209024731416474, "loss": 0.0256, "step": 6981 }, { "epoch": 3.2588098016336056, "grad_norm": 0.8671875, "learning_rate": 0.00015207772123088562, "loss": 0.0347, "step": 6982 }, { "epoch": 3.2592765460910154, "grad_norm": 0.78515625, "learning_rate": 0.00015206519402631536, "loss": 0.0416, "step": 6983 }, { "epoch": 3.2597432905484247, "grad_norm": 0.74609375, "learning_rate": 0.00015205266570072375, "loss": 0.0308, "step": 6984 }, { "epoch": 3.2602100350058345, "grad_norm": 0.90234375, "learning_rate": 0.00015204013625438046, "loss": 0.0311, "step": 6985 }, { "epoch": 3.260676779463244, "grad_norm": 0.81640625, "learning_rate": 0.0001520276056875553, "loss": 0.0338, "step": 6986 }, { "epoch": 3.261143523920653, "grad_norm": 0.83984375, "learning_rate": 0.00015201507400051803, "loss": 0.0314, "step": 6987 }, { "epoch": 3.261610268378063, "grad_norm": 0.69921875, "learning_rate": 0.0001520025411935386, "loss": 0.0347, "step": 6988 }, { "epoch": 3.262077012835473, "grad_norm": 0.73046875, "learning_rate": 0.00015199000726688668, "loss": 0.0276, "step": 6989 }, { "epoch": 3.262543757292882, "grad_norm": 0.5859375, "learning_rate": 0.00015197747222083223, "loss": 0.0302, "step": 6990 }, { "epoch": 3.2630105017502915, "grad_norm": 0.4296875, "learning_rate": 0.00015196493605564514, "loss": 0.0159, "step": 6991 }, { "epoch": 3.2634772462077013, "grad_norm": 0.64453125, "learning_rate": 0.00015195239877159535, "loss": 0.0249, "step": 6992 }, { "epoch": 3.263943990665111, "grad_norm": 0.8203125, "learning_rate": 0.00015193986036895276, "loss": 0.0362, "step": 6993 }, { "epoch": 3.2644107351225204, "grad_norm": 0.5859375, "learning_rate": 0.0001519273208479873, "loss": 0.0237, "step": 6994 }, { "epoch": 3.26487747957993, "grad_norm": 0.71484375, "learning_rate": 0.00015191478020896906, "loss": 0.0255, "step": 6995 }, { "epoch": 3.2653442240373396, "grad_norm": 1.0625, "learning_rate": 0.000151902238452168, "loss": 0.036, "step": 6996 }, { "epoch": 3.265810968494749, "grad_norm": 0.46875, "learning_rate": 0.0001518896955778541, "loss": 0.023, "step": 6997 }, { "epoch": 3.2662777129521587, "grad_norm": 0.75, "learning_rate": 0.00015187715158629757, "loss": 0.0209, "step": 6998 }, { "epoch": 3.266744457409568, "grad_norm": 0.83203125, "learning_rate": 0.00015186460647776835, "loss": 0.0379, "step": 6999 }, { "epoch": 3.267211201866978, "grad_norm": 0.98046875, "learning_rate": 0.0001518520602525366, "loss": 0.0356, "step": 7000 }, { "epoch": 3.2676779463243872, "grad_norm": 0.8046875, "learning_rate": 0.00015183951291087251, "loss": 0.0324, "step": 7001 }, { "epoch": 3.268144690781797, "grad_norm": 0.875, "learning_rate": 0.00015182696445304616, "loss": 0.0398, "step": 7002 }, { "epoch": 3.2686114352392064, "grad_norm": 1.046875, "learning_rate": 0.00015181441487932776, "loss": 0.045, "step": 7003 }, { "epoch": 3.269078179696616, "grad_norm": 0.8671875, "learning_rate": 0.00015180186418998755, "loss": 0.0343, "step": 7004 }, { "epoch": 3.2695449241540255, "grad_norm": 0.6875, "learning_rate": 0.00015178931238529568, "loss": 0.0357, "step": 7005 }, { "epoch": 3.2700116686114353, "grad_norm": 0.58203125, "learning_rate": 0.00015177675946552249, "loss": 0.0275, "step": 7006 }, { "epoch": 3.2704784130688447, "grad_norm": 0.76953125, "learning_rate": 0.0001517642054309382, "loss": 0.0335, "step": 7007 }, { "epoch": 3.2709451575262545, "grad_norm": 0.70703125, "learning_rate": 0.00015175165028181314, "loss": 0.0318, "step": 7008 }, { "epoch": 3.271411901983664, "grad_norm": 0.68359375, "learning_rate": 0.00015173909401841766, "loss": 0.0196, "step": 7009 }, { "epoch": 3.2718786464410736, "grad_norm": 0.6953125, "learning_rate": 0.00015172653664102202, "loss": 0.0236, "step": 7010 }, { "epoch": 3.272345390898483, "grad_norm": 1.1171875, "learning_rate": 0.0001517139781498967, "loss": 0.0324, "step": 7011 }, { "epoch": 3.2728121353558928, "grad_norm": 0.57421875, "learning_rate": 0.00015170141854531203, "loss": 0.0258, "step": 7012 }, { "epoch": 3.273278879813302, "grad_norm": 0.8671875, "learning_rate": 0.00015168885782753844, "loss": 0.0381, "step": 7013 }, { "epoch": 3.273745624270712, "grad_norm": 0.796875, "learning_rate": 0.00015167629599684642, "loss": 0.0285, "step": 7014 }, { "epoch": 3.2742123687281213, "grad_norm": 0.8984375, "learning_rate": 0.00015166373305350642, "loss": 0.0387, "step": 7015 }, { "epoch": 3.274679113185531, "grad_norm": 0.94921875, "learning_rate": 0.0001516511689977889, "loss": 0.0371, "step": 7016 }, { "epoch": 3.2751458576429404, "grad_norm": 0.69140625, "learning_rate": 0.0001516386038299644, "loss": 0.0233, "step": 7017 }, { "epoch": 3.27561260210035, "grad_norm": 0.53125, "learning_rate": 0.00015162603755030353, "loss": 0.0198, "step": 7018 }, { "epoch": 3.2760793465577596, "grad_norm": 0.61328125, "learning_rate": 0.00015161347015907675, "loss": 0.0155, "step": 7019 }, { "epoch": 3.2765460910151694, "grad_norm": 0.8203125, "learning_rate": 0.00015160090165655468, "loss": 0.0354, "step": 7020 }, { "epoch": 3.2770128354725787, "grad_norm": 0.85546875, "learning_rate": 0.00015158833204300794, "loss": 0.0443, "step": 7021 }, { "epoch": 3.2774795799299885, "grad_norm": 0.65625, "learning_rate": 0.00015157576131870717, "loss": 0.0237, "step": 7022 }, { "epoch": 3.277946324387398, "grad_norm": 0.75, "learning_rate": 0.00015156318948392303, "loss": 0.035, "step": 7023 }, { "epoch": 3.2784130688448077, "grad_norm": 0.71875, "learning_rate": 0.00015155061653892625, "loss": 0.0261, "step": 7024 }, { "epoch": 3.278879813302217, "grad_norm": 0.6328125, "learning_rate": 0.0001515380424839875, "loss": 0.03, "step": 7025 }, { "epoch": 3.2793465577596264, "grad_norm": 0.78125, "learning_rate": 0.00015152546731937746, "loss": 0.0318, "step": 7026 }, { "epoch": 3.279813302217036, "grad_norm": 0.73828125, "learning_rate": 0.000151512891045367, "loss": 0.0337, "step": 7027 }, { "epoch": 3.280280046674446, "grad_norm": 0.69921875, "learning_rate": 0.00015150031366222678, "loss": 0.0209, "step": 7028 }, { "epoch": 3.2807467911318553, "grad_norm": 0.77734375, "learning_rate": 0.0001514877351702277, "loss": 0.0352, "step": 7029 }, { "epoch": 3.2812135355892647, "grad_norm": 0.7265625, "learning_rate": 0.00015147515556964057, "loss": 0.0307, "step": 7030 }, { "epoch": 3.2816802800466744, "grad_norm": 0.6953125, "learning_rate": 0.0001514625748607362, "loss": 0.0341, "step": 7031 }, { "epoch": 3.2821470245040842, "grad_norm": 0.7421875, "learning_rate": 0.0001514499930437855, "loss": 0.0353, "step": 7032 }, { "epoch": 3.2826137689614936, "grad_norm": 0.609375, "learning_rate": 0.00015143741011905938, "loss": 0.0248, "step": 7033 }, { "epoch": 3.283080513418903, "grad_norm": 0.69921875, "learning_rate": 0.00015142482608682874, "loss": 0.0242, "step": 7034 }, { "epoch": 3.2835472578763127, "grad_norm": 0.96484375, "learning_rate": 0.00015141224094736454, "loss": 0.0451, "step": 7035 }, { "epoch": 3.284014002333722, "grad_norm": 0.69921875, "learning_rate": 0.00015139965470093773, "loss": 0.0267, "step": 7036 }, { "epoch": 3.284480746791132, "grad_norm": 0.953125, "learning_rate": 0.00015138706734781936, "loss": 0.032, "step": 7037 }, { "epoch": 3.2849474912485412, "grad_norm": 0.78515625, "learning_rate": 0.0001513744788882804, "loss": 0.0481, "step": 7038 }, { "epoch": 3.285414235705951, "grad_norm": 0.80859375, "learning_rate": 0.0001513618893225919, "loss": 0.0302, "step": 7039 }, { "epoch": 3.2858809801633604, "grad_norm": 0.703125, "learning_rate": 0.00015134929865102495, "loss": 0.0214, "step": 7040 }, { "epoch": 3.28634772462077, "grad_norm": 0.765625, "learning_rate": 0.0001513367068738506, "loss": 0.0295, "step": 7041 }, { "epoch": 3.2868144690781795, "grad_norm": 0.55859375, "learning_rate": 0.00015132411399133998, "loss": 0.0301, "step": 7042 }, { "epoch": 3.2872812135355893, "grad_norm": 0.8203125, "learning_rate": 0.0001513115200037643, "loss": 0.0339, "step": 7043 }, { "epoch": 3.2877479579929987, "grad_norm": 0.9375, "learning_rate": 0.0001512989249113946, "loss": 0.0479, "step": 7044 }, { "epoch": 3.2882147024504085, "grad_norm": 0.6015625, "learning_rate": 0.0001512863287145022, "loss": 0.0164, "step": 7045 }, { "epoch": 3.288681446907818, "grad_norm": 0.69921875, "learning_rate": 0.00015127373141335818, "loss": 0.0247, "step": 7046 }, { "epoch": 3.2891481913652276, "grad_norm": 0.671875, "learning_rate": 0.00015126113300823385, "loss": 0.0256, "step": 7047 }, { "epoch": 3.289614935822637, "grad_norm": 0.81640625, "learning_rate": 0.00015124853349940049, "loss": 0.0409, "step": 7048 }, { "epoch": 3.2900816802800468, "grad_norm": 0.76171875, "learning_rate": 0.0001512359328871293, "loss": 0.0415, "step": 7049 }, { "epoch": 3.290548424737456, "grad_norm": 0.6640625, "learning_rate": 0.00015122333117169166, "loss": 0.0287, "step": 7050 }, { "epoch": 3.291015169194866, "grad_norm": 0.80078125, "learning_rate": 0.00015121072835335884, "loss": 0.0338, "step": 7051 }, { "epoch": 3.2914819136522753, "grad_norm": 0.69921875, "learning_rate": 0.0001511981244324022, "loss": 0.0326, "step": 7052 }, { "epoch": 3.291948658109685, "grad_norm": 0.5625, "learning_rate": 0.00015118551940909315, "loss": 0.022, "step": 7053 }, { "epoch": 3.2924154025670944, "grad_norm": 0.69921875, "learning_rate": 0.00015117291328370312, "loss": 0.0303, "step": 7054 }, { "epoch": 3.292882147024504, "grad_norm": 0.83203125, "learning_rate": 0.00015116030605650345, "loss": 0.0399, "step": 7055 }, { "epoch": 3.2933488914819136, "grad_norm": 0.828125, "learning_rate": 0.00015114769772776561, "loss": 0.0422, "step": 7056 }, { "epoch": 3.2938156359393234, "grad_norm": 0.71875, "learning_rate": 0.00015113508829776113, "loss": 0.0304, "step": 7057 }, { "epoch": 3.2942823803967327, "grad_norm": 0.61328125, "learning_rate": 0.00015112247776676144, "loss": 0.0242, "step": 7058 }, { "epoch": 3.2947491248541425, "grad_norm": 0.703125, "learning_rate": 0.0001511098661350381, "loss": 0.0304, "step": 7059 }, { "epoch": 3.295215869311552, "grad_norm": 0.83203125, "learning_rate": 0.0001510972534028626, "loss": 0.0368, "step": 7060 }, { "epoch": 3.2956826137689617, "grad_norm": 0.8984375, "learning_rate": 0.00015108463957050656, "loss": 0.0419, "step": 7061 }, { "epoch": 3.296149358226371, "grad_norm": 0.75390625, "learning_rate": 0.00015107202463824154, "loss": 0.0333, "step": 7062 }, { "epoch": 3.296616102683781, "grad_norm": 0.76953125, "learning_rate": 0.00015105940860633919, "loss": 0.0328, "step": 7063 }, { "epoch": 3.29708284714119, "grad_norm": 0.84375, "learning_rate": 0.00015104679147507111, "loss": 0.0358, "step": 7064 }, { "epoch": 3.2975495915986, "grad_norm": 0.94140625, "learning_rate": 0.00015103417324470897, "loss": 0.0395, "step": 7065 }, { "epoch": 3.2980163360560093, "grad_norm": 0.83203125, "learning_rate": 0.00015102155391552447, "loss": 0.0316, "step": 7066 }, { "epoch": 3.298483080513419, "grad_norm": 0.63671875, "learning_rate": 0.0001510089334877893, "loss": 0.0259, "step": 7067 }, { "epoch": 3.2989498249708284, "grad_norm": 0.8046875, "learning_rate": 0.00015099631196177518, "loss": 0.0347, "step": 7068 }, { "epoch": 3.299416569428238, "grad_norm": 0.56640625, "learning_rate": 0.00015098368933775386, "loss": 0.021, "step": 7069 }, { "epoch": 3.2998833138856476, "grad_norm": 0.78515625, "learning_rate": 0.0001509710656159972, "loss": 0.0288, "step": 7070 }, { "epoch": 3.3003500583430574, "grad_norm": 0.66015625, "learning_rate": 0.0001509584407967769, "loss": 0.025, "step": 7071 }, { "epoch": 3.3008168028004667, "grad_norm": 0.7421875, "learning_rate": 0.00015094581488036486, "loss": 0.0276, "step": 7072 }, { "epoch": 3.301283547257876, "grad_norm": 0.578125, "learning_rate": 0.0001509331878670329, "loss": 0.0225, "step": 7073 }, { "epoch": 3.301750291715286, "grad_norm": 0.490234375, "learning_rate": 0.00015092055975705288, "loss": 0.0215, "step": 7074 }, { "epoch": 3.3022170361726957, "grad_norm": 0.69921875, "learning_rate": 0.00015090793055069668, "loss": 0.0332, "step": 7075 }, { "epoch": 3.302683780630105, "grad_norm": 0.76953125, "learning_rate": 0.00015089530024823628, "loss": 0.025, "step": 7076 }, { "epoch": 3.3031505250875144, "grad_norm": 0.734375, "learning_rate": 0.0001508826688499436, "loss": 0.0323, "step": 7077 }, { "epoch": 3.303617269544924, "grad_norm": 0.734375, "learning_rate": 0.0001508700363560906, "loss": 0.0316, "step": 7078 }, { "epoch": 3.3040840140023335, "grad_norm": 0.8984375, "learning_rate": 0.00015085740276694928, "loss": 0.036, "step": 7079 }, { "epoch": 3.3045507584597433, "grad_norm": 0.671875, "learning_rate": 0.00015084476808279167, "loss": 0.0261, "step": 7080 }, { "epoch": 3.3050175029171527, "grad_norm": 0.8359375, "learning_rate": 0.00015083213230388976, "loss": 0.0391, "step": 7081 }, { "epoch": 3.3054842473745625, "grad_norm": 0.86328125, "learning_rate": 0.00015081949543051566, "loss": 0.0501, "step": 7082 }, { "epoch": 3.305950991831972, "grad_norm": 0.73046875, "learning_rate": 0.00015080685746294144, "loss": 0.0271, "step": 7083 }, { "epoch": 3.3064177362893816, "grad_norm": 0.66015625, "learning_rate": 0.0001507942184014392, "loss": 0.0306, "step": 7084 }, { "epoch": 3.306884480746791, "grad_norm": 0.8671875, "learning_rate": 0.00015078157824628105, "loss": 0.0285, "step": 7085 }, { "epoch": 3.3073512252042008, "grad_norm": 0.85546875, "learning_rate": 0.00015076893699773924, "loss": 0.0465, "step": 7086 }, { "epoch": 3.30781796966161, "grad_norm": 0.65625, "learning_rate": 0.00015075629465608582, "loss": 0.0242, "step": 7087 }, { "epoch": 3.30828471411902, "grad_norm": 0.71875, "learning_rate": 0.00015074365122159312, "loss": 0.0315, "step": 7088 }, { "epoch": 3.3087514585764293, "grad_norm": 1.09375, "learning_rate": 0.00015073100669453328, "loss": 0.0468, "step": 7089 }, { "epoch": 3.309218203033839, "grad_norm": 0.67578125, "learning_rate": 0.0001507183610751786, "loss": 0.0313, "step": 7090 }, { "epoch": 3.3096849474912484, "grad_norm": 0.87109375, "learning_rate": 0.00015070571436380127, "loss": 0.0272, "step": 7091 }, { "epoch": 3.310151691948658, "grad_norm": 0.83203125, "learning_rate": 0.0001506930665606737, "loss": 0.0496, "step": 7092 }, { "epoch": 3.3106184364060676, "grad_norm": 0.97265625, "learning_rate": 0.00015068041766606817, "loss": 0.0423, "step": 7093 }, { "epoch": 3.3110851808634774, "grad_norm": 0.796875, "learning_rate": 0.000150667767680257, "loss": 0.0338, "step": 7094 }, { "epoch": 3.3115519253208867, "grad_norm": 0.6796875, "learning_rate": 0.00015065511660351263, "loss": 0.0336, "step": 7095 }, { "epoch": 3.3120186697782965, "grad_norm": 0.76953125, "learning_rate": 0.00015064246443610731, "loss": 0.0401, "step": 7096 }, { "epoch": 3.312485414235706, "grad_norm": 0.76953125, "learning_rate": 0.0001506298111783136, "loss": 0.0361, "step": 7097 }, { "epoch": 3.3129521586931157, "grad_norm": 0.59375, "learning_rate": 0.00015061715683040383, "loss": 0.0283, "step": 7098 }, { "epoch": 3.313418903150525, "grad_norm": 0.9609375, "learning_rate": 0.00015060450139265055, "loss": 0.0341, "step": 7099 }, { "epoch": 3.313885647607935, "grad_norm": 0.70703125, "learning_rate": 0.00015059184486532621, "loss": 0.0276, "step": 7100 }, { "epoch": 3.314352392065344, "grad_norm": 0.80859375, "learning_rate": 0.00015057918724870327, "loss": 0.0343, "step": 7101 }, { "epoch": 3.314819136522754, "grad_norm": 0.8828125, "learning_rate": 0.00015056652854305438, "loss": 0.0361, "step": 7102 }, { "epoch": 3.3152858809801633, "grad_norm": 0.59765625, "learning_rate": 0.00015055386874865198, "loss": 0.0297, "step": 7103 }, { "epoch": 3.315752625437573, "grad_norm": 0.76953125, "learning_rate": 0.0001505412078657687, "loss": 0.036, "step": 7104 }, { "epoch": 3.3162193698949824, "grad_norm": 1.171875, "learning_rate": 0.00015052854589467715, "loss": 0.0428, "step": 7105 }, { "epoch": 3.3166861143523922, "grad_norm": 1.0546875, "learning_rate": 0.00015051588283564998, "loss": 0.0469, "step": 7106 }, { "epoch": 3.3171528588098016, "grad_norm": 0.69140625, "learning_rate": 0.00015050321868895973, "loss": 0.0359, "step": 7107 }, { "epoch": 3.3176196032672114, "grad_norm": 0.828125, "learning_rate": 0.00015049055345487918, "loss": 0.0471, "step": 7108 }, { "epoch": 3.3180863477246207, "grad_norm": 0.578125, "learning_rate": 0.000150477887133681, "loss": 0.0212, "step": 7109 }, { "epoch": 3.3185530921820305, "grad_norm": 0.859375, "learning_rate": 0.00015046521972563793, "loss": 0.0431, "step": 7110 }, { "epoch": 3.31901983663944, "grad_norm": 0.78515625, "learning_rate": 0.00015045255123102264, "loss": 0.0301, "step": 7111 }, { "epoch": 3.3194865810968492, "grad_norm": 0.5859375, "learning_rate": 0.00015043988165010797, "loss": 0.023, "step": 7112 }, { "epoch": 3.319953325554259, "grad_norm": 0.73046875, "learning_rate": 0.0001504272109831667, "loss": 0.0261, "step": 7113 }, { "epoch": 3.320420070011669, "grad_norm": 0.68359375, "learning_rate": 0.00015041453923047155, "loss": 0.0274, "step": 7114 }, { "epoch": 3.320886814469078, "grad_norm": 0.71484375, "learning_rate": 0.0001504018663922955, "loss": 0.0252, "step": 7115 }, { "epoch": 3.3213535589264875, "grad_norm": 0.98828125, "learning_rate": 0.00015038919246891132, "loss": 0.0482, "step": 7116 }, { "epoch": 3.3218203033838973, "grad_norm": 0.8515625, "learning_rate": 0.00015037651746059195, "loss": 0.0515, "step": 7117 }, { "epoch": 3.322287047841307, "grad_norm": 0.8515625, "learning_rate": 0.00015036384136761022, "loss": 0.0357, "step": 7118 }, { "epoch": 3.3227537922987165, "grad_norm": 0.7109375, "learning_rate": 0.0001503511641902392, "loss": 0.0317, "step": 7119 }, { "epoch": 3.323220536756126, "grad_norm": 0.63671875, "learning_rate": 0.00015033848592875163, "loss": 0.0267, "step": 7120 }, { "epoch": 3.3236872812135356, "grad_norm": 0.86328125, "learning_rate": 0.00015032580658342067, "loss": 0.0381, "step": 7121 }, { "epoch": 3.324154025670945, "grad_norm": 0.6015625, "learning_rate": 0.00015031312615451924, "loss": 0.0208, "step": 7122 }, { "epoch": 3.3246207701283548, "grad_norm": 0.68359375, "learning_rate": 0.0001503004446423204, "loss": 0.0318, "step": 7123 }, { "epoch": 3.325087514585764, "grad_norm": 0.76953125, "learning_rate": 0.00015028776204709714, "loss": 0.0295, "step": 7124 }, { "epoch": 3.325554259043174, "grad_norm": 0.796875, "learning_rate": 0.0001502750783691226, "loss": 0.0282, "step": 7125 }, { "epoch": 3.3260210035005833, "grad_norm": 1.3203125, "learning_rate": 0.00015026239360866982, "loss": 0.0336, "step": 7126 }, { "epoch": 3.326487747957993, "grad_norm": 0.86328125, "learning_rate": 0.00015024970776601198, "loss": 0.0348, "step": 7127 }, { "epoch": 3.3269544924154024, "grad_norm": 0.7890625, "learning_rate": 0.00015023702084142215, "loss": 0.0277, "step": 7128 }, { "epoch": 3.327421236872812, "grad_norm": 0.7265625, "learning_rate": 0.00015022433283517353, "loss": 0.0278, "step": 7129 }, { "epoch": 3.3278879813302216, "grad_norm": 1.0859375, "learning_rate": 0.00015021164374753925, "loss": 0.0514, "step": 7130 }, { "epoch": 3.3283547257876314, "grad_norm": 0.76171875, "learning_rate": 0.0001501989535787926, "loss": 0.0316, "step": 7131 }, { "epoch": 3.3288214702450407, "grad_norm": 0.8203125, "learning_rate": 0.00015018626232920682, "loss": 0.033, "step": 7132 }, { "epoch": 3.3292882147024505, "grad_norm": 0.6328125, "learning_rate": 0.0001501735699990551, "loss": 0.0205, "step": 7133 }, { "epoch": 3.32975495915986, "grad_norm": 0.88671875, "learning_rate": 0.00015016087658861077, "loss": 0.037, "step": 7134 }, { "epoch": 3.3302217036172697, "grad_norm": 0.7734375, "learning_rate": 0.00015014818209814707, "loss": 0.0342, "step": 7135 }, { "epoch": 3.330688448074679, "grad_norm": 0.734375, "learning_rate": 0.0001501354865279374, "loss": 0.0372, "step": 7136 }, { "epoch": 3.331155192532089, "grad_norm": 0.7578125, "learning_rate": 0.00015012278987825507, "loss": 0.0317, "step": 7137 }, { "epoch": 3.331621936989498, "grad_norm": 0.90625, "learning_rate": 0.00015011009214937346, "loss": 0.0449, "step": 7138 }, { "epoch": 3.332088681446908, "grad_norm": 0.8671875, "learning_rate": 0.000150097393341566, "loss": 0.0375, "step": 7139 }, { "epoch": 3.3325554259043173, "grad_norm": 0.76171875, "learning_rate": 0.00015008469345510604, "loss": 0.0256, "step": 7140 }, { "epoch": 3.333022170361727, "grad_norm": 0.83984375, "learning_rate": 0.00015007199249026705, "loss": 0.0331, "step": 7141 }, { "epoch": 3.3334889148191365, "grad_norm": 0.86328125, "learning_rate": 0.00015005929044732255, "loss": 0.0303, "step": 7142 }, { "epoch": 3.3339556592765462, "grad_norm": 0.75390625, "learning_rate": 0.00015004658732654594, "loss": 0.0279, "step": 7143 }, { "epoch": 3.3344224037339556, "grad_norm": 0.734375, "learning_rate": 0.0001500338831282108, "loss": 0.036, "step": 7144 }, { "epoch": 3.3348891481913654, "grad_norm": 0.87109375, "learning_rate": 0.00015002117785259067, "loss": 0.0525, "step": 7145 }, { "epoch": 3.3353558926487747, "grad_norm": 0.75390625, "learning_rate": 0.000150008471499959, "loss": 0.0474, "step": 7146 }, { "epoch": 3.3358226371061845, "grad_norm": 0.7109375, "learning_rate": 0.00014999576407058953, "loss": 0.0345, "step": 7147 }, { "epoch": 3.336289381563594, "grad_norm": 0.66796875, "learning_rate": 0.00014998305556475575, "loss": 0.0224, "step": 7148 }, { "epoch": 3.3367561260210037, "grad_norm": 0.640625, "learning_rate": 0.00014997034598273137, "loss": 0.0256, "step": 7149 }, { "epoch": 3.337222870478413, "grad_norm": 0.765625, "learning_rate": 0.00014995763532478994, "loss": 0.0325, "step": 7150 }, { "epoch": 3.3376896149358224, "grad_norm": 0.8671875, "learning_rate": 0.00014994492359120522, "loss": 0.0308, "step": 7151 }, { "epoch": 3.338156359393232, "grad_norm": 1.0625, "learning_rate": 0.0001499322107822509, "loss": 0.0308, "step": 7152 }, { "epoch": 3.338623103850642, "grad_norm": 0.87109375, "learning_rate": 0.00014991949689820063, "loss": 0.0328, "step": 7153 }, { "epoch": 3.3390898483080513, "grad_norm": 0.62890625, "learning_rate": 0.00014990678193932825, "loss": 0.0311, "step": 7154 }, { "epoch": 3.3395565927654607, "grad_norm": 0.8125, "learning_rate": 0.00014989406590590747, "loss": 0.0299, "step": 7155 }, { "epoch": 3.3400233372228705, "grad_norm": 0.8359375, "learning_rate": 0.00014988134879821208, "loss": 0.0435, "step": 7156 }, { "epoch": 3.3404900816802803, "grad_norm": 0.828125, "learning_rate": 0.00014986863061651592, "loss": 0.0333, "step": 7157 }, { "epoch": 3.3409568261376896, "grad_norm": 0.95703125, "learning_rate": 0.00014985591136109283, "loss": 0.0269, "step": 7158 }, { "epoch": 3.341423570595099, "grad_norm": 0.80859375, "learning_rate": 0.0001498431910322166, "loss": 0.0375, "step": 7159 }, { "epoch": 3.3418903150525088, "grad_norm": 0.71484375, "learning_rate": 0.0001498304696301612, "loss": 0.0334, "step": 7160 }, { "epoch": 3.342357059509918, "grad_norm": 0.828125, "learning_rate": 0.0001498177471552005, "loss": 0.034, "step": 7161 }, { "epoch": 3.342823803967328, "grad_norm": 0.87109375, "learning_rate": 0.00014980502360760844, "loss": 0.0325, "step": 7162 }, { "epoch": 3.3432905484247373, "grad_norm": 0.83984375, "learning_rate": 0.00014979229898765895, "loss": 0.035, "step": 7163 }, { "epoch": 3.343757292882147, "grad_norm": 0.66796875, "learning_rate": 0.000149779573295626, "loss": 0.0293, "step": 7164 }, { "epoch": 3.3442240373395564, "grad_norm": 0.8203125, "learning_rate": 0.00014976684653178363, "loss": 0.0422, "step": 7165 }, { "epoch": 3.344690781796966, "grad_norm": 0.7734375, "learning_rate": 0.00014975411869640586, "loss": 0.0319, "step": 7166 }, { "epoch": 3.3451575262543756, "grad_norm": 0.82421875, "learning_rate": 0.00014974138978976668, "loss": 0.0332, "step": 7167 }, { "epoch": 3.3456242707117854, "grad_norm": 0.59765625, "learning_rate": 0.0001497286598121402, "loss": 0.0261, "step": 7168 }, { "epoch": 3.3460910151691947, "grad_norm": 0.62109375, "learning_rate": 0.00014971592876380049, "loss": 0.0232, "step": 7169 }, { "epoch": 3.3465577596266045, "grad_norm": 0.78125, "learning_rate": 0.00014970319664502166, "loss": 0.0311, "step": 7170 }, { "epoch": 3.347024504084014, "grad_norm": 0.66015625, "learning_rate": 0.00014969046345607786, "loss": 0.0272, "step": 7171 }, { "epoch": 3.3474912485414237, "grad_norm": 0.76953125, "learning_rate": 0.00014967772919724327, "loss": 0.0274, "step": 7172 }, { "epoch": 3.347957992998833, "grad_norm": 1.015625, "learning_rate": 0.00014966499386879204, "loss": 0.0427, "step": 7173 }, { "epoch": 3.348424737456243, "grad_norm": 0.76171875, "learning_rate": 0.00014965225747099842, "loss": 0.0311, "step": 7174 }, { "epoch": 3.348891481913652, "grad_norm": 0.96875, "learning_rate": 0.00014963952000413653, "loss": 0.037, "step": 7175 }, { "epoch": 3.349358226371062, "grad_norm": 0.765625, "learning_rate": 0.00014962678146848078, "loss": 0.033, "step": 7176 }, { "epoch": 3.3498249708284713, "grad_norm": 0.953125, "learning_rate": 0.00014961404186430532, "loss": 0.0417, "step": 7177 }, { "epoch": 3.350291715285881, "grad_norm": 0.71484375, "learning_rate": 0.00014960130119188452, "loss": 0.041, "step": 7178 }, { "epoch": 3.3507584597432905, "grad_norm": 0.91796875, "learning_rate": 0.00014958855945149262, "loss": 0.0283, "step": 7179 }, { "epoch": 3.3512252042007002, "grad_norm": 0.75390625, "learning_rate": 0.00014957581664340407, "loss": 0.0326, "step": 7180 }, { "epoch": 3.3516919486581096, "grad_norm": 0.98828125, "learning_rate": 0.00014956307276789313, "loss": 0.0472, "step": 7181 }, { "epoch": 3.3521586931155194, "grad_norm": 0.6484375, "learning_rate": 0.00014955032782523427, "loss": 0.033, "step": 7182 }, { "epoch": 3.3526254375729287, "grad_norm": 0.88671875, "learning_rate": 0.0001495375818157019, "loss": 0.0449, "step": 7183 }, { "epoch": 3.3530921820303385, "grad_norm": 0.796875, "learning_rate": 0.0001495248347395704, "loss": 0.0349, "step": 7184 }, { "epoch": 3.353558926487748, "grad_norm": 0.734375, "learning_rate": 0.00014951208659711427, "loss": 0.0295, "step": 7185 }, { "epoch": 3.3540256709451577, "grad_norm": 0.734375, "learning_rate": 0.00014949933738860795, "loss": 0.035, "step": 7186 }, { "epoch": 3.354492415402567, "grad_norm": 0.9375, "learning_rate": 0.000149486587114326, "loss": 0.0436, "step": 7187 }, { "epoch": 3.354959159859977, "grad_norm": 0.80078125, "learning_rate": 0.00014947383577454295, "loss": 0.0416, "step": 7188 }, { "epoch": 3.355425904317386, "grad_norm": 1.046875, "learning_rate": 0.0001494610833695333, "loss": 0.0351, "step": 7189 }, { "epoch": 3.355892648774796, "grad_norm": 0.81640625, "learning_rate": 0.00014944832989957166, "loss": 0.0395, "step": 7190 }, { "epoch": 3.3563593932322053, "grad_norm": 0.79296875, "learning_rate": 0.0001494355753649326, "loss": 0.0316, "step": 7191 }, { "epoch": 3.356826137689615, "grad_norm": 0.953125, "learning_rate": 0.00014942281976589074, "loss": 0.0324, "step": 7192 }, { "epoch": 3.3572928821470245, "grad_norm": 0.83203125, "learning_rate": 0.00014941006310272074, "loss": 0.0355, "step": 7193 }, { "epoch": 3.357759626604434, "grad_norm": 0.97265625, "learning_rate": 0.00014939730537569728, "loss": 0.0436, "step": 7194 }, { "epoch": 3.3582263710618436, "grad_norm": 0.59765625, "learning_rate": 0.00014938454658509502, "loss": 0.0291, "step": 7195 }, { "epoch": 3.3586931155192534, "grad_norm": 0.7734375, "learning_rate": 0.0001493717867311887, "loss": 0.0261, "step": 7196 }, { "epoch": 3.3591598599766628, "grad_norm": 0.7734375, "learning_rate": 0.00014935902581425297, "loss": 0.0295, "step": 7197 }, { "epoch": 3.359626604434072, "grad_norm": 0.578125, "learning_rate": 0.00014934626383456272, "loss": 0.0338, "step": 7198 }, { "epoch": 3.360093348891482, "grad_norm": 0.74609375, "learning_rate": 0.00014933350079239258, "loss": 0.0352, "step": 7199 }, { "epoch": 3.3605600933488917, "grad_norm": 0.703125, "learning_rate": 0.0001493207366880175, "loss": 0.0391, "step": 7200 }, { "epoch": 3.361026837806301, "grad_norm": 0.83984375, "learning_rate": 0.00014930797152171223, "loss": 0.0455, "step": 7201 }, { "epoch": 3.3614935822637104, "grad_norm": 0.64453125, "learning_rate": 0.0001492952052937516, "loss": 0.0289, "step": 7202 }, { "epoch": 3.36196032672112, "grad_norm": 0.58203125, "learning_rate": 0.0001492824380044105, "loss": 0.0328, "step": 7203 }, { "epoch": 3.3624270711785296, "grad_norm": 0.83203125, "learning_rate": 0.00014926966965396384, "loss": 0.0247, "step": 7204 }, { "epoch": 3.3628938156359394, "grad_norm": 0.71875, "learning_rate": 0.00014925690024268653, "loss": 0.0416, "step": 7205 }, { "epoch": 3.3633605600933487, "grad_norm": 0.8203125, "learning_rate": 0.00014924412977085352, "loss": 0.0416, "step": 7206 }, { "epoch": 3.3638273045507585, "grad_norm": 0.66796875, "learning_rate": 0.00014923135823873975, "loss": 0.0203, "step": 7207 }, { "epoch": 3.364294049008168, "grad_norm": 0.71484375, "learning_rate": 0.0001492185856466202, "loss": 0.0343, "step": 7208 }, { "epoch": 3.3647607934655777, "grad_norm": 0.87109375, "learning_rate": 0.00014920581199476992, "loss": 0.0339, "step": 7209 }, { "epoch": 3.365227537922987, "grad_norm": 0.89453125, "learning_rate": 0.00014919303728346388, "loss": 0.0399, "step": 7210 }, { "epoch": 3.365694282380397, "grad_norm": 0.953125, "learning_rate": 0.00014918026151297719, "loss": 0.038, "step": 7211 }, { "epoch": 3.366161026837806, "grad_norm": 0.82421875, "learning_rate": 0.00014916748468358487, "loss": 0.0406, "step": 7212 }, { "epoch": 3.366627771295216, "grad_norm": 0.58203125, "learning_rate": 0.0001491547067955621, "loss": 0.0268, "step": 7213 }, { "epoch": 3.3670945157526253, "grad_norm": 0.74609375, "learning_rate": 0.00014914192784918392, "loss": 0.0333, "step": 7214 }, { "epoch": 3.367561260210035, "grad_norm": 1.109375, "learning_rate": 0.00014912914784472555, "loss": 0.0474, "step": 7215 }, { "epoch": 3.3680280046674445, "grad_norm": 0.8203125, "learning_rate": 0.00014911636678246207, "loss": 0.0369, "step": 7216 }, { "epoch": 3.3684947491248542, "grad_norm": 0.6640625, "learning_rate": 0.00014910358466266877, "loss": 0.0247, "step": 7217 }, { "epoch": 3.3689614935822636, "grad_norm": 0.6640625, "learning_rate": 0.00014909080148562076, "loss": 0.0264, "step": 7218 }, { "epoch": 3.3694282380396734, "grad_norm": 0.87890625, "learning_rate": 0.00014907801725159333, "loss": 0.0411, "step": 7219 }, { "epoch": 3.3698949824970827, "grad_norm": 0.77734375, "learning_rate": 0.00014906523196086177, "loss": 0.0332, "step": 7220 }, { "epoch": 3.3703617269544925, "grad_norm": 0.58984375, "learning_rate": 0.0001490524456137013, "loss": 0.0194, "step": 7221 }, { "epoch": 3.370828471411902, "grad_norm": 0.6875, "learning_rate": 0.00014903965821038724, "loss": 0.0257, "step": 7222 }, { "epoch": 3.3712952158693117, "grad_norm": 0.6015625, "learning_rate": 0.00014902686975119497, "loss": 0.028, "step": 7223 }, { "epoch": 3.371761960326721, "grad_norm": 0.94921875, "learning_rate": 0.00014901408023639977, "loss": 0.0419, "step": 7224 }, { "epoch": 3.372228704784131, "grad_norm": 0.84765625, "learning_rate": 0.00014900128966627705, "loss": 0.0321, "step": 7225 }, { "epoch": 3.37269544924154, "grad_norm": 0.69921875, "learning_rate": 0.00014898849804110216, "loss": 0.0304, "step": 7226 }, { "epoch": 3.37316219369895, "grad_norm": 0.80859375, "learning_rate": 0.00014897570536115062, "loss": 0.0533, "step": 7227 }, { "epoch": 3.3736289381563593, "grad_norm": 0.66015625, "learning_rate": 0.00014896291162669776, "loss": 0.0246, "step": 7228 }, { "epoch": 3.374095682613769, "grad_norm": 0.91015625, "learning_rate": 0.0001489501168380191, "loss": 0.0288, "step": 7229 }, { "epoch": 3.3745624270711785, "grad_norm": 0.72265625, "learning_rate": 0.0001489373209953901, "loss": 0.0303, "step": 7230 }, { "epoch": 3.3750291715285883, "grad_norm": 0.8125, "learning_rate": 0.0001489245240990863, "loss": 0.0348, "step": 7231 }, { "epoch": 3.3754959159859976, "grad_norm": 0.76171875, "learning_rate": 0.0001489117261493832, "loss": 0.0286, "step": 7232 }, { "epoch": 3.3759626604434074, "grad_norm": 0.69921875, "learning_rate": 0.00014889892714655637, "loss": 0.0225, "step": 7233 }, { "epoch": 3.3764294049008168, "grad_norm": 0.72265625, "learning_rate": 0.0001488861270908814, "loss": 0.0322, "step": 7234 }, { "epoch": 3.3768961493582266, "grad_norm": 0.5625, "learning_rate": 0.00014887332598263384, "loss": 0.0206, "step": 7235 }, { "epoch": 3.377362893815636, "grad_norm": 0.78515625, "learning_rate": 0.00014886052382208936, "loss": 0.0356, "step": 7236 }, { "epoch": 3.3778296382730453, "grad_norm": 0.6484375, "learning_rate": 0.00014884772060952362, "loss": 0.0264, "step": 7237 }, { "epoch": 3.378296382730455, "grad_norm": 0.98828125, "learning_rate": 0.0001488349163452122, "loss": 0.0268, "step": 7238 }, { "epoch": 3.378763127187865, "grad_norm": 0.6171875, "learning_rate": 0.0001488221110294309, "loss": 0.0221, "step": 7239 }, { "epoch": 3.379229871645274, "grad_norm": 0.6796875, "learning_rate": 0.00014880930466245538, "loss": 0.0253, "step": 7240 }, { "epoch": 3.3796966161026836, "grad_norm": 0.80078125, "learning_rate": 0.00014879649724456134, "loss": 0.0284, "step": 7241 }, { "epoch": 3.3801633605600934, "grad_norm": 0.9453125, "learning_rate": 0.0001487836887760246, "loss": 0.0419, "step": 7242 }, { "epoch": 3.380630105017503, "grad_norm": 1.0078125, "learning_rate": 0.00014877087925712092, "loss": 0.0329, "step": 7243 }, { "epoch": 3.3810968494749125, "grad_norm": 0.79296875, "learning_rate": 0.00014875806868812607, "loss": 0.0286, "step": 7244 }, { "epoch": 3.381563593932322, "grad_norm": 1.4140625, "learning_rate": 0.00014874525706931593, "loss": 0.0379, "step": 7245 }, { "epoch": 3.3820303383897317, "grad_norm": 0.74609375, "learning_rate": 0.0001487324444009663, "loss": 0.0315, "step": 7246 }, { "epoch": 3.382497082847141, "grad_norm": 0.65625, "learning_rate": 0.0001487196306833531, "loss": 0.0195, "step": 7247 }, { "epoch": 3.382963827304551, "grad_norm": 0.67578125, "learning_rate": 0.00014870681591675215, "loss": 0.0263, "step": 7248 }, { "epoch": 3.38343057176196, "grad_norm": 0.671875, "learning_rate": 0.00014869400010143947, "loss": 0.0264, "step": 7249 }, { "epoch": 3.38389731621937, "grad_norm": 0.79296875, "learning_rate": 0.0001486811832376909, "loss": 0.0385, "step": 7250 }, { "epoch": 3.3843640606767793, "grad_norm": 0.75390625, "learning_rate": 0.00014866836532578242, "loss": 0.0305, "step": 7251 }, { "epoch": 3.384830805134189, "grad_norm": 0.75390625, "learning_rate": 0.00014865554636599008, "loss": 0.0298, "step": 7252 }, { "epoch": 3.3852975495915985, "grad_norm": 0.99609375, "learning_rate": 0.00014864272635858986, "loss": 0.0428, "step": 7253 }, { "epoch": 3.3857642940490082, "grad_norm": 0.70703125, "learning_rate": 0.0001486299053038577, "loss": 0.0352, "step": 7254 }, { "epoch": 3.3862310385064176, "grad_norm": 0.73046875, "learning_rate": 0.00014861708320206978, "loss": 0.0229, "step": 7255 }, { "epoch": 3.3866977829638274, "grad_norm": 0.8671875, "learning_rate": 0.00014860426005350213, "loss": 0.0377, "step": 7256 }, { "epoch": 3.3871645274212367, "grad_norm": 1.1796875, "learning_rate": 0.00014859143585843078, "loss": 0.0521, "step": 7257 }, { "epoch": 3.3876312718786465, "grad_norm": 0.65625, "learning_rate": 0.00014857861061713193, "loss": 0.0307, "step": 7258 }, { "epoch": 3.388098016336056, "grad_norm": 0.6796875, "learning_rate": 0.0001485657843298817, "loss": 0.024, "step": 7259 }, { "epoch": 3.3885647607934657, "grad_norm": 0.83203125, "learning_rate": 0.00014855295699695625, "loss": 0.0396, "step": 7260 }, { "epoch": 3.389031505250875, "grad_norm": 0.78515625, "learning_rate": 0.00014854012861863173, "loss": 0.0291, "step": 7261 }, { "epoch": 3.389498249708285, "grad_norm": 0.85546875, "learning_rate": 0.0001485272991951844, "loss": 0.0416, "step": 7262 }, { "epoch": 3.389964994165694, "grad_norm": 0.90625, "learning_rate": 0.00014851446872689053, "loss": 0.0435, "step": 7263 }, { "epoch": 3.390431738623104, "grad_norm": 0.53125, "learning_rate": 0.00014850163721402625, "loss": 0.0344, "step": 7264 }, { "epoch": 3.3908984830805133, "grad_norm": 0.71484375, "learning_rate": 0.00014848880465686797, "loss": 0.0311, "step": 7265 }, { "epoch": 3.391365227537923, "grad_norm": 1.015625, "learning_rate": 0.00014847597105569187, "loss": 0.0384, "step": 7266 }, { "epoch": 3.3918319719953325, "grad_norm": 0.7421875, "learning_rate": 0.00014846313641077435, "loss": 0.0343, "step": 7267 }, { "epoch": 3.3922987164527423, "grad_norm": 1.0625, "learning_rate": 0.00014845030072239173, "loss": 0.0346, "step": 7268 }, { "epoch": 3.3927654609101516, "grad_norm": 0.5859375, "learning_rate": 0.0001484374639908204, "loss": 0.0293, "step": 7269 }, { "epoch": 3.3932322053675614, "grad_norm": 0.72265625, "learning_rate": 0.00014842462621633668, "loss": 0.0295, "step": 7270 }, { "epoch": 3.393698949824971, "grad_norm": 0.93359375, "learning_rate": 0.00014841178739921708, "loss": 0.0446, "step": 7271 }, { "epoch": 3.3941656942823806, "grad_norm": 0.765625, "learning_rate": 0.00014839894753973794, "loss": 0.0301, "step": 7272 }, { "epoch": 3.39463243873979, "grad_norm": 0.6328125, "learning_rate": 0.0001483861066381758, "loss": 0.0268, "step": 7273 }, { "epoch": 3.3950991831971997, "grad_norm": 0.8671875, "learning_rate": 0.00014837326469480707, "loss": 0.0342, "step": 7274 }, { "epoch": 3.395565927654609, "grad_norm": 0.87109375, "learning_rate": 0.0001483604217099083, "loss": 0.0461, "step": 7275 }, { "epoch": 3.3960326721120184, "grad_norm": 0.84765625, "learning_rate": 0.000148347577683756, "loss": 0.0505, "step": 7276 }, { "epoch": 3.396499416569428, "grad_norm": 0.78125, "learning_rate": 0.00014833473261662667, "loss": 0.0315, "step": 7277 }, { "epoch": 3.396966161026838, "grad_norm": 1.078125, "learning_rate": 0.00014832188650879696, "loss": 0.0514, "step": 7278 }, { "epoch": 3.3974329054842474, "grad_norm": 0.78515625, "learning_rate": 0.00014830903936054342, "loss": 0.0358, "step": 7279 }, { "epoch": 3.3978996499416567, "grad_norm": 0.7890625, "learning_rate": 0.00014829619117214261, "loss": 0.0346, "step": 7280 }, { "epoch": 3.3983663943990665, "grad_norm": 0.74609375, "learning_rate": 0.00014828334194387125, "loss": 0.0324, "step": 7281 }, { "epoch": 3.3988331388564763, "grad_norm": 0.88671875, "learning_rate": 0.000148270491676006, "loss": 0.0396, "step": 7282 }, { "epoch": 3.3992998833138857, "grad_norm": 0.75390625, "learning_rate": 0.00014825764036882348, "loss": 0.0275, "step": 7283 }, { "epoch": 3.399766627771295, "grad_norm": 0.5859375, "learning_rate": 0.0001482447880226004, "loss": 0.0194, "step": 7284 }, { "epoch": 3.400233372228705, "grad_norm": 0.87890625, "learning_rate": 0.00014823193463761353, "loss": 0.0273, "step": 7285 }, { "epoch": 3.400700116686114, "grad_norm": 0.7265625, "learning_rate": 0.00014821908021413958, "loss": 0.0326, "step": 7286 }, { "epoch": 3.401166861143524, "grad_norm": 0.88671875, "learning_rate": 0.00014820622475245535, "loss": 0.0363, "step": 7287 }, { "epoch": 3.4016336056009333, "grad_norm": 0.71875, "learning_rate": 0.00014819336825283762, "loss": 0.0364, "step": 7288 }, { "epoch": 3.402100350058343, "grad_norm": 0.8671875, "learning_rate": 0.0001481805107155632, "loss": 0.0541, "step": 7289 }, { "epoch": 3.4025670945157525, "grad_norm": 0.79296875, "learning_rate": 0.0001481676521409089, "loss": 0.0294, "step": 7290 }, { "epoch": 3.4030338389731623, "grad_norm": 0.78125, "learning_rate": 0.0001481547925291516, "loss": 0.0273, "step": 7291 }, { "epoch": 3.4035005834305716, "grad_norm": 0.90625, "learning_rate": 0.00014814193188056823, "loss": 0.0299, "step": 7292 }, { "epoch": 3.4039673278879814, "grad_norm": 0.78515625, "learning_rate": 0.0001481290701954356, "loss": 0.0345, "step": 7293 }, { "epoch": 3.4044340723453907, "grad_norm": 0.5546875, "learning_rate": 0.00014811620747403072, "loss": 0.0268, "step": 7294 }, { "epoch": 3.4049008168028005, "grad_norm": 0.79296875, "learning_rate": 0.00014810334371663053, "loss": 0.033, "step": 7295 }, { "epoch": 3.40536756126021, "grad_norm": 0.60546875, "learning_rate": 0.00014809047892351196, "loss": 0.0192, "step": 7296 }, { "epoch": 3.4058343057176197, "grad_norm": 0.67578125, "learning_rate": 0.000148077613094952, "loss": 0.0236, "step": 7297 }, { "epoch": 3.406301050175029, "grad_norm": 0.796875, "learning_rate": 0.00014806474623122771, "loss": 0.0464, "step": 7298 }, { "epoch": 3.406767794632439, "grad_norm": 0.58984375, "learning_rate": 0.0001480518783326161, "loss": 0.0298, "step": 7299 }, { "epoch": 3.407234539089848, "grad_norm": 0.75390625, "learning_rate": 0.0001480390093993942, "loss": 0.0318, "step": 7300 }, { "epoch": 3.407701283547258, "grad_norm": 0.83984375, "learning_rate": 0.00014802613943183917, "loss": 0.0336, "step": 7301 }, { "epoch": 3.4081680280046673, "grad_norm": 0.73828125, "learning_rate": 0.00014801326843022806, "loss": 0.0299, "step": 7302 }, { "epoch": 3.408634772462077, "grad_norm": 0.6875, "learning_rate": 0.000148000396394838, "loss": 0.0354, "step": 7303 }, { "epoch": 3.4091015169194865, "grad_norm": 0.984375, "learning_rate": 0.00014798752332594617, "loss": 0.0511, "step": 7304 }, { "epoch": 3.4095682613768963, "grad_norm": 0.73046875, "learning_rate": 0.00014797464922382972, "loss": 0.0262, "step": 7305 }, { "epoch": 3.4100350058343056, "grad_norm": 0.70703125, "learning_rate": 0.00014796177408876583, "loss": 0.0307, "step": 7306 }, { "epoch": 3.4105017502917154, "grad_norm": 0.703125, "learning_rate": 0.0001479488979210317, "loss": 0.0289, "step": 7307 }, { "epoch": 3.410968494749125, "grad_norm": 0.74609375, "learning_rate": 0.00014793602072090463, "loss": 0.025, "step": 7308 }, { "epoch": 3.4114352392065346, "grad_norm": 0.90234375, "learning_rate": 0.00014792314248866185, "loss": 0.0401, "step": 7309 }, { "epoch": 3.411901983663944, "grad_norm": 0.59375, "learning_rate": 0.00014791026322458065, "loss": 0.0243, "step": 7310 }, { "epoch": 3.4123687281213537, "grad_norm": 0.859375, "learning_rate": 0.00014789738292893834, "loss": 0.0216, "step": 7311 }, { "epoch": 3.412835472578763, "grad_norm": 1.2421875, "learning_rate": 0.0001478845016020122, "loss": 0.0509, "step": 7312 }, { "epoch": 3.413302217036173, "grad_norm": 0.75390625, "learning_rate": 0.0001478716192440796, "loss": 0.0284, "step": 7313 }, { "epoch": 3.413768961493582, "grad_norm": 1.0, "learning_rate": 0.00014785873585541795, "loss": 0.0341, "step": 7314 }, { "epoch": 3.414235705950992, "grad_norm": 0.80078125, "learning_rate": 0.00014784585143630462, "loss": 0.0295, "step": 7315 }, { "epoch": 3.4147024504084014, "grad_norm": 0.71875, "learning_rate": 0.00014783296598701697, "loss": 0.0225, "step": 7316 }, { "epoch": 3.415169194865811, "grad_norm": 0.82421875, "learning_rate": 0.00014782007950783253, "loss": 0.0328, "step": 7317 }, { "epoch": 3.4156359393232205, "grad_norm": 0.87109375, "learning_rate": 0.00014780719199902875, "loss": 0.0391, "step": 7318 }, { "epoch": 3.41610268378063, "grad_norm": 0.53125, "learning_rate": 0.00014779430346088302, "loss": 0.0259, "step": 7319 }, { "epoch": 3.4165694282380397, "grad_norm": 0.7578125, "learning_rate": 0.00014778141389367298, "loss": 0.0234, "step": 7320 }, { "epoch": 3.4170361726954495, "grad_norm": 0.78515625, "learning_rate": 0.00014776852329767602, "loss": 0.0256, "step": 7321 }, { "epoch": 3.417502917152859, "grad_norm": 0.87890625, "learning_rate": 0.00014775563167316978, "loss": 0.0409, "step": 7322 }, { "epoch": 3.417969661610268, "grad_norm": 0.5078125, "learning_rate": 0.00014774273902043181, "loss": 0.0205, "step": 7323 }, { "epoch": 3.418436406067678, "grad_norm": 0.68359375, "learning_rate": 0.00014772984533973968, "loss": 0.026, "step": 7324 }, { "epoch": 3.4189031505250878, "grad_norm": 0.63671875, "learning_rate": 0.00014771695063137106, "loss": 0.0265, "step": 7325 }, { "epoch": 3.419369894982497, "grad_norm": 0.94140625, "learning_rate": 0.0001477040548956035, "loss": 0.0424, "step": 7326 }, { "epoch": 3.4198366394399065, "grad_norm": 0.89453125, "learning_rate": 0.00014769115813271475, "loss": 0.0344, "step": 7327 }, { "epoch": 3.4203033838973163, "grad_norm": 0.671875, "learning_rate": 0.00014767826034298243, "loss": 0.0229, "step": 7328 }, { "epoch": 3.4207701283547256, "grad_norm": 0.462890625, "learning_rate": 0.00014766536152668427, "loss": 0.0151, "step": 7329 }, { "epoch": 3.4212368728121354, "grad_norm": 0.65234375, "learning_rate": 0.00014765246168409798, "loss": 0.0336, "step": 7330 }, { "epoch": 3.4217036172695448, "grad_norm": 0.7421875, "learning_rate": 0.00014763956081550135, "loss": 0.0369, "step": 7331 }, { "epoch": 3.4221703617269545, "grad_norm": 0.81640625, "learning_rate": 0.0001476266589211721, "loss": 0.0337, "step": 7332 }, { "epoch": 3.422637106184364, "grad_norm": 0.51171875, "learning_rate": 0.000147613756001388, "loss": 0.0177, "step": 7333 }, { "epoch": 3.4231038506417737, "grad_norm": 0.8671875, "learning_rate": 0.00014760085205642697, "loss": 0.0338, "step": 7334 }, { "epoch": 3.423570595099183, "grad_norm": 1.0234375, "learning_rate": 0.00014758794708656675, "loss": 0.0393, "step": 7335 }, { "epoch": 3.424037339556593, "grad_norm": 0.671875, "learning_rate": 0.00014757504109208524, "loss": 0.0284, "step": 7336 }, { "epoch": 3.424504084014002, "grad_norm": 0.984375, "learning_rate": 0.00014756213407326031, "loss": 0.0333, "step": 7337 }, { "epoch": 3.424970828471412, "grad_norm": 1.0546875, "learning_rate": 0.00014754922603036987, "loss": 0.0511, "step": 7338 }, { "epoch": 3.4254375729288213, "grad_norm": 0.76953125, "learning_rate": 0.00014753631696369182, "loss": 0.0309, "step": 7339 }, { "epoch": 3.425904317386231, "grad_norm": 0.796875, "learning_rate": 0.00014752340687350412, "loss": 0.0431, "step": 7340 }, { "epoch": 3.4263710618436405, "grad_norm": 0.58203125, "learning_rate": 0.00014751049576008476, "loss": 0.0204, "step": 7341 }, { "epoch": 3.4268378063010503, "grad_norm": 0.6796875, "learning_rate": 0.00014749758362371166, "loss": 0.0245, "step": 7342 }, { "epoch": 3.4273045507584596, "grad_norm": 0.5234375, "learning_rate": 0.00014748467046466295, "loss": 0.0153, "step": 7343 }, { "epoch": 3.4277712952158694, "grad_norm": 0.6484375, "learning_rate": 0.00014747175628321656, "loss": 0.0294, "step": 7344 }, { "epoch": 3.428238039673279, "grad_norm": 0.98828125, "learning_rate": 0.0001474588410796506, "loss": 0.0494, "step": 7345 }, { "epoch": 3.4287047841306886, "grad_norm": 0.7734375, "learning_rate": 0.00014744592485424314, "loss": 0.0261, "step": 7346 }, { "epoch": 3.429171528588098, "grad_norm": 0.66015625, "learning_rate": 0.00014743300760727227, "loss": 0.029, "step": 7347 }, { "epoch": 3.4296382730455077, "grad_norm": 0.7421875, "learning_rate": 0.00014742008933901613, "loss": 0.0324, "step": 7348 }, { "epoch": 3.430105017502917, "grad_norm": 0.68359375, "learning_rate": 0.00014740717004975285, "loss": 0.0316, "step": 7349 }, { "epoch": 3.430571761960327, "grad_norm": 0.82421875, "learning_rate": 0.0001473942497397606, "loss": 0.0302, "step": 7350 }, { "epoch": 3.4310385064177362, "grad_norm": 0.71875, "learning_rate": 0.00014738132840931757, "loss": 0.0252, "step": 7351 }, { "epoch": 3.431505250875146, "grad_norm": 0.6484375, "learning_rate": 0.00014736840605870192, "loss": 0.0292, "step": 7352 }, { "epoch": 3.4319719953325554, "grad_norm": 0.78125, "learning_rate": 0.000147355482688192, "loss": 0.0281, "step": 7353 }, { "epoch": 3.432438739789965, "grad_norm": 1.0078125, "learning_rate": 0.00014734255829806597, "loss": 0.0449, "step": 7354 }, { "epoch": 3.4329054842473745, "grad_norm": 0.61328125, "learning_rate": 0.00014732963288860212, "loss": 0.0243, "step": 7355 }, { "epoch": 3.4333722287047843, "grad_norm": 0.82421875, "learning_rate": 0.00014731670646007876, "loss": 0.0301, "step": 7356 }, { "epoch": 3.4338389731621937, "grad_norm": 0.86328125, "learning_rate": 0.00014730377901277423, "loss": 0.0427, "step": 7357 }, { "epoch": 3.4343057176196035, "grad_norm": 0.78515625, "learning_rate": 0.00014729085054696685, "loss": 0.0207, "step": 7358 }, { "epoch": 3.434772462077013, "grad_norm": 0.75390625, "learning_rate": 0.00014727792106293498, "loss": 0.0311, "step": 7359 }, { "epoch": 3.4352392065344226, "grad_norm": 0.55859375, "learning_rate": 0.00014726499056095702, "loss": 0.021, "step": 7360 }, { "epoch": 3.435705950991832, "grad_norm": 0.734375, "learning_rate": 0.0001472520590413114, "loss": 0.0287, "step": 7361 }, { "epoch": 3.4361726954492413, "grad_norm": 0.765625, "learning_rate": 0.00014723912650427647, "loss": 0.0325, "step": 7362 }, { "epoch": 3.436639439906651, "grad_norm": 0.70703125, "learning_rate": 0.00014722619295013076, "loss": 0.0354, "step": 7363 }, { "epoch": 3.437106184364061, "grad_norm": 0.69140625, "learning_rate": 0.0001472132583791527, "loss": 0.0291, "step": 7364 }, { "epoch": 3.4375729288214703, "grad_norm": 0.8125, "learning_rate": 0.00014720032279162083, "loss": 0.0305, "step": 7365 }, { "epoch": 3.4380396732788796, "grad_norm": 0.73046875, "learning_rate": 0.0001471873861878136, "loss": 0.02, "step": 7366 }, { "epoch": 3.4385064177362894, "grad_norm": 0.72265625, "learning_rate": 0.00014717444856800964, "loss": 0.0366, "step": 7367 }, { "epoch": 3.438973162193699, "grad_norm": 0.703125, "learning_rate": 0.0001471615099324874, "loss": 0.0296, "step": 7368 }, { "epoch": 3.4394399066511085, "grad_norm": 0.58984375, "learning_rate": 0.00014714857028152558, "loss": 0.0256, "step": 7369 }, { "epoch": 3.439906651108518, "grad_norm": 0.7421875, "learning_rate": 0.00014713562961540274, "loss": 0.0252, "step": 7370 }, { "epoch": 3.4403733955659277, "grad_norm": 0.5703125, "learning_rate": 0.00014712268793439745, "loss": 0.0226, "step": 7371 }, { "epoch": 3.440840140023337, "grad_norm": 0.97265625, "learning_rate": 0.0001471097452387884, "loss": 0.0541, "step": 7372 }, { "epoch": 3.441306884480747, "grad_norm": 0.77734375, "learning_rate": 0.0001470968015288543, "loss": 0.0298, "step": 7373 }, { "epoch": 3.441773628938156, "grad_norm": 0.8125, "learning_rate": 0.0001470838568048738, "loss": 0.0368, "step": 7374 }, { "epoch": 3.442240373395566, "grad_norm": 0.65625, "learning_rate": 0.00014707091106712562, "loss": 0.021, "step": 7375 }, { "epoch": 3.4427071178529753, "grad_norm": 0.7578125, "learning_rate": 0.0001470579643158885, "loss": 0.0263, "step": 7376 }, { "epoch": 3.443173862310385, "grad_norm": 0.5859375, "learning_rate": 0.0001470450165514412, "loss": 0.0201, "step": 7377 }, { "epoch": 3.4436406067677945, "grad_norm": 0.80078125, "learning_rate": 0.00014703206777406247, "loss": 0.0254, "step": 7378 }, { "epoch": 3.4441073512252043, "grad_norm": 0.7265625, "learning_rate": 0.00014701911798403115, "loss": 0.0253, "step": 7379 }, { "epoch": 3.4445740956826136, "grad_norm": 0.69140625, "learning_rate": 0.00014700616718162608, "loss": 0.0306, "step": 7380 }, { "epoch": 3.4450408401400234, "grad_norm": 0.6171875, "learning_rate": 0.00014699321536712606, "loss": 0.0236, "step": 7381 }, { "epoch": 3.445507584597433, "grad_norm": 0.68359375, "learning_rate": 0.00014698026254080995, "loss": 0.0247, "step": 7382 }, { "epoch": 3.4459743290548426, "grad_norm": 0.6640625, "learning_rate": 0.00014696730870295672, "loss": 0.0331, "step": 7383 }, { "epoch": 3.446441073512252, "grad_norm": 0.9921875, "learning_rate": 0.00014695435385384515, "loss": 0.0304, "step": 7384 }, { "epoch": 3.4469078179696617, "grad_norm": 0.63671875, "learning_rate": 0.00014694139799375431, "loss": 0.019, "step": 7385 }, { "epoch": 3.447374562427071, "grad_norm": 0.61328125, "learning_rate": 0.00014692844112296305, "loss": 0.0284, "step": 7386 }, { "epoch": 3.447841306884481, "grad_norm": 0.8359375, "learning_rate": 0.0001469154832417504, "loss": 0.0395, "step": 7387 }, { "epoch": 3.4483080513418902, "grad_norm": 0.56640625, "learning_rate": 0.00014690252435039532, "loss": 0.0254, "step": 7388 }, { "epoch": 3.4487747957993, "grad_norm": 0.8046875, "learning_rate": 0.0001468895644491769, "loss": 0.0433, "step": 7389 }, { "epoch": 3.4492415402567094, "grad_norm": 0.6484375, "learning_rate": 0.00014687660353837406, "loss": 0.0264, "step": 7390 }, { "epoch": 3.449708284714119, "grad_norm": 1.125, "learning_rate": 0.00014686364161826598, "loss": 0.0552, "step": 7391 }, { "epoch": 3.4501750291715285, "grad_norm": 0.734375, "learning_rate": 0.0001468506786891317, "loss": 0.0311, "step": 7392 }, { "epoch": 3.4506417736289383, "grad_norm": 0.765625, "learning_rate": 0.00014683771475125032, "loss": 0.0316, "step": 7393 }, { "epoch": 3.4511085180863477, "grad_norm": 0.59765625, "learning_rate": 0.00014682474980490096, "loss": 0.0219, "step": 7394 }, { "epoch": 3.4515752625437575, "grad_norm": 0.7734375, "learning_rate": 0.0001468117838503628, "loss": 0.0333, "step": 7395 }, { "epoch": 3.452042007001167, "grad_norm": 0.70703125, "learning_rate": 0.000146798816887915, "loss": 0.019, "step": 7396 }, { "epoch": 3.4525087514585766, "grad_norm": 0.6875, "learning_rate": 0.00014678584891783674, "loss": 0.042, "step": 7397 }, { "epoch": 3.452975495915986, "grad_norm": 0.69921875, "learning_rate": 0.00014677287994040722, "loss": 0.0276, "step": 7398 }, { "epoch": 3.4534422403733958, "grad_norm": 0.70703125, "learning_rate": 0.00014675990995590573, "loss": 0.0363, "step": 7399 }, { "epoch": 3.453908984830805, "grad_norm": 0.69140625, "learning_rate": 0.0001467469389646115, "loss": 0.0273, "step": 7400 }, { "epoch": 3.4543757292882145, "grad_norm": 0.59765625, "learning_rate": 0.0001467339669668038, "loss": 0.0204, "step": 7401 }, { "epoch": 3.4548424737456243, "grad_norm": 0.68359375, "learning_rate": 0.00014672099396276193, "loss": 0.0227, "step": 7402 }, { "epoch": 3.455309218203034, "grad_norm": 0.7109375, "learning_rate": 0.00014670801995276525, "loss": 0.0317, "step": 7403 }, { "epoch": 3.4557759626604434, "grad_norm": 0.75, "learning_rate": 0.00014669504493709305, "loss": 0.0276, "step": 7404 }, { "epoch": 3.4562427071178528, "grad_norm": 0.92578125, "learning_rate": 0.00014668206891602472, "loss": 0.0309, "step": 7405 }, { "epoch": 3.4567094515752625, "grad_norm": 0.58984375, "learning_rate": 0.0001466690918898397, "loss": 0.0227, "step": 7406 }, { "epoch": 3.4571761960326723, "grad_norm": 0.83984375, "learning_rate": 0.00014665611385881729, "loss": 0.0399, "step": 7407 }, { "epoch": 3.4576429404900817, "grad_norm": 0.734375, "learning_rate": 0.000146643134823237, "loss": 0.0275, "step": 7408 }, { "epoch": 3.458109684947491, "grad_norm": 0.734375, "learning_rate": 0.0001466301547833783, "loss": 0.0289, "step": 7409 }, { "epoch": 3.458576429404901, "grad_norm": 0.62890625, "learning_rate": 0.0001466171737395206, "loss": 0.0298, "step": 7410 }, { "epoch": 3.45904317386231, "grad_norm": 0.5625, "learning_rate": 0.0001466041916919434, "loss": 0.0261, "step": 7411 }, { "epoch": 3.45950991831972, "grad_norm": 0.76953125, "learning_rate": 0.0001465912086409263, "loss": 0.0308, "step": 7412 }, { "epoch": 3.4599766627771293, "grad_norm": 0.609375, "learning_rate": 0.00014657822458674874, "loss": 0.0219, "step": 7413 }, { "epoch": 3.460443407234539, "grad_norm": 0.703125, "learning_rate": 0.00014656523952969033, "loss": 0.0301, "step": 7414 }, { "epoch": 3.4609101516919485, "grad_norm": 0.765625, "learning_rate": 0.00014655225347003068, "loss": 0.031, "step": 7415 }, { "epoch": 3.4613768961493583, "grad_norm": 0.60546875, "learning_rate": 0.00014653926640804935, "loss": 0.0244, "step": 7416 }, { "epoch": 3.4618436406067676, "grad_norm": 0.80859375, "learning_rate": 0.00014652627834402596, "loss": 0.0208, "step": 7417 }, { "epoch": 3.4623103850641774, "grad_norm": 0.7890625, "learning_rate": 0.00014651328927824017, "loss": 0.0357, "step": 7418 }, { "epoch": 3.462777129521587, "grad_norm": 0.89453125, "learning_rate": 0.00014650029921097168, "loss": 0.046, "step": 7419 }, { "epoch": 3.4632438739789966, "grad_norm": 0.67578125, "learning_rate": 0.00014648730814250015, "loss": 0.027, "step": 7420 }, { "epoch": 3.463710618436406, "grad_norm": 0.6640625, "learning_rate": 0.00014647431607310528, "loss": 0.0221, "step": 7421 }, { "epoch": 3.4641773628938157, "grad_norm": 0.85546875, "learning_rate": 0.00014646132300306683, "loss": 0.0341, "step": 7422 }, { "epoch": 3.464644107351225, "grad_norm": 0.73828125, "learning_rate": 0.00014644832893266459, "loss": 0.0365, "step": 7423 }, { "epoch": 3.465110851808635, "grad_norm": 0.71875, "learning_rate": 0.00014643533386217824, "loss": 0.0262, "step": 7424 }, { "epoch": 3.4655775962660442, "grad_norm": 0.76953125, "learning_rate": 0.00014642233779188764, "loss": 0.0362, "step": 7425 }, { "epoch": 3.466044340723454, "grad_norm": 0.63671875, "learning_rate": 0.00014640934072207259, "loss": 0.0257, "step": 7426 }, { "epoch": 3.4665110851808634, "grad_norm": 0.84765625, "learning_rate": 0.00014639634265301295, "loss": 0.0383, "step": 7427 }, { "epoch": 3.466977829638273, "grad_norm": 0.73046875, "learning_rate": 0.00014638334358498857, "loss": 0.0212, "step": 7428 }, { "epoch": 3.4674445740956825, "grad_norm": 0.91015625, "learning_rate": 0.00014637034351827936, "loss": 0.0544, "step": 7429 }, { "epoch": 3.4679113185530923, "grad_norm": 0.7265625, "learning_rate": 0.00014635734245316523, "loss": 0.0361, "step": 7430 }, { "epoch": 3.4683780630105017, "grad_norm": 0.66796875, "learning_rate": 0.00014634434038992604, "loss": 0.0291, "step": 7431 }, { "epoch": 3.4688448074679115, "grad_norm": 0.72265625, "learning_rate": 0.00014633133732884179, "loss": 0.0276, "step": 7432 }, { "epoch": 3.469311551925321, "grad_norm": 0.85546875, "learning_rate": 0.0001463183332701924, "loss": 0.0399, "step": 7433 }, { "epoch": 3.4697782963827306, "grad_norm": 0.482421875, "learning_rate": 0.00014630532821425797, "loss": 0.0182, "step": 7434 }, { "epoch": 3.47024504084014, "grad_norm": 0.671875, "learning_rate": 0.00014629232216131842, "loss": 0.0288, "step": 7435 }, { "epoch": 3.4707117852975498, "grad_norm": 0.6015625, "learning_rate": 0.00014627931511165384, "loss": 0.0255, "step": 7436 }, { "epoch": 3.471178529754959, "grad_norm": 0.609375, "learning_rate": 0.0001462663070655442, "loss": 0.0227, "step": 7437 }, { "epoch": 3.471645274212369, "grad_norm": 0.89453125, "learning_rate": 0.0001462532980232697, "loss": 0.0342, "step": 7438 }, { "epoch": 3.4721120186697783, "grad_norm": 0.6484375, "learning_rate": 0.00014624028798511032, "loss": 0.032, "step": 7439 }, { "epoch": 3.472578763127188, "grad_norm": 0.48828125, "learning_rate": 0.00014622727695134626, "loss": 0.0178, "step": 7440 }, { "epoch": 3.4730455075845974, "grad_norm": 0.484375, "learning_rate": 0.00014621426492225764, "loss": 0.0194, "step": 7441 }, { "epoch": 3.473512252042007, "grad_norm": 0.7890625, "learning_rate": 0.0001462012518981246, "loss": 0.0348, "step": 7442 }, { "epoch": 3.4739789964994165, "grad_norm": 0.7109375, "learning_rate": 0.0001461882378792274, "loss": 0.0328, "step": 7443 }, { "epoch": 3.474445740956826, "grad_norm": 0.91015625, "learning_rate": 0.00014617522286584613, "loss": 0.0284, "step": 7444 }, { "epoch": 3.4749124854142357, "grad_norm": 0.546875, "learning_rate": 0.0001461622068582611, "loss": 0.0234, "step": 7445 }, { "epoch": 3.4753792298716455, "grad_norm": 0.64453125, "learning_rate": 0.00014614918985675254, "loss": 0.0325, "step": 7446 }, { "epoch": 3.475845974329055, "grad_norm": 0.73046875, "learning_rate": 0.00014613617186160071, "loss": 0.0359, "step": 7447 }, { "epoch": 3.476312718786464, "grad_norm": 0.703125, "learning_rate": 0.00014612315287308595, "loss": 0.0314, "step": 7448 }, { "epoch": 3.476779463243874, "grad_norm": 0.55078125, "learning_rate": 0.0001461101328914885, "loss": 0.0236, "step": 7449 }, { "epoch": 3.477246207701284, "grad_norm": 0.6796875, "learning_rate": 0.0001460971119170887, "loss": 0.0249, "step": 7450 }, { "epoch": 3.477712952158693, "grad_norm": 0.8203125, "learning_rate": 0.000146084089950167, "loss": 0.0318, "step": 7451 }, { "epoch": 3.4781796966161025, "grad_norm": 0.421875, "learning_rate": 0.00014607106699100365, "loss": 0.0182, "step": 7452 }, { "epoch": 3.4786464410735123, "grad_norm": 0.71484375, "learning_rate": 0.00014605804303987916, "loss": 0.0338, "step": 7453 }, { "epoch": 3.4791131855309216, "grad_norm": 0.83984375, "learning_rate": 0.0001460450180970739, "loss": 0.031, "step": 7454 }, { "epoch": 3.4795799299883314, "grad_norm": 1.046875, "learning_rate": 0.0001460319921628683, "loss": 0.035, "step": 7455 }, { "epoch": 3.480046674445741, "grad_norm": 0.80859375, "learning_rate": 0.00014601896523754282, "loss": 0.0326, "step": 7456 }, { "epoch": 3.4805134189031506, "grad_norm": 0.90234375, "learning_rate": 0.00014600593732137798, "loss": 0.0326, "step": 7457 }, { "epoch": 3.48098016336056, "grad_norm": 0.73828125, "learning_rate": 0.00014599290841465424, "loss": 0.0271, "step": 7458 }, { "epoch": 3.4814469078179697, "grad_norm": 0.68359375, "learning_rate": 0.00014597987851765215, "loss": 0.0326, "step": 7459 }, { "epoch": 3.481913652275379, "grad_norm": 0.8046875, "learning_rate": 0.0001459668476306523, "loss": 0.0387, "step": 7460 }, { "epoch": 3.482380396732789, "grad_norm": 0.78515625, "learning_rate": 0.00014595381575393518, "loss": 0.0335, "step": 7461 }, { "epoch": 3.4828471411901982, "grad_norm": 0.8046875, "learning_rate": 0.00014594078288778146, "loss": 0.0436, "step": 7462 }, { "epoch": 3.483313885647608, "grad_norm": 0.5, "learning_rate": 0.00014592774903247167, "loss": 0.0149, "step": 7463 }, { "epoch": 3.4837806301050174, "grad_norm": 0.71484375, "learning_rate": 0.0001459147141882865, "loss": 0.0345, "step": 7464 }, { "epoch": 3.484247374562427, "grad_norm": 0.734375, "learning_rate": 0.0001459016783555066, "loss": 0.0318, "step": 7465 }, { "epoch": 3.4847141190198365, "grad_norm": 0.6640625, "learning_rate": 0.0001458886415344126, "loss": 0.0323, "step": 7466 }, { "epoch": 3.4851808634772463, "grad_norm": 0.7890625, "learning_rate": 0.0001458756037252853, "loss": 0.0335, "step": 7467 }, { "epoch": 3.4856476079346557, "grad_norm": 0.6953125, "learning_rate": 0.00014586256492840532, "loss": 0.0239, "step": 7468 }, { "epoch": 3.4861143523920655, "grad_norm": 0.76953125, "learning_rate": 0.00014584952514405344, "loss": 0.0222, "step": 7469 }, { "epoch": 3.486581096849475, "grad_norm": 0.95703125, "learning_rate": 0.00014583648437251035, "loss": 0.0337, "step": 7470 }, { "epoch": 3.4870478413068846, "grad_norm": 0.734375, "learning_rate": 0.00014582344261405695, "loss": 0.0331, "step": 7471 }, { "epoch": 3.487514585764294, "grad_norm": 0.6171875, "learning_rate": 0.000145810399868974, "loss": 0.023, "step": 7472 }, { "epoch": 3.4879813302217038, "grad_norm": 0.75, "learning_rate": 0.00014579735613754228, "loss": 0.0327, "step": 7473 }, { "epoch": 3.488448074679113, "grad_norm": 0.6328125, "learning_rate": 0.0001457843114200427, "loss": 0.0232, "step": 7474 }, { "epoch": 3.488914819136523, "grad_norm": 0.78515625, "learning_rate": 0.00014577126571675608, "loss": 0.0292, "step": 7475 }, { "epoch": 3.4893815635939323, "grad_norm": 0.703125, "learning_rate": 0.0001457582190279633, "loss": 0.0265, "step": 7476 }, { "epoch": 3.489848308051342, "grad_norm": 0.462890625, "learning_rate": 0.00014574517135394538, "loss": 0.0134, "step": 7477 }, { "epoch": 3.4903150525087514, "grad_norm": 0.72265625, "learning_rate": 0.00014573212269498311, "loss": 0.0288, "step": 7478 }, { "epoch": 3.490781796966161, "grad_norm": 0.87890625, "learning_rate": 0.0001457190730513575, "loss": 0.0334, "step": 7479 }, { "epoch": 3.4912485414235706, "grad_norm": 0.6796875, "learning_rate": 0.0001457060224233495, "loss": 0.0252, "step": 7480 }, { "epoch": 3.4917152858809803, "grad_norm": 1.0546875, "learning_rate": 0.00014569297081124013, "loss": 0.0461, "step": 7481 }, { "epoch": 3.4921820303383897, "grad_norm": 0.734375, "learning_rate": 0.0001456799182153104, "loss": 0.0323, "step": 7482 }, { "epoch": 3.4926487747957995, "grad_norm": 0.6875, "learning_rate": 0.00014566686463584138, "loss": 0.0266, "step": 7483 }, { "epoch": 3.493115519253209, "grad_norm": 0.73828125, "learning_rate": 0.0001456538100731141, "loss": 0.0288, "step": 7484 }, { "epoch": 3.4935822637106186, "grad_norm": 0.66015625, "learning_rate": 0.00014564075452740962, "loss": 0.02, "step": 7485 }, { "epoch": 3.494049008168028, "grad_norm": 0.7734375, "learning_rate": 0.00014562769799900902, "loss": 0.0273, "step": 7486 }, { "epoch": 3.4945157526254373, "grad_norm": 0.62890625, "learning_rate": 0.00014561464048819348, "loss": 0.0263, "step": 7487 }, { "epoch": 3.494982497082847, "grad_norm": 0.78125, "learning_rate": 0.00014560158199524413, "loss": 0.0291, "step": 7488 }, { "epoch": 3.495449241540257, "grad_norm": 0.72265625, "learning_rate": 0.00014558852252044213, "loss": 0.0281, "step": 7489 }, { "epoch": 3.4959159859976663, "grad_norm": 0.7890625, "learning_rate": 0.00014557546206406865, "loss": 0.0245, "step": 7490 }, { "epoch": 3.4963827304550756, "grad_norm": 0.921875, "learning_rate": 0.0001455624006264049, "loss": 0.0385, "step": 7491 }, { "epoch": 3.4968494749124854, "grad_norm": 0.703125, "learning_rate": 0.0001455493382077321, "loss": 0.0242, "step": 7492 }, { "epoch": 3.4973162193698952, "grad_norm": 0.7421875, "learning_rate": 0.00014553627480833154, "loss": 0.0236, "step": 7493 }, { "epoch": 3.4977829638273046, "grad_norm": 0.6640625, "learning_rate": 0.00014552321042848445, "loss": 0.0248, "step": 7494 }, { "epoch": 3.498249708284714, "grad_norm": 0.71484375, "learning_rate": 0.00014551014506847214, "loss": 0.0303, "step": 7495 }, { "epoch": 3.4987164527421237, "grad_norm": 0.8203125, "learning_rate": 0.00014549707872857587, "loss": 0.0388, "step": 7496 }, { "epoch": 3.499183197199533, "grad_norm": 0.76953125, "learning_rate": 0.00014548401140907706, "loss": 0.0213, "step": 7497 }, { "epoch": 3.499649941656943, "grad_norm": 0.71875, "learning_rate": 0.000145470943110257, "loss": 0.0293, "step": 7498 }, { "epoch": 3.5001166861143522, "grad_norm": 0.62109375, "learning_rate": 0.00014545787383239705, "loss": 0.019, "step": 7499 }, { "epoch": 3.500583430571762, "grad_norm": 0.765625, "learning_rate": 0.00014544480357577873, "loss": 0.0295, "step": 7500 }, { "epoch": 3.5010501750291714, "grad_norm": 0.578125, "learning_rate": 0.0001454317323406833, "loss": 0.0212, "step": 7501 }, { "epoch": 3.501516919486581, "grad_norm": 0.953125, "learning_rate": 0.00014541866012739228, "loss": 0.0343, "step": 7502 }, { "epoch": 3.5019836639439905, "grad_norm": 0.5859375, "learning_rate": 0.00014540558693618713, "loss": 0.0312, "step": 7503 }, { "epoch": 3.5024504084014003, "grad_norm": 0.66015625, "learning_rate": 0.0001453925127673493, "loss": 0.0262, "step": 7504 }, { "epoch": 3.5024504084014003, "eval_loss": 1.7928109169006348, "eval_runtime": 58.9301, "eval_samples_per_second": 30.613, "eval_steps_per_second": 3.835, "step": 7504 }, { "epoch": 3.5029171528588097, "grad_norm": 0.76953125, "learning_rate": 0.0001453794376211603, "loss": 0.0311, "step": 7505 }, { "epoch": 3.5033838973162195, "grad_norm": 0.5, "learning_rate": 0.00014536636149790167, "loss": 0.0155, "step": 7506 }, { "epoch": 3.503850641773629, "grad_norm": 0.765625, "learning_rate": 0.00014535328439785493, "loss": 0.029, "step": 7507 }, { "epoch": 3.5043173862310386, "grad_norm": 0.62890625, "learning_rate": 0.00014534020632130166, "loss": 0.0221, "step": 7508 }, { "epoch": 3.504784130688448, "grad_norm": 0.80078125, "learning_rate": 0.00014532712726852344, "loss": 0.0353, "step": 7509 }, { "epoch": 3.5052508751458578, "grad_norm": 0.765625, "learning_rate": 0.0001453140472398019, "loss": 0.023, "step": 7510 }, { "epoch": 3.505717619603267, "grad_norm": 0.78125, "learning_rate": 0.00014530096623541862, "loss": 0.0211, "step": 7511 }, { "epoch": 3.506184364060677, "grad_norm": 0.7578125, "learning_rate": 0.00014528788425565526, "loss": 0.0299, "step": 7512 }, { "epoch": 3.5066511085180863, "grad_norm": 0.71875, "learning_rate": 0.00014527480130079355, "loss": 0.0202, "step": 7513 }, { "epoch": 3.507117852975496, "grad_norm": 0.7265625, "learning_rate": 0.0001452617173711151, "loss": 0.0229, "step": 7514 }, { "epoch": 3.5075845974329054, "grad_norm": 0.73828125, "learning_rate": 0.00014524863246690166, "loss": 0.0286, "step": 7515 }, { "epoch": 3.508051341890315, "grad_norm": 0.8828125, "learning_rate": 0.00014523554658843498, "loss": 0.0414, "step": 7516 }, { "epoch": 3.5085180863477246, "grad_norm": 0.7109375, "learning_rate": 0.0001452224597359968, "loss": 0.0301, "step": 7517 }, { "epoch": 3.5089848308051343, "grad_norm": 0.81640625, "learning_rate": 0.00014520937190986883, "loss": 0.0356, "step": 7518 }, { "epoch": 3.5094515752625437, "grad_norm": 0.59375, "learning_rate": 0.00014519628311033297, "loss": 0.02, "step": 7519 }, { "epoch": 3.5099183197199535, "grad_norm": 0.6484375, "learning_rate": 0.000145183193337671, "loss": 0.0299, "step": 7520 }, { "epoch": 3.510385064177363, "grad_norm": 0.72265625, "learning_rate": 0.00014517010259216474, "loss": 0.0228, "step": 7521 }, { "epoch": 3.510851808634772, "grad_norm": 0.7109375, "learning_rate": 0.000145157010874096, "loss": 0.0177, "step": 7522 }, { "epoch": 3.511318553092182, "grad_norm": 1.046875, "learning_rate": 0.00014514391818374676, "loss": 0.0455, "step": 7523 }, { "epoch": 3.511785297549592, "grad_norm": 0.82421875, "learning_rate": 0.00014513082452139887, "loss": 0.0426, "step": 7524 }, { "epoch": 3.512252042007001, "grad_norm": 0.78125, "learning_rate": 0.00014511772988733422, "loss": 0.0281, "step": 7525 }, { "epoch": 3.5127187864644105, "grad_norm": 0.5859375, "learning_rate": 0.00014510463428183486, "loss": 0.0152, "step": 7526 }, { "epoch": 3.5131855309218203, "grad_norm": 0.828125, "learning_rate": 0.00014509153770518265, "loss": 0.0457, "step": 7527 }, { "epoch": 3.51365227537923, "grad_norm": 0.59765625, "learning_rate": 0.00014507844015765957, "loss": 0.0162, "step": 7528 }, { "epoch": 3.5141190198366394, "grad_norm": 0.62890625, "learning_rate": 0.00014506534163954767, "loss": 0.0182, "step": 7529 }, { "epoch": 3.514585764294049, "grad_norm": 0.8046875, "learning_rate": 0.000145052242151129, "loss": 0.0266, "step": 7530 }, { "epoch": 3.5150525087514586, "grad_norm": 0.6953125, "learning_rate": 0.00014503914169268552, "loss": 0.0282, "step": 7531 }, { "epoch": 3.5155192532088684, "grad_norm": 0.5546875, "learning_rate": 0.00014502604026449934, "loss": 0.0182, "step": 7532 }, { "epoch": 3.5159859976662777, "grad_norm": 0.7734375, "learning_rate": 0.0001450129378668526, "loss": 0.038, "step": 7533 }, { "epoch": 3.516452742123687, "grad_norm": 0.5546875, "learning_rate": 0.00014499983450002736, "loss": 0.0241, "step": 7534 }, { "epoch": 3.516919486581097, "grad_norm": 0.66015625, "learning_rate": 0.00014498673016430572, "loss": 0.0284, "step": 7535 }, { "epoch": 3.5173862310385067, "grad_norm": 0.57421875, "learning_rate": 0.0001449736248599699, "loss": 0.0178, "step": 7536 }, { "epoch": 3.517852975495916, "grad_norm": 1.3359375, "learning_rate": 0.00014496051858730203, "loss": 0.0385, "step": 7537 }, { "epoch": 3.5183197199533254, "grad_norm": 0.8671875, "learning_rate": 0.0001449474113465843, "loss": 0.0447, "step": 7538 }, { "epoch": 3.518786464410735, "grad_norm": 0.63671875, "learning_rate": 0.00014493430313809895, "loss": 0.0234, "step": 7539 }, { "epoch": 3.519253208868145, "grad_norm": 0.61328125, "learning_rate": 0.00014492119396212818, "loss": 0.0229, "step": 7540 }, { "epoch": 3.5197199533255543, "grad_norm": 0.7109375, "learning_rate": 0.00014490808381895426, "loss": 0.0223, "step": 7541 }, { "epoch": 3.5201866977829637, "grad_norm": 0.58203125, "learning_rate": 0.0001448949727088595, "loss": 0.0269, "step": 7542 }, { "epoch": 3.5206534422403735, "grad_norm": 0.828125, "learning_rate": 0.00014488186063212614, "loss": 0.0293, "step": 7543 }, { "epoch": 3.521120186697783, "grad_norm": 0.67578125, "learning_rate": 0.00014486874758903656, "loss": 0.0301, "step": 7544 }, { "epoch": 3.5215869311551926, "grad_norm": 0.7265625, "learning_rate": 0.000144855633579873, "loss": 0.0226, "step": 7545 }, { "epoch": 3.522053675612602, "grad_norm": 0.76171875, "learning_rate": 0.00014484251860491791, "loss": 0.0388, "step": 7546 }, { "epoch": 3.5225204200700118, "grad_norm": 0.578125, "learning_rate": 0.00014482940266445367, "loss": 0.0151, "step": 7547 }, { "epoch": 3.522987164527421, "grad_norm": 0.8203125, "learning_rate": 0.0001448162857587626, "loss": 0.0248, "step": 7548 }, { "epoch": 3.523453908984831, "grad_norm": 0.65234375, "learning_rate": 0.00014480316788812722, "loss": 0.0203, "step": 7549 }, { "epoch": 3.5239206534422403, "grad_norm": 0.5390625, "learning_rate": 0.00014479004905282991, "loss": 0.021, "step": 7550 }, { "epoch": 3.52438739789965, "grad_norm": 0.87890625, "learning_rate": 0.0001447769292531531, "loss": 0.0394, "step": 7551 }, { "epoch": 3.5248541423570594, "grad_norm": 0.6328125, "learning_rate": 0.0001447638084893794, "loss": 0.0218, "step": 7552 }, { "epoch": 3.525320886814469, "grad_norm": 0.56640625, "learning_rate": 0.0001447506867617912, "loss": 0.0152, "step": 7553 }, { "epoch": 3.5257876312718786, "grad_norm": 0.6015625, "learning_rate": 0.00014473756407067104, "loss": 0.0245, "step": 7554 }, { "epoch": 3.5262543757292883, "grad_norm": 0.76953125, "learning_rate": 0.00014472444041630154, "loss": 0.0237, "step": 7555 }, { "epoch": 3.5267211201866977, "grad_norm": 0.6171875, "learning_rate": 0.00014471131579896516, "loss": 0.026, "step": 7556 }, { "epoch": 3.5271878646441075, "grad_norm": 0.78515625, "learning_rate": 0.00014469819021894456, "loss": 0.0335, "step": 7557 }, { "epoch": 3.527654609101517, "grad_norm": 0.6484375, "learning_rate": 0.00014468506367652236, "loss": 0.0219, "step": 7558 }, { "epoch": 3.5281213535589266, "grad_norm": 0.78125, "learning_rate": 0.00014467193617198113, "loss": 0.0249, "step": 7559 }, { "epoch": 3.528588098016336, "grad_norm": 0.546875, "learning_rate": 0.00014465880770560358, "loss": 0.0142, "step": 7560 }, { "epoch": 3.529054842473746, "grad_norm": 0.72265625, "learning_rate": 0.00014464567827767233, "loss": 0.0303, "step": 7561 }, { "epoch": 3.529521586931155, "grad_norm": 0.84375, "learning_rate": 0.0001446325478884701, "loss": 0.0366, "step": 7562 }, { "epoch": 3.529988331388565, "grad_norm": 0.48828125, "learning_rate": 0.00014461941653827958, "loss": 0.0149, "step": 7563 }, { "epoch": 3.5304550758459743, "grad_norm": 0.5859375, "learning_rate": 0.00014460628422738352, "loss": 0.0175, "step": 7564 }, { "epoch": 3.5309218203033836, "grad_norm": 0.78515625, "learning_rate": 0.00014459315095606467, "loss": 0.0238, "step": 7565 }, { "epoch": 3.5313885647607934, "grad_norm": 0.8671875, "learning_rate": 0.0001445800167246058, "loss": 0.0333, "step": 7566 }, { "epoch": 3.5318553092182032, "grad_norm": 0.74609375, "learning_rate": 0.0001445668815332897, "loss": 0.0273, "step": 7567 }, { "epoch": 3.5323220536756126, "grad_norm": 0.6875, "learning_rate": 0.0001445537453823992, "loss": 0.023, "step": 7568 }, { "epoch": 3.532788798133022, "grad_norm": 0.69921875, "learning_rate": 0.00014454060827221715, "loss": 0.0316, "step": 7569 }, { "epoch": 3.5332555425904317, "grad_norm": 0.90234375, "learning_rate": 0.00014452747020302637, "loss": 0.0328, "step": 7570 }, { "epoch": 3.5337222870478415, "grad_norm": 0.578125, "learning_rate": 0.00014451433117510973, "loss": 0.024, "step": 7571 }, { "epoch": 3.534189031505251, "grad_norm": 2.078125, "learning_rate": 0.0001445011911887502, "loss": 0.0451, "step": 7572 }, { "epoch": 3.5346557759626602, "grad_norm": 0.5859375, "learning_rate": 0.00014448805024423059, "loss": 0.0202, "step": 7573 }, { "epoch": 3.53512252042007, "grad_norm": 0.625, "learning_rate": 0.00014447490834183395, "loss": 0.0252, "step": 7574 }, { "epoch": 3.53558926487748, "grad_norm": 0.7421875, "learning_rate": 0.0001444617654818432, "loss": 0.0333, "step": 7575 }, { "epoch": 3.536056009334889, "grad_norm": 0.546875, "learning_rate": 0.00014444862166454125, "loss": 0.0201, "step": 7576 }, { "epoch": 3.5365227537922985, "grad_norm": 0.60546875, "learning_rate": 0.00014443547689021118, "loss": 0.0261, "step": 7577 }, { "epoch": 3.5369894982497083, "grad_norm": 0.8671875, "learning_rate": 0.00014442233115913598, "loss": 0.0286, "step": 7578 }, { "epoch": 3.537456242707118, "grad_norm": 0.703125, "learning_rate": 0.00014440918447159871, "loss": 0.0273, "step": 7579 }, { "epoch": 3.5379229871645275, "grad_norm": 0.82421875, "learning_rate": 0.00014439603682788244, "loss": 0.0401, "step": 7580 }, { "epoch": 3.538389731621937, "grad_norm": 0.79296875, "learning_rate": 0.00014438288822827023, "loss": 0.0271, "step": 7581 }, { "epoch": 3.5388564760793466, "grad_norm": 0.64453125, "learning_rate": 0.0001443697386730452, "loss": 0.021, "step": 7582 }, { "epoch": 3.539323220536756, "grad_norm": 0.6875, "learning_rate": 0.00014435658816249047, "loss": 0.0247, "step": 7583 }, { "epoch": 3.5397899649941658, "grad_norm": 0.58984375, "learning_rate": 0.00014434343669688916, "loss": 0.0252, "step": 7584 }, { "epoch": 3.540256709451575, "grad_norm": 0.75390625, "learning_rate": 0.00014433028427652446, "loss": 0.0309, "step": 7585 }, { "epoch": 3.540723453908985, "grad_norm": 0.6953125, "learning_rate": 0.00014431713090167959, "loss": 0.0333, "step": 7586 }, { "epoch": 3.5411901983663943, "grad_norm": 0.74609375, "learning_rate": 0.00014430397657263767, "loss": 0.0334, "step": 7587 }, { "epoch": 3.541656942823804, "grad_norm": 0.70703125, "learning_rate": 0.000144290821289682, "loss": 0.0283, "step": 7588 }, { "epoch": 3.5421236872812134, "grad_norm": 0.58984375, "learning_rate": 0.00014427766505309585, "loss": 0.0259, "step": 7589 }, { "epoch": 3.542590431738623, "grad_norm": 0.5703125, "learning_rate": 0.00014426450786316237, "loss": 0.0172, "step": 7590 }, { "epoch": 3.5430571761960326, "grad_norm": 0.65625, "learning_rate": 0.00014425134972016496, "loss": 0.0261, "step": 7591 }, { "epoch": 3.5435239206534423, "grad_norm": 0.64453125, "learning_rate": 0.0001442381906243869, "loss": 0.0186, "step": 7592 }, { "epoch": 3.5439906651108517, "grad_norm": 0.71875, "learning_rate": 0.0001442250305761115, "loss": 0.0284, "step": 7593 }, { "epoch": 3.5444574095682615, "grad_norm": 0.7734375, "learning_rate": 0.0001442118695756221, "loss": 0.027, "step": 7594 }, { "epoch": 3.544924154025671, "grad_norm": 0.609375, "learning_rate": 0.00014419870762320214, "loss": 0.018, "step": 7595 }, { "epoch": 3.5453908984830806, "grad_norm": 0.5625, "learning_rate": 0.00014418554471913494, "loss": 0.0314, "step": 7596 }, { "epoch": 3.54585764294049, "grad_norm": 0.66796875, "learning_rate": 0.00014417238086370397, "loss": 0.0243, "step": 7597 }, { "epoch": 3.5463243873979, "grad_norm": 0.66015625, "learning_rate": 0.0001441592160571926, "loss": 0.0294, "step": 7598 }, { "epoch": 3.546791131855309, "grad_norm": 0.65625, "learning_rate": 0.00014414605029988432, "loss": 0.0241, "step": 7599 }, { "epoch": 3.547257876312719, "grad_norm": 0.46484375, "learning_rate": 0.00014413288359206258, "loss": 0.0143, "step": 7600 }, { "epoch": 3.5477246207701283, "grad_norm": 0.97265625, "learning_rate": 0.0001441197159340109, "loss": 0.0208, "step": 7601 }, { "epoch": 3.548191365227538, "grad_norm": 0.6875, "learning_rate": 0.00014410654732601282, "loss": 0.0182, "step": 7602 }, { "epoch": 3.5486581096849474, "grad_norm": 0.76953125, "learning_rate": 0.00014409337776835176, "loss": 0.0247, "step": 7603 }, { "epoch": 3.549124854142357, "grad_norm": 0.6640625, "learning_rate": 0.00014408020726131143, "loss": 0.0298, "step": 7604 }, { "epoch": 3.5495915985997666, "grad_norm": 0.6171875, "learning_rate": 0.0001440670358051753, "loss": 0.022, "step": 7605 }, { "epoch": 3.5500583430571764, "grad_norm": 0.828125, "learning_rate": 0.00014405386340022696, "loss": 0.0193, "step": 7606 }, { "epoch": 3.5505250875145857, "grad_norm": 0.59765625, "learning_rate": 0.0001440406900467501, "loss": 0.0183, "step": 7607 }, { "epoch": 3.550991831971995, "grad_norm": 0.80078125, "learning_rate": 0.00014402751574502833, "loss": 0.0197, "step": 7608 }, { "epoch": 3.551458576429405, "grad_norm": 0.7421875, "learning_rate": 0.0001440143404953453, "loss": 0.0279, "step": 7609 }, { "epoch": 3.5519253208868147, "grad_norm": 0.71875, "learning_rate": 0.00014400116429798466, "loss": 0.0292, "step": 7610 }, { "epoch": 3.552392065344224, "grad_norm": 0.71875, "learning_rate": 0.00014398798715323013, "loss": 0.0235, "step": 7611 }, { "epoch": 3.5528588098016334, "grad_norm": 0.91796875, "learning_rate": 0.00014397480906136546, "loss": 0.0454, "step": 7612 }, { "epoch": 3.553325554259043, "grad_norm": 0.83984375, "learning_rate": 0.00014396163002267432, "loss": 0.0258, "step": 7613 }, { "epoch": 3.553792298716453, "grad_norm": 0.796875, "learning_rate": 0.00014394845003744053, "loss": 0.0327, "step": 7614 }, { "epoch": 3.5542590431738623, "grad_norm": 1.03125, "learning_rate": 0.00014393526910594785, "loss": 0.0373, "step": 7615 }, { "epoch": 3.5547257876312717, "grad_norm": 0.8515625, "learning_rate": 0.00014392208722848008, "loss": 0.0298, "step": 7616 }, { "epoch": 3.5551925320886815, "grad_norm": 0.63671875, "learning_rate": 0.00014390890440532105, "loss": 0.019, "step": 7617 }, { "epoch": 3.5556592765460913, "grad_norm": 0.796875, "learning_rate": 0.0001438957206367546, "loss": 0.0375, "step": 7618 }, { "epoch": 3.5561260210035006, "grad_norm": 0.52734375, "learning_rate": 0.00014388253592306454, "loss": 0.0162, "step": 7619 }, { "epoch": 3.55659276546091, "grad_norm": 0.53125, "learning_rate": 0.0001438693502645348, "loss": 0.0157, "step": 7620 }, { "epoch": 3.5570595099183198, "grad_norm": 0.59765625, "learning_rate": 0.00014385616366144931, "loss": 0.0206, "step": 7621 }, { "epoch": 3.5575262543757296, "grad_norm": 0.6015625, "learning_rate": 0.00014384297611409194, "loss": 0.0226, "step": 7622 }, { "epoch": 3.557992998833139, "grad_norm": 0.52734375, "learning_rate": 0.00014382978762274664, "loss": 0.0214, "step": 7623 }, { "epoch": 3.5584597432905483, "grad_norm": 0.72265625, "learning_rate": 0.00014381659818769738, "loss": 0.0252, "step": 7624 }, { "epoch": 3.558926487747958, "grad_norm": 0.6953125, "learning_rate": 0.00014380340780922816, "loss": 0.0221, "step": 7625 }, { "epoch": 3.5593932322053674, "grad_norm": 0.91015625, "learning_rate": 0.00014379021648762297, "loss": 0.0252, "step": 7626 }, { "epoch": 3.559859976662777, "grad_norm": 0.8515625, "learning_rate": 0.00014377702422316583, "loss": 0.0336, "step": 7627 }, { "epoch": 3.5603267211201866, "grad_norm": 0.49609375, "learning_rate": 0.0001437638310161408, "loss": 0.0125, "step": 7628 }, { "epoch": 3.5607934655775964, "grad_norm": 0.90234375, "learning_rate": 0.00014375063686683187, "loss": 0.0376, "step": 7629 }, { "epoch": 3.5612602100350057, "grad_norm": 0.6875, "learning_rate": 0.00014373744177552324, "loss": 0.0295, "step": 7630 }, { "epoch": 3.5617269544924155, "grad_norm": 0.87890625, "learning_rate": 0.00014372424574249896, "loss": 0.0346, "step": 7631 }, { "epoch": 3.562193698949825, "grad_norm": 0.76171875, "learning_rate": 0.00014371104876804313, "loss": 0.0277, "step": 7632 }, { "epoch": 3.5626604434072346, "grad_norm": 0.59375, "learning_rate": 0.00014369785085243994, "loss": 0.0292, "step": 7633 }, { "epoch": 3.563127187864644, "grad_norm": 0.70703125, "learning_rate": 0.00014368465199597356, "loss": 0.0207, "step": 7634 }, { "epoch": 3.563593932322054, "grad_norm": 0.56640625, "learning_rate": 0.00014367145219892812, "loss": 0.0237, "step": 7635 }, { "epoch": 3.564060676779463, "grad_norm": 0.62890625, "learning_rate": 0.00014365825146158787, "loss": 0.0153, "step": 7636 }, { "epoch": 3.564527421236873, "grad_norm": 0.60546875, "learning_rate": 0.00014364504978423703, "loss": 0.0218, "step": 7637 }, { "epoch": 3.5649941656942823, "grad_norm": 0.60546875, "learning_rate": 0.00014363184716715985, "loss": 0.0222, "step": 7638 }, { "epoch": 3.565460910151692, "grad_norm": 0.9375, "learning_rate": 0.00014361864361064056, "loss": 0.0316, "step": 7639 }, { "epoch": 3.5659276546091014, "grad_norm": 0.74609375, "learning_rate": 0.00014360543911496348, "loss": 0.029, "step": 7640 }, { "epoch": 3.5663943990665112, "grad_norm": 0.77734375, "learning_rate": 0.00014359223368041297, "loss": 0.0272, "step": 7641 }, { "epoch": 3.5668611435239206, "grad_norm": 0.82421875, "learning_rate": 0.00014357902730727327, "loss": 0.0366, "step": 7642 }, { "epoch": 3.5673278879813304, "grad_norm": 0.6875, "learning_rate": 0.00014356581999582874, "loss": 0.0205, "step": 7643 }, { "epoch": 3.5677946324387397, "grad_norm": 0.546875, "learning_rate": 0.0001435526117463638, "loss": 0.0216, "step": 7644 }, { "epoch": 3.5682613768961495, "grad_norm": 0.83984375, "learning_rate": 0.00014353940255916277, "loss": 0.0286, "step": 7645 }, { "epoch": 3.568728121353559, "grad_norm": 0.7421875, "learning_rate": 0.00014352619243451013, "loss": 0.0354, "step": 7646 }, { "epoch": 3.5691948658109682, "grad_norm": 0.8984375, "learning_rate": 0.00014351298137269027, "loss": 0.0377, "step": 7647 }, { "epoch": 3.569661610268378, "grad_norm": 0.55859375, "learning_rate": 0.00014349976937398764, "loss": 0.0194, "step": 7648 }, { "epoch": 3.570128354725788, "grad_norm": 1.0390625, "learning_rate": 0.00014348655643868667, "loss": 0.0248, "step": 7649 }, { "epoch": 3.570595099183197, "grad_norm": 0.72265625, "learning_rate": 0.00014347334256707193, "loss": 0.0293, "step": 7650 }, { "epoch": 3.5710618436406065, "grad_norm": 0.515625, "learning_rate": 0.0001434601277594279, "loss": 0.019, "step": 7651 }, { "epoch": 3.5715285880980163, "grad_norm": 0.65234375, "learning_rate": 0.00014344691201603906, "loss": 0.0238, "step": 7652 }, { "epoch": 3.571995332555426, "grad_norm": 0.62890625, "learning_rate": 0.00014343369533719003, "loss": 0.0217, "step": 7653 }, { "epoch": 3.5724620770128355, "grad_norm": 0.671875, "learning_rate": 0.00014342047772316537, "loss": 0.0251, "step": 7654 }, { "epoch": 3.572928821470245, "grad_norm": 0.6953125, "learning_rate": 0.0001434072591742496, "loss": 0.0231, "step": 7655 }, { "epoch": 3.5733955659276546, "grad_norm": 0.70703125, "learning_rate": 0.0001433940396907274, "loss": 0.0253, "step": 7656 }, { "epoch": 3.5738623103850644, "grad_norm": 0.67578125, "learning_rate": 0.0001433808192728834, "loss": 0.0285, "step": 7657 }, { "epoch": 3.5743290548424738, "grad_norm": 0.6796875, "learning_rate": 0.0001433675979210022, "loss": 0.0216, "step": 7658 }, { "epoch": 3.574795799299883, "grad_norm": 0.68359375, "learning_rate": 0.00014335437563536848, "loss": 0.0197, "step": 7659 }, { "epoch": 3.575262543757293, "grad_norm": 0.76953125, "learning_rate": 0.00014334115241626698, "loss": 0.0297, "step": 7660 }, { "epoch": 3.5757292882147027, "grad_norm": 0.73828125, "learning_rate": 0.00014332792826398237, "loss": 0.0307, "step": 7661 }, { "epoch": 3.576196032672112, "grad_norm": 0.8984375, "learning_rate": 0.0001433147031787994, "loss": 0.0462, "step": 7662 }, { "epoch": 3.5766627771295214, "grad_norm": 0.6875, "learning_rate": 0.00014330147716100282, "loss": 0.0286, "step": 7663 }, { "epoch": 3.577129521586931, "grad_norm": 0.81640625, "learning_rate": 0.00014328825021087737, "loss": 0.0333, "step": 7664 }, { "epoch": 3.577596266044341, "grad_norm": 0.478515625, "learning_rate": 0.00014327502232870787, "loss": 0.0168, "step": 7665 }, { "epoch": 3.5780630105017504, "grad_norm": 0.5234375, "learning_rate": 0.00014326179351477914, "loss": 0.0206, "step": 7666 }, { "epoch": 3.5785297549591597, "grad_norm": 0.84375, "learning_rate": 0.000143248563769376, "loss": 0.0362, "step": 7667 }, { "epoch": 3.5789964994165695, "grad_norm": 0.7109375, "learning_rate": 0.00014323533309278328, "loss": 0.0223, "step": 7668 }, { "epoch": 3.579463243873979, "grad_norm": 0.59765625, "learning_rate": 0.00014322210148528586, "loss": 0.0261, "step": 7669 }, { "epoch": 3.5799299883313886, "grad_norm": 0.75390625, "learning_rate": 0.00014320886894716867, "loss": 0.0268, "step": 7670 }, { "epoch": 3.580396732788798, "grad_norm": 0.71484375, "learning_rate": 0.00014319563547871656, "loss": 0.0244, "step": 7671 }, { "epoch": 3.580863477246208, "grad_norm": 0.73046875, "learning_rate": 0.00014318240108021453, "loss": 0.0227, "step": 7672 }, { "epoch": 3.581330221703617, "grad_norm": 0.640625, "learning_rate": 0.00014316916575194744, "loss": 0.0289, "step": 7673 }, { "epoch": 3.581796966161027, "grad_norm": 0.56640625, "learning_rate": 0.0001431559294942004, "loss": 0.0243, "step": 7674 }, { "epoch": 3.5822637106184363, "grad_norm": 0.61328125, "learning_rate": 0.0001431426923072582, "loss": 0.0215, "step": 7675 }, { "epoch": 3.582730455075846, "grad_norm": 0.83203125, "learning_rate": 0.00014312945419140603, "loss": 0.037, "step": 7676 }, { "epoch": 3.5831971995332554, "grad_norm": 0.66015625, "learning_rate": 0.00014311621514692888, "loss": 0.0298, "step": 7677 }, { "epoch": 3.5836639439906652, "grad_norm": 0.69140625, "learning_rate": 0.00014310297517411172, "loss": 0.0289, "step": 7678 }, { "epoch": 3.5841306884480746, "grad_norm": 0.7421875, "learning_rate": 0.00014308973427323973, "loss": 0.0288, "step": 7679 }, { "epoch": 3.5845974329054844, "grad_norm": 0.8125, "learning_rate": 0.00014307649244459794, "loss": 0.0272, "step": 7680 }, { "epoch": 3.5850641773628937, "grad_norm": 0.65625, "learning_rate": 0.00014306324968847145, "loss": 0.0191, "step": 7681 }, { "epoch": 3.5855309218203035, "grad_norm": 0.7578125, "learning_rate": 0.00014305000600514544, "loss": 0.0236, "step": 7682 }, { "epoch": 3.585997666277713, "grad_norm": 0.498046875, "learning_rate": 0.00014303676139490503, "loss": 0.0161, "step": 7683 }, { "epoch": 3.5864644107351227, "grad_norm": 0.60546875, "learning_rate": 0.0001430235158580354, "loss": 0.0239, "step": 7684 }, { "epoch": 3.586931155192532, "grad_norm": 0.59375, "learning_rate": 0.00014301026939482175, "loss": 0.0241, "step": 7685 }, { "epoch": 3.5873978996499414, "grad_norm": 0.6484375, "learning_rate": 0.00014299702200554925, "loss": 0.0214, "step": 7686 }, { "epoch": 3.587864644107351, "grad_norm": 0.69140625, "learning_rate": 0.00014298377369050317, "loss": 0.0242, "step": 7687 }, { "epoch": 3.588331388564761, "grad_norm": 0.72265625, "learning_rate": 0.00014297052444996875, "loss": 0.0196, "step": 7688 }, { "epoch": 3.5887981330221703, "grad_norm": 0.62890625, "learning_rate": 0.00014295727428423127, "loss": 0.0139, "step": 7689 }, { "epoch": 3.5892648774795797, "grad_norm": 0.73046875, "learning_rate": 0.000142944023193576, "loss": 0.0255, "step": 7690 }, { "epoch": 3.5897316219369895, "grad_norm": 0.85546875, "learning_rate": 0.0001429307711782883, "loss": 0.0286, "step": 7691 }, { "epoch": 3.5901983663943993, "grad_norm": 0.9296875, "learning_rate": 0.00014291751823865343, "loss": 0.0306, "step": 7692 }, { "epoch": 3.5906651108518086, "grad_norm": 0.58984375, "learning_rate": 0.00014290426437495678, "loss": 0.0226, "step": 7693 }, { "epoch": 3.591131855309218, "grad_norm": 0.58984375, "learning_rate": 0.00014289100958748372, "loss": 0.016, "step": 7694 }, { "epoch": 3.5915985997666278, "grad_norm": 0.6171875, "learning_rate": 0.00014287775387651964, "loss": 0.0194, "step": 7695 }, { "epoch": 3.5920653442240376, "grad_norm": 0.80078125, "learning_rate": 0.00014286449724234995, "loss": 0.0281, "step": 7696 }, { "epoch": 3.592532088681447, "grad_norm": 0.796875, "learning_rate": 0.00014285123968526005, "loss": 0.0235, "step": 7697 }, { "epoch": 3.5929988331388563, "grad_norm": 0.83984375, "learning_rate": 0.00014283798120553543, "loss": 0.0281, "step": 7698 }, { "epoch": 3.593465577596266, "grad_norm": 0.55859375, "learning_rate": 0.00014282472180346154, "loss": 0.0198, "step": 7699 }, { "epoch": 3.593932322053676, "grad_norm": 0.5703125, "learning_rate": 0.0001428114614793239, "loss": 0.0186, "step": 7700 }, { "epoch": 3.594399066511085, "grad_norm": 0.765625, "learning_rate": 0.00014279820023340792, "loss": 0.0294, "step": 7701 }, { "epoch": 3.5948658109684946, "grad_norm": 0.6875, "learning_rate": 0.00014278493806599928, "loss": 0.0253, "step": 7702 }, { "epoch": 3.5953325554259044, "grad_norm": 0.6640625, "learning_rate": 0.00014277167497738345, "loss": 0.0175, "step": 7703 }, { "epoch": 3.595799299883314, "grad_norm": 0.44140625, "learning_rate": 0.00014275841096784595, "loss": 0.014, "step": 7704 }, { "epoch": 3.5962660443407235, "grad_norm": 0.6328125, "learning_rate": 0.00014274514603767246, "loss": 0.0276, "step": 7705 }, { "epoch": 3.596732788798133, "grad_norm": 0.65234375, "learning_rate": 0.00014273188018714853, "loss": 0.0191, "step": 7706 }, { "epoch": 3.5971995332555426, "grad_norm": 0.71484375, "learning_rate": 0.00014271861341655983, "loss": 0.0283, "step": 7707 }, { "epoch": 3.597666277712952, "grad_norm": 0.55078125, "learning_rate": 0.00014270534572619197, "loss": 0.0228, "step": 7708 }, { "epoch": 3.598133022170362, "grad_norm": 0.62890625, "learning_rate": 0.00014269207711633064, "loss": 0.0277, "step": 7709 }, { "epoch": 3.598599766627771, "grad_norm": 0.78125, "learning_rate": 0.0001426788075872615, "loss": 0.0293, "step": 7710 }, { "epoch": 3.599066511085181, "grad_norm": 0.69140625, "learning_rate": 0.0001426655371392703, "loss": 0.0305, "step": 7711 }, { "epoch": 3.5995332555425903, "grad_norm": 0.80859375, "learning_rate": 0.00014265226577264277, "loss": 0.0267, "step": 7712 }, { "epoch": 3.6, "grad_norm": 0.8046875, "learning_rate": 0.0001426389934876646, "loss": 0.0281, "step": 7713 }, { "epoch": 3.6004667444574094, "grad_norm": 0.72265625, "learning_rate": 0.0001426257202846216, "loss": 0.0246, "step": 7714 }, { "epoch": 3.6009334889148192, "grad_norm": 0.70703125, "learning_rate": 0.00014261244616379956, "loss": 0.0295, "step": 7715 }, { "epoch": 3.6014002333722286, "grad_norm": 0.56640625, "learning_rate": 0.00014259917112548426, "loss": 0.026, "step": 7716 }, { "epoch": 3.6018669778296384, "grad_norm": 0.6640625, "learning_rate": 0.00014258589516996154, "loss": 0.0239, "step": 7717 }, { "epoch": 3.6023337222870477, "grad_norm": 0.703125, "learning_rate": 0.00014257261829751726, "loss": 0.0197, "step": 7718 }, { "epoch": 3.6028004667444575, "grad_norm": 0.671875, "learning_rate": 0.00014255934050843728, "loss": 0.0212, "step": 7719 }, { "epoch": 3.603267211201867, "grad_norm": 0.578125, "learning_rate": 0.00014254606180300744, "loss": 0.0209, "step": 7720 }, { "epoch": 3.6037339556592767, "grad_norm": 0.73828125, "learning_rate": 0.00014253278218151374, "loss": 0.0207, "step": 7721 }, { "epoch": 3.604200700116686, "grad_norm": 0.84765625, "learning_rate": 0.000142519501644242, "loss": 0.0331, "step": 7722 }, { "epoch": 3.604667444574096, "grad_norm": 0.78125, "learning_rate": 0.00014250622019147826, "loss": 0.0179, "step": 7723 }, { "epoch": 3.605134189031505, "grad_norm": 0.6953125, "learning_rate": 0.0001424929378235084, "loss": 0.0234, "step": 7724 }, { "epoch": 3.605600933488915, "grad_norm": 0.466796875, "learning_rate": 0.00014247965454061844, "loss": 0.0178, "step": 7725 }, { "epoch": 3.6060676779463243, "grad_norm": 0.58203125, "learning_rate": 0.0001424663703430944, "loss": 0.0259, "step": 7726 }, { "epoch": 3.606534422403734, "grad_norm": 0.7109375, "learning_rate": 0.00014245308523122227, "loss": 0.0134, "step": 7727 }, { "epoch": 3.6070011668611435, "grad_norm": 0.703125, "learning_rate": 0.00014243979920528814, "loss": 0.0236, "step": 7728 }, { "epoch": 3.607467911318553, "grad_norm": 0.59765625, "learning_rate": 0.00014242651226557804, "loss": 0.0202, "step": 7729 }, { "epoch": 3.6079346557759626, "grad_norm": 0.72265625, "learning_rate": 0.00014241322441237804, "loss": 0.0243, "step": 7730 }, { "epoch": 3.6084014002333724, "grad_norm": 0.46875, "learning_rate": 0.00014239993564597428, "loss": 0.0095, "step": 7731 }, { "epoch": 3.6088681446907818, "grad_norm": 0.54296875, "learning_rate": 0.00014238664596665285, "loss": 0.0182, "step": 7732 }, { "epoch": 3.609334889148191, "grad_norm": 0.796875, "learning_rate": 0.0001423733553746999, "loss": 0.0291, "step": 7733 }, { "epoch": 3.609801633605601, "grad_norm": 0.6328125, "learning_rate": 0.0001423600638704016, "loss": 0.0193, "step": 7734 }, { "epoch": 3.6102683780630107, "grad_norm": 0.498046875, "learning_rate": 0.00014234677145404413, "loss": 0.0167, "step": 7735 }, { "epoch": 3.61073512252042, "grad_norm": 0.78515625, "learning_rate": 0.0001423334781259137, "loss": 0.0222, "step": 7736 }, { "epoch": 3.6112018669778294, "grad_norm": 0.69140625, "learning_rate": 0.00014232018388629647, "loss": 0.0173, "step": 7737 }, { "epoch": 3.611668611435239, "grad_norm": 0.75390625, "learning_rate": 0.00014230688873547877, "loss": 0.0213, "step": 7738 }, { "epoch": 3.612135355892649, "grad_norm": 0.765625, "learning_rate": 0.0001422935926737468, "loss": 0.0198, "step": 7739 }, { "epoch": 3.6126021003500584, "grad_norm": 0.83984375, "learning_rate": 0.00014228029570138685, "loss": 0.0279, "step": 7740 }, { "epoch": 3.6130688448074677, "grad_norm": 0.546875, "learning_rate": 0.00014226699781868523, "loss": 0.0133, "step": 7741 }, { "epoch": 3.6135355892648775, "grad_norm": 0.4453125, "learning_rate": 0.00014225369902592827, "loss": 0.0163, "step": 7742 }, { "epoch": 3.6140023337222873, "grad_norm": 0.78125, "learning_rate": 0.00014224039932340225, "loss": 0.0308, "step": 7743 }, { "epoch": 3.6144690781796966, "grad_norm": 0.5859375, "learning_rate": 0.00014222709871139358, "loss": 0.0175, "step": 7744 }, { "epoch": 3.614935822637106, "grad_norm": 0.5625, "learning_rate": 0.00014221379719018865, "loss": 0.0178, "step": 7745 }, { "epoch": 3.615402567094516, "grad_norm": 0.59375, "learning_rate": 0.00014220049476007377, "loss": 0.0154, "step": 7746 }, { "epoch": 3.6158693115519256, "grad_norm": 0.58984375, "learning_rate": 0.0001421871914213355, "loss": 0.0223, "step": 7747 }, { "epoch": 3.616336056009335, "grad_norm": 1.0625, "learning_rate": 0.00014217388717426013, "loss": 0.04, "step": 7748 }, { "epoch": 3.6168028004667443, "grad_norm": 0.5, "learning_rate": 0.00014216058201913418, "loss": 0.0147, "step": 7749 }, { "epoch": 3.617269544924154, "grad_norm": 0.7109375, "learning_rate": 0.0001421472759562441, "loss": 0.0245, "step": 7750 }, { "epoch": 3.6177362893815634, "grad_norm": 0.609375, "learning_rate": 0.00014213396898587644, "loss": 0.0206, "step": 7751 }, { "epoch": 3.6182030338389732, "grad_norm": 0.83203125, "learning_rate": 0.0001421206611083177, "loss": 0.0255, "step": 7752 }, { "epoch": 3.6186697782963826, "grad_norm": 0.54296875, "learning_rate": 0.00014210735232385435, "loss": 0.0126, "step": 7753 }, { "epoch": 3.6191365227537924, "grad_norm": 0.73046875, "learning_rate": 0.00014209404263277302, "loss": 0.0209, "step": 7754 }, { "epoch": 3.6196032672112017, "grad_norm": 0.828125, "learning_rate": 0.00014208073203536022, "loss": 0.0267, "step": 7755 }, { "epoch": 3.6200700116686115, "grad_norm": 0.66796875, "learning_rate": 0.00014206742053190257, "loss": 0.0443, "step": 7756 }, { "epoch": 3.620536756126021, "grad_norm": 0.6484375, "learning_rate": 0.0001420541081226867, "loss": 0.0222, "step": 7757 }, { "epoch": 3.6210035005834307, "grad_norm": 0.53125, "learning_rate": 0.00014204079480799924, "loss": 0.018, "step": 7758 }, { "epoch": 3.62147024504084, "grad_norm": 0.5078125, "learning_rate": 0.00014202748058812678, "loss": 0.0208, "step": 7759 }, { "epoch": 3.62193698949825, "grad_norm": 0.6796875, "learning_rate": 0.00014201416546335603, "loss": 0.0233, "step": 7760 }, { "epoch": 3.622403733955659, "grad_norm": 0.52734375, "learning_rate": 0.00014200084943397372, "loss": 0.0187, "step": 7761 }, { "epoch": 3.622870478413069, "grad_norm": 0.6484375, "learning_rate": 0.0001419875325002665, "loss": 0.021, "step": 7762 }, { "epoch": 3.6233372228704783, "grad_norm": 0.53515625, "learning_rate": 0.0001419742146625211, "loss": 0.0188, "step": 7763 }, { "epoch": 3.623803967327888, "grad_norm": 0.51171875, "learning_rate": 0.00014196089592102436, "loss": 0.0208, "step": 7764 }, { "epoch": 3.6242707117852975, "grad_norm": 0.74609375, "learning_rate": 0.0001419475762760629, "loss": 0.0246, "step": 7765 }, { "epoch": 3.6247374562427073, "grad_norm": 0.447265625, "learning_rate": 0.0001419342557279236, "loss": 0.0121, "step": 7766 }, { "epoch": 3.6252042007001166, "grad_norm": 0.72265625, "learning_rate": 0.00014192093427689326, "loss": 0.0224, "step": 7767 }, { "epoch": 3.6256709451575264, "grad_norm": 0.67578125, "learning_rate": 0.0001419076119232587, "loss": 0.0238, "step": 7768 }, { "epoch": 3.6261376896149358, "grad_norm": 0.466796875, "learning_rate": 0.0001418942886673067, "loss": 0.0124, "step": 7769 }, { "epoch": 3.6266044340723456, "grad_norm": 0.7265625, "learning_rate": 0.00014188096450932423, "loss": 0.03, "step": 7770 }, { "epoch": 3.627071178529755, "grad_norm": 0.734375, "learning_rate": 0.00014186763944959815, "loss": 0.0278, "step": 7771 }, { "epoch": 3.6275379229871643, "grad_norm": 0.515625, "learning_rate": 0.0001418543134884153, "loss": 0.0164, "step": 7772 }, { "epoch": 3.628004667444574, "grad_norm": 0.671875, "learning_rate": 0.00014184098662606265, "loss": 0.037, "step": 7773 }, { "epoch": 3.628471411901984, "grad_norm": 0.71875, "learning_rate": 0.00014182765886282717, "loss": 0.0176, "step": 7774 }, { "epoch": 3.628938156359393, "grad_norm": 0.63671875, "learning_rate": 0.00014181433019899576, "loss": 0.0206, "step": 7775 }, { "epoch": 3.6294049008168026, "grad_norm": 0.58203125, "learning_rate": 0.0001418010006348554, "loss": 0.0181, "step": 7776 }, { "epoch": 3.6298716452742124, "grad_norm": 0.59765625, "learning_rate": 0.00014178767017069315, "loss": 0.015, "step": 7777 }, { "epoch": 3.630338389731622, "grad_norm": 0.703125, "learning_rate": 0.000141774338806796, "loss": 0.0206, "step": 7778 }, { "epoch": 3.6308051341890315, "grad_norm": 0.51171875, "learning_rate": 0.00014176100654345093, "loss": 0.0162, "step": 7779 }, { "epoch": 3.631271878646441, "grad_norm": 0.76953125, "learning_rate": 0.0001417476733809451, "loss": 0.0217, "step": 7780 }, { "epoch": 3.6317386231038506, "grad_norm": 0.6953125, "learning_rate": 0.00014173433931956555, "loss": 0.0241, "step": 7781 }, { "epoch": 3.6322053675612604, "grad_norm": 0.7890625, "learning_rate": 0.00014172100435959936, "loss": 0.0221, "step": 7782 }, { "epoch": 3.63267211201867, "grad_norm": 0.6171875, "learning_rate": 0.00014170766850133364, "loss": 0.0153, "step": 7783 }, { "epoch": 3.633138856476079, "grad_norm": 0.73046875, "learning_rate": 0.00014169433174505554, "loss": 0.0146, "step": 7784 }, { "epoch": 3.633605600933489, "grad_norm": 0.8515625, "learning_rate": 0.00014168099409105227, "loss": 0.0265, "step": 7785 }, { "epoch": 3.6340723453908987, "grad_norm": 0.671875, "learning_rate": 0.0001416676555396109, "loss": 0.0224, "step": 7786 }, { "epoch": 3.634539089848308, "grad_norm": 0.6484375, "learning_rate": 0.00014165431609101868, "loss": 0.0212, "step": 7787 }, { "epoch": 3.6350058343057174, "grad_norm": 0.734375, "learning_rate": 0.00014164097574556286, "loss": 0.021, "step": 7788 }, { "epoch": 3.6354725787631272, "grad_norm": 0.99609375, "learning_rate": 0.00014162763450353057, "loss": 0.0296, "step": 7789 }, { "epoch": 3.6359393232205366, "grad_norm": 0.80859375, "learning_rate": 0.00014161429236520917, "loss": 0.022, "step": 7790 }, { "epoch": 3.6364060676779464, "grad_norm": 0.66015625, "learning_rate": 0.00014160094933088585, "loss": 0.0209, "step": 7791 }, { "epoch": 3.6368728121353557, "grad_norm": 0.765625, "learning_rate": 0.00014158760540084795, "loss": 0.0265, "step": 7792 }, { "epoch": 3.6373395565927655, "grad_norm": 0.69140625, "learning_rate": 0.00014157426057538277, "loss": 0.0264, "step": 7793 }, { "epoch": 3.637806301050175, "grad_norm": 0.75, "learning_rate": 0.00014156091485477762, "loss": 0.0231, "step": 7794 }, { "epoch": 3.6382730455075847, "grad_norm": 0.34375, "learning_rate": 0.00014154756823931986, "loss": 0.0125, "step": 7795 }, { "epoch": 3.638739789964994, "grad_norm": 1.09375, "learning_rate": 0.00014153422072929686, "loss": 0.0383, "step": 7796 }, { "epoch": 3.639206534422404, "grad_norm": 0.671875, "learning_rate": 0.00014152087232499602, "loss": 0.0251, "step": 7797 }, { "epoch": 3.639673278879813, "grad_norm": 0.53125, "learning_rate": 0.0001415075230267047, "loss": 0.0225, "step": 7798 }, { "epoch": 3.640140023337223, "grad_norm": 0.66796875, "learning_rate": 0.0001414941728347104, "loss": 0.0235, "step": 7799 }, { "epoch": 3.6406067677946323, "grad_norm": 0.70703125, "learning_rate": 0.00014148082174930048, "loss": 0.0297, "step": 7800 }, { "epoch": 3.641073512252042, "grad_norm": 0.69921875, "learning_rate": 0.00014146746977076246, "loss": 0.0199, "step": 7801 }, { "epoch": 3.6415402567094515, "grad_norm": 0.625, "learning_rate": 0.0001414541168993838, "loss": 0.0236, "step": 7802 }, { "epoch": 3.6420070011668613, "grad_norm": 0.419921875, "learning_rate": 0.000141440763135452, "loss": 0.0112, "step": 7803 }, { "epoch": 3.6424737456242706, "grad_norm": 0.60546875, "learning_rate": 0.0001414274084792546, "loss": 0.0182, "step": 7804 }, { "epoch": 3.6429404900816804, "grad_norm": 0.65625, "learning_rate": 0.00014141405293107914, "loss": 0.0336, "step": 7805 }, { "epoch": 3.6434072345390898, "grad_norm": 0.65234375, "learning_rate": 0.00014140069649121317, "loss": 0.0256, "step": 7806 }, { "epoch": 3.6438739789964996, "grad_norm": 0.443359375, "learning_rate": 0.00014138733915994428, "loss": 0.0129, "step": 7807 }, { "epoch": 3.644340723453909, "grad_norm": 0.75, "learning_rate": 0.00014137398093756005, "loss": 0.0322, "step": 7808 }, { "epoch": 3.6448074679113187, "grad_norm": 0.7578125, "learning_rate": 0.00014136062182434808, "loss": 0.0277, "step": 7809 }, { "epoch": 3.645274212368728, "grad_norm": 0.6796875, "learning_rate": 0.00014134726182059605, "loss": 0.0218, "step": 7810 }, { "epoch": 3.6457409568261374, "grad_norm": 0.609375, "learning_rate": 0.0001413339009265916, "loss": 0.0207, "step": 7811 }, { "epoch": 3.646207701283547, "grad_norm": 0.59765625, "learning_rate": 0.0001413205391426224, "loss": 0.0234, "step": 7812 }, { "epoch": 3.646674445740957, "grad_norm": 0.76171875, "learning_rate": 0.00014130717646897614, "loss": 0.0224, "step": 7813 }, { "epoch": 3.6471411901983664, "grad_norm": 0.734375, "learning_rate": 0.00014129381290594058, "loss": 0.0245, "step": 7814 }, { "epoch": 3.6476079346557757, "grad_norm": 0.55078125, "learning_rate": 0.00014128044845380334, "loss": 0.0208, "step": 7815 }, { "epoch": 3.6480746791131855, "grad_norm": 0.88671875, "learning_rate": 0.0001412670831128523, "loss": 0.0234, "step": 7816 }, { "epoch": 3.6485414235705953, "grad_norm": 0.7578125, "learning_rate": 0.00014125371688337516, "loss": 0.027, "step": 7817 }, { "epoch": 3.6490081680280047, "grad_norm": 0.66796875, "learning_rate": 0.0001412403497656597, "loss": 0.0286, "step": 7818 }, { "epoch": 3.649474912485414, "grad_norm": 0.79296875, "learning_rate": 0.00014122698175999377, "loss": 0.0286, "step": 7819 }, { "epoch": 3.649941656942824, "grad_norm": 0.6796875, "learning_rate": 0.0001412136128666652, "loss": 0.0241, "step": 7820 }, { "epoch": 3.6504084014002336, "grad_norm": 0.58984375, "learning_rate": 0.0001412002430859618, "loss": 0.0217, "step": 7821 }, { "epoch": 3.650875145857643, "grad_norm": 0.53515625, "learning_rate": 0.00014118687241817144, "loss": 0.0178, "step": 7822 }, { "epoch": 3.6513418903150523, "grad_norm": 0.69921875, "learning_rate": 0.00014117350086358203, "loss": 0.0235, "step": 7823 }, { "epoch": 3.651808634772462, "grad_norm": 0.70703125, "learning_rate": 0.0001411601284224815, "loss": 0.0192, "step": 7824 }, { "epoch": 3.652275379229872, "grad_norm": 0.8203125, "learning_rate": 0.00014114675509515766, "loss": 0.0265, "step": 7825 }, { "epoch": 3.6527421236872812, "grad_norm": 0.6328125, "learning_rate": 0.0001411333808818986, "loss": 0.0211, "step": 7826 }, { "epoch": 3.6532088681446906, "grad_norm": 1.15625, "learning_rate": 0.00014112000578299217, "loss": 0.027, "step": 7827 }, { "epoch": 3.6536756126021004, "grad_norm": 0.6875, "learning_rate": 0.0001411066297987264, "loss": 0.0204, "step": 7828 }, { "epoch": 3.65414235705951, "grad_norm": 0.5546875, "learning_rate": 0.0001410932529293893, "loss": 0.0226, "step": 7829 }, { "epoch": 3.6546091015169195, "grad_norm": 0.796875, "learning_rate": 0.00014107987517526885, "loss": 0.0223, "step": 7830 }, { "epoch": 3.655075845974329, "grad_norm": 0.953125, "learning_rate": 0.00014106649653665312, "loss": 0.0308, "step": 7831 }, { "epoch": 3.6555425904317387, "grad_norm": 0.625, "learning_rate": 0.00014105311701383014, "loss": 0.0141, "step": 7832 }, { "epoch": 3.656009334889148, "grad_norm": 0.64453125, "learning_rate": 0.00014103973660708805, "loss": 0.0196, "step": 7833 }, { "epoch": 3.656476079346558, "grad_norm": 0.640625, "learning_rate": 0.00014102635531671484, "loss": 0.018, "step": 7834 }, { "epoch": 3.656942823803967, "grad_norm": 0.68359375, "learning_rate": 0.00014101297314299868, "loss": 0.0257, "step": 7835 }, { "epoch": 3.657409568261377, "grad_norm": 0.4296875, "learning_rate": 0.00014099959008622776, "loss": 0.0127, "step": 7836 }, { "epoch": 3.6578763127187863, "grad_norm": 0.65625, "learning_rate": 0.00014098620614669017, "loss": 0.0186, "step": 7837 }, { "epoch": 3.658343057176196, "grad_norm": 0.67578125, "learning_rate": 0.00014097282132467402, "loss": 0.0218, "step": 7838 }, { "epoch": 3.6588098016336055, "grad_norm": 0.57421875, "learning_rate": 0.00014095943562046764, "loss": 0.0124, "step": 7839 }, { "epoch": 3.6592765460910153, "grad_norm": 0.62109375, "learning_rate": 0.0001409460490343591, "loss": 0.0186, "step": 7840 }, { "epoch": 3.6597432905484246, "grad_norm": 0.76171875, "learning_rate": 0.0001409326615666367, "loss": 0.0294, "step": 7841 }, { "epoch": 3.6602100350058344, "grad_norm": 0.5625, "learning_rate": 0.00014091927321758873, "loss": 0.0151, "step": 7842 }, { "epoch": 3.6606767794632438, "grad_norm": 0.703125, "learning_rate": 0.0001409058839875034, "loss": 0.0316, "step": 7843 }, { "epoch": 3.6611435239206536, "grad_norm": 0.6328125, "learning_rate": 0.00014089249387666896, "loss": 0.0248, "step": 7844 }, { "epoch": 3.661610268378063, "grad_norm": 0.7265625, "learning_rate": 0.00014087910288537378, "loss": 0.028, "step": 7845 }, { "epoch": 3.6620770128354727, "grad_norm": 0.5546875, "learning_rate": 0.00014086571101390617, "loss": 0.0172, "step": 7846 }, { "epoch": 3.662543757292882, "grad_norm": 0.6328125, "learning_rate": 0.00014085231826255445, "loss": 0.017, "step": 7847 }, { "epoch": 3.663010501750292, "grad_norm": 0.69140625, "learning_rate": 0.00014083892463160697, "loss": 0.021, "step": 7848 }, { "epoch": 3.663477246207701, "grad_norm": 0.609375, "learning_rate": 0.00014082553012135216, "loss": 0.0181, "step": 7849 }, { "epoch": 3.663943990665111, "grad_norm": 0.96875, "learning_rate": 0.0001408121347320784, "loss": 0.0269, "step": 7850 }, { "epoch": 3.6644107351225204, "grad_norm": 0.7734375, "learning_rate": 0.00014079873846407405, "loss": 0.0235, "step": 7851 }, { "epoch": 3.66487747957993, "grad_norm": 0.76953125, "learning_rate": 0.0001407853413176276, "loss": 0.0224, "step": 7852 }, { "epoch": 3.6653442240373395, "grad_norm": 0.578125, "learning_rate": 0.00014077194329302755, "loss": 0.0244, "step": 7853 }, { "epoch": 3.665810968494749, "grad_norm": 0.74609375, "learning_rate": 0.00014075854439056228, "loss": 0.0209, "step": 7854 }, { "epoch": 3.6662777129521587, "grad_norm": 1.1484375, "learning_rate": 0.00014074514461052034, "loss": 0.0238, "step": 7855 }, { "epoch": 3.6667444574095684, "grad_norm": 0.65234375, "learning_rate": 0.00014073174395319024, "loss": 0.0166, "step": 7856 }, { "epoch": 3.667211201866978, "grad_norm": 0.8359375, "learning_rate": 0.0001407183424188605, "loss": 0.0225, "step": 7857 }, { "epoch": 3.667677946324387, "grad_norm": 0.6875, "learning_rate": 0.00014070494000781967, "loss": 0.02, "step": 7858 }, { "epoch": 3.668144690781797, "grad_norm": 0.8359375, "learning_rate": 0.00014069153672035628, "loss": 0.0243, "step": 7859 }, { "epoch": 3.6686114352392067, "grad_norm": 0.5390625, "learning_rate": 0.000140678132556759, "loss": 0.0147, "step": 7860 }, { "epoch": 3.669078179696616, "grad_norm": 0.71875, "learning_rate": 0.0001406647275173164, "loss": 0.0196, "step": 7861 }, { "epoch": 3.6695449241540254, "grad_norm": 0.5859375, "learning_rate": 0.0001406513216023171, "loss": 0.023, "step": 7862 }, { "epoch": 3.6700116686114352, "grad_norm": 0.734375, "learning_rate": 0.0001406379148120497, "loss": 0.0251, "step": 7863 }, { "epoch": 3.670478413068845, "grad_norm": 0.6875, "learning_rate": 0.0001406245071468029, "loss": 0.0264, "step": 7864 }, { "epoch": 3.6709451575262544, "grad_norm": 0.78515625, "learning_rate": 0.00014061109860686544, "loss": 0.0274, "step": 7865 }, { "epoch": 3.6714119019836637, "grad_norm": 0.671875, "learning_rate": 0.00014059768919252595, "loss": 0.0312, "step": 7866 }, { "epoch": 3.6718786464410735, "grad_norm": 0.64453125, "learning_rate": 0.00014058427890407312, "loss": 0.0184, "step": 7867 }, { "epoch": 3.6723453908984833, "grad_norm": 0.67578125, "learning_rate": 0.0001405708677417958, "loss": 0.0221, "step": 7868 }, { "epoch": 3.6728121353558927, "grad_norm": 0.93359375, "learning_rate": 0.00014055745570598264, "loss": 0.0448, "step": 7869 }, { "epoch": 3.673278879813302, "grad_norm": 0.5703125, "learning_rate": 0.0001405440427969225, "loss": 0.0198, "step": 7870 }, { "epoch": 3.673745624270712, "grad_norm": 0.81640625, "learning_rate": 0.00014053062901490409, "loss": 0.026, "step": 7871 }, { "epoch": 3.6742123687281216, "grad_norm": 0.64453125, "learning_rate": 0.0001405172143602163, "loss": 0.0142, "step": 7872 }, { "epoch": 3.674679113185531, "grad_norm": 0.81640625, "learning_rate": 0.00014050379883314793, "loss": 0.0276, "step": 7873 }, { "epoch": 3.6751458576429403, "grad_norm": 0.890625, "learning_rate": 0.0001404903824339878, "loss": 0.0269, "step": 7874 }, { "epoch": 3.67561260210035, "grad_norm": 0.51953125, "learning_rate": 0.00014047696516302483, "loss": 0.0176, "step": 7875 }, { "epoch": 3.6760793465577595, "grad_norm": 0.65625, "learning_rate": 0.0001404635470205479, "loss": 0.0332, "step": 7876 }, { "epoch": 3.6765460910151693, "grad_norm": 0.83203125, "learning_rate": 0.0001404501280068459, "loss": 0.0219, "step": 7877 }, { "epoch": 3.6770128354725786, "grad_norm": 0.7265625, "learning_rate": 0.00014043670812220778, "loss": 0.0266, "step": 7878 }, { "epoch": 3.6774795799299884, "grad_norm": 0.6796875, "learning_rate": 0.00014042328736692245, "loss": 0.0249, "step": 7879 }, { "epoch": 3.6779463243873978, "grad_norm": 0.55859375, "learning_rate": 0.0001404098657412789, "loss": 0.0265, "step": 7880 }, { "epoch": 3.6784130688448076, "grad_norm": 0.70703125, "learning_rate": 0.00014039644324556608, "loss": 0.0246, "step": 7881 }, { "epoch": 3.678879813302217, "grad_norm": 0.8515625, "learning_rate": 0.00014038301988007304, "loss": 0.0235, "step": 7882 }, { "epoch": 3.6793465577596267, "grad_norm": 0.58203125, "learning_rate": 0.00014036959564508878, "loss": 0.0197, "step": 7883 }, { "epoch": 3.679813302217036, "grad_norm": 0.58203125, "learning_rate": 0.00014035617054090232, "loss": 0.0151, "step": 7884 }, { "epoch": 3.680280046674446, "grad_norm": 0.6875, "learning_rate": 0.00014034274456780276, "loss": 0.0223, "step": 7885 }, { "epoch": 3.680746791131855, "grad_norm": 0.65625, "learning_rate": 0.00014032931772607913, "loss": 0.0244, "step": 7886 }, { "epoch": 3.681213535589265, "grad_norm": 0.7421875, "learning_rate": 0.00014031589001602053, "loss": 0.0197, "step": 7887 }, { "epoch": 3.6816802800466744, "grad_norm": 0.66796875, "learning_rate": 0.0001403024614379161, "loss": 0.0239, "step": 7888 }, { "epoch": 3.682147024504084, "grad_norm": 0.59765625, "learning_rate": 0.00014028903199205496, "loss": 0.0136, "step": 7889 }, { "epoch": 3.6826137689614935, "grad_norm": 0.8671875, "learning_rate": 0.00014027560167872625, "loss": 0.0217, "step": 7890 }, { "epoch": 3.6830805134189033, "grad_norm": 0.8671875, "learning_rate": 0.00014026217049821916, "loss": 0.0264, "step": 7891 }, { "epoch": 3.6835472578763127, "grad_norm": 0.53515625, "learning_rate": 0.00014024873845082286, "loss": 0.0151, "step": 7892 }, { "epoch": 3.6840140023337224, "grad_norm": 0.75, "learning_rate": 0.00014023530553682655, "loss": 0.0322, "step": 7893 }, { "epoch": 3.684480746791132, "grad_norm": 0.62109375, "learning_rate": 0.0001402218717565195, "loss": 0.022, "step": 7894 }, { "epoch": 3.6849474912485416, "grad_norm": 0.6953125, "learning_rate": 0.0001402084371101909, "loss": 0.0227, "step": 7895 }, { "epoch": 3.685414235705951, "grad_norm": 0.6484375, "learning_rate": 0.00014019500159813007, "loss": 0.0183, "step": 7896 }, { "epoch": 3.6858809801633603, "grad_norm": 0.71484375, "learning_rate": 0.00014018156522062624, "loss": 0.0203, "step": 7897 }, { "epoch": 3.68634772462077, "grad_norm": 0.55859375, "learning_rate": 0.00014016812797796872, "loss": 0.0183, "step": 7898 }, { "epoch": 3.68681446907818, "grad_norm": 0.61328125, "learning_rate": 0.00014015468987044685, "loss": 0.0269, "step": 7899 }, { "epoch": 3.6872812135355892, "grad_norm": 0.74609375, "learning_rate": 0.00014014125089834993, "loss": 0.0275, "step": 7900 }, { "epoch": 3.6877479579929986, "grad_norm": 0.5390625, "learning_rate": 0.00014012781106196738, "loss": 0.0213, "step": 7901 }, { "epoch": 3.6882147024504084, "grad_norm": 0.94140625, "learning_rate": 0.0001401143703615885, "loss": 0.0242, "step": 7902 }, { "epoch": 3.688681446907818, "grad_norm": 0.62109375, "learning_rate": 0.0001401009287975027, "loss": 0.021, "step": 7903 }, { "epoch": 3.6891481913652275, "grad_norm": 0.63671875, "learning_rate": 0.00014008748636999945, "loss": 0.0182, "step": 7904 }, { "epoch": 3.689614935822637, "grad_norm": 0.412109375, "learning_rate": 0.00014007404307936816, "loss": 0.0105, "step": 7905 }, { "epoch": 3.6900816802800467, "grad_norm": 0.5625, "learning_rate": 0.00014006059892589825, "loss": 0.0176, "step": 7906 }, { "epoch": 3.6905484247374565, "grad_norm": 0.78515625, "learning_rate": 0.00014004715390987917, "loss": 0.0262, "step": 7907 }, { "epoch": 3.691015169194866, "grad_norm": 0.57421875, "learning_rate": 0.00014003370803160046, "loss": 0.016, "step": 7908 }, { "epoch": 3.691481913652275, "grad_norm": 0.59375, "learning_rate": 0.00014002026129135157, "loss": 0.025, "step": 7909 }, { "epoch": 3.691948658109685, "grad_norm": 0.71484375, "learning_rate": 0.00014000681368942205, "loss": 0.0309, "step": 7910 }, { "epoch": 3.6924154025670948, "grad_norm": 0.61328125, "learning_rate": 0.0001399933652261015, "loss": 0.0183, "step": 7911 }, { "epoch": 3.692882147024504, "grad_norm": 0.734375, "learning_rate": 0.00013997991590167935, "loss": 0.026, "step": 7912 }, { "epoch": 3.6933488914819135, "grad_norm": 0.7109375, "learning_rate": 0.00013996646571644527, "loss": 0.0246, "step": 7913 }, { "epoch": 3.6938156359393233, "grad_norm": 0.7421875, "learning_rate": 0.00013995301467068885, "loss": 0.0301, "step": 7914 }, { "epoch": 3.6942823803967326, "grad_norm": 0.7421875, "learning_rate": 0.00013993956276469973, "loss": 0.0267, "step": 7915 }, { "epoch": 3.6947491248541424, "grad_norm": 0.56640625, "learning_rate": 0.00013992610999876746, "loss": 0.0173, "step": 7916 }, { "epoch": 3.6952158693115518, "grad_norm": 0.62109375, "learning_rate": 0.00013991265637318176, "loss": 0.0199, "step": 7917 }, { "epoch": 3.6956826137689616, "grad_norm": 0.478515625, "learning_rate": 0.00013989920188823228, "loss": 0.0178, "step": 7918 }, { "epoch": 3.696149358226371, "grad_norm": 0.74609375, "learning_rate": 0.00013988574654420871, "loss": 0.0266, "step": 7919 }, { "epoch": 3.6966161026837807, "grad_norm": 0.5703125, "learning_rate": 0.00013987229034140077, "loss": 0.0183, "step": 7920 }, { "epoch": 3.69708284714119, "grad_norm": 0.9453125, "learning_rate": 0.0001398588332800982, "loss": 0.0347, "step": 7921 }, { "epoch": 3.6975495915986, "grad_norm": 0.88671875, "learning_rate": 0.00013984537536059071, "loss": 0.0323, "step": 7922 }, { "epoch": 3.698016336056009, "grad_norm": 0.671875, "learning_rate": 0.00013983191658316804, "loss": 0.0212, "step": 7923 }, { "epoch": 3.698483080513419, "grad_norm": 0.640625, "learning_rate": 0.00013981845694812007, "loss": 0.0248, "step": 7924 }, { "epoch": 3.6989498249708284, "grad_norm": 0.5703125, "learning_rate": 0.00013980499645573652, "loss": 0.0252, "step": 7925 }, { "epoch": 3.699416569428238, "grad_norm": 0.66796875, "learning_rate": 0.00013979153510630722, "loss": 0.0219, "step": 7926 }, { "epoch": 3.6998833138856475, "grad_norm": 0.5546875, "learning_rate": 0.00013977807290012204, "loss": 0.0178, "step": 7927 }, { "epoch": 3.7003500583430573, "grad_norm": 0.64453125, "learning_rate": 0.0001397646098374708, "loss": 0.028, "step": 7928 }, { "epoch": 3.7008168028004667, "grad_norm": 0.65234375, "learning_rate": 0.0001397511459186434, "loss": 0.0212, "step": 7929 }, { "epoch": 3.7012835472578764, "grad_norm": 0.5625, "learning_rate": 0.00013973768114392972, "loss": 0.0192, "step": 7930 }, { "epoch": 3.701750291715286, "grad_norm": 0.5234375, "learning_rate": 0.00013972421551361968, "loss": 0.0145, "step": 7931 }, { "epoch": 3.7022170361726956, "grad_norm": 0.54296875, "learning_rate": 0.0001397107490280032, "loss": 0.0183, "step": 7932 }, { "epoch": 3.702683780630105, "grad_norm": 0.6796875, "learning_rate": 0.00013969728168737025, "loss": 0.0181, "step": 7933 }, { "epoch": 3.7031505250875147, "grad_norm": 0.82421875, "learning_rate": 0.00013968381349201077, "loss": 0.0221, "step": 7934 }, { "epoch": 3.703617269544924, "grad_norm": 0.84375, "learning_rate": 0.00013967034444221474, "loss": 0.0263, "step": 7935 }, { "epoch": 3.7040840140023334, "grad_norm": 0.69921875, "learning_rate": 0.0001396568745382722, "loss": 0.0295, "step": 7936 }, { "epoch": 3.7045507584597432, "grad_norm": 0.70703125, "learning_rate": 0.00013964340378047315, "loss": 0.0174, "step": 7937 }, { "epoch": 3.705017502917153, "grad_norm": 0.7109375, "learning_rate": 0.0001396299321691076, "loss": 0.0286, "step": 7938 }, { "epoch": 3.7054842473745624, "grad_norm": 0.71484375, "learning_rate": 0.00013961645970446567, "loss": 0.0186, "step": 7939 }, { "epoch": 3.7059509918319717, "grad_norm": 0.58984375, "learning_rate": 0.0001396029863868374, "loss": 0.0202, "step": 7940 }, { "epoch": 3.7064177362893815, "grad_norm": 0.65234375, "learning_rate": 0.0001395895122165129, "loss": 0.0192, "step": 7941 }, { "epoch": 3.7068844807467913, "grad_norm": 0.6640625, "learning_rate": 0.00013957603719378224, "loss": 0.0258, "step": 7942 }, { "epoch": 3.7073512252042007, "grad_norm": 0.5234375, "learning_rate": 0.00013956256131893564, "loss": 0.0225, "step": 7943 }, { "epoch": 3.70781796966161, "grad_norm": 0.58984375, "learning_rate": 0.0001395490845922632, "loss": 0.0236, "step": 7944 }, { "epoch": 3.70828471411902, "grad_norm": 0.75, "learning_rate": 0.00013953560701405505, "loss": 0.0327, "step": 7945 }, { "epoch": 3.7087514585764296, "grad_norm": 0.6796875, "learning_rate": 0.00013952212858460143, "loss": 0.0344, "step": 7946 }, { "epoch": 3.709218203033839, "grad_norm": 0.68359375, "learning_rate": 0.00013950864930419255, "loss": 0.0198, "step": 7947 }, { "epoch": 3.7096849474912483, "grad_norm": 0.57421875, "learning_rate": 0.00013949516917311858, "loss": 0.0144, "step": 7948 }, { "epoch": 3.710151691948658, "grad_norm": 0.58203125, "learning_rate": 0.0001394816881916698, "loss": 0.0179, "step": 7949 }, { "epoch": 3.710618436406068, "grad_norm": 0.56640625, "learning_rate": 0.00013946820636013649, "loss": 0.017, "step": 7950 }, { "epoch": 3.7110851808634773, "grad_norm": 0.61328125, "learning_rate": 0.0001394547236788089, "loss": 0.0202, "step": 7951 }, { "epoch": 3.7115519253208866, "grad_norm": 0.51953125, "learning_rate": 0.00013944124014797732, "loss": 0.0184, "step": 7952 }, { "epoch": 3.7120186697782964, "grad_norm": 0.79296875, "learning_rate": 0.0001394277557679321, "loss": 0.0203, "step": 7953 }, { "epoch": 3.712485414235706, "grad_norm": 0.7265625, "learning_rate": 0.00013941427053896358, "loss": 0.0183, "step": 7954 }, { "epoch": 3.7129521586931156, "grad_norm": 0.9296875, "learning_rate": 0.00013940078446136203, "loss": 0.017, "step": 7955 }, { "epoch": 3.713418903150525, "grad_norm": 0.80859375, "learning_rate": 0.0001393872975354179, "loss": 0.028, "step": 7956 }, { "epoch": 3.7138856476079347, "grad_norm": 0.71484375, "learning_rate": 0.00013937380976142153, "loss": 0.0185, "step": 7957 }, { "epoch": 3.714352392065344, "grad_norm": 0.828125, "learning_rate": 0.00013936032113966339, "loss": 0.0277, "step": 7958 }, { "epoch": 3.714819136522754, "grad_norm": 0.73828125, "learning_rate": 0.00013934683167043382, "loss": 0.0284, "step": 7959 }, { "epoch": 3.715285880980163, "grad_norm": 0.75390625, "learning_rate": 0.00013933334135402334, "loss": 0.0246, "step": 7960 }, { "epoch": 3.715752625437573, "grad_norm": 0.7109375, "learning_rate": 0.00013931985019072237, "loss": 0.0144, "step": 7961 }, { "epoch": 3.7162193698949824, "grad_norm": 0.625, "learning_rate": 0.00013930635818082139, "loss": 0.0217, "step": 7962 }, { "epoch": 3.716686114352392, "grad_norm": 0.984375, "learning_rate": 0.0001392928653246109, "loss": 0.0235, "step": 7963 }, { "epoch": 3.7171528588098015, "grad_norm": 0.8125, "learning_rate": 0.00013927937162238146, "loss": 0.0267, "step": 7964 }, { "epoch": 3.7176196032672113, "grad_norm": 0.6484375, "learning_rate": 0.00013926587707442352, "loss": 0.016, "step": 7965 }, { "epoch": 3.7180863477246207, "grad_norm": 0.75390625, "learning_rate": 0.0001392523816810277, "loss": 0.0212, "step": 7966 }, { "epoch": 3.7185530921820305, "grad_norm": 0.86328125, "learning_rate": 0.00013923888544248458, "loss": 0.0175, "step": 7967 }, { "epoch": 3.71901983663944, "grad_norm": 0.91796875, "learning_rate": 0.0001392253883590847, "loss": 0.0244, "step": 7968 }, { "epoch": 3.7194865810968496, "grad_norm": 0.56640625, "learning_rate": 0.00013921189043111868, "loss": 0.0238, "step": 7969 }, { "epoch": 3.719953325554259, "grad_norm": 0.58203125, "learning_rate": 0.00013919839165887717, "loss": 0.0221, "step": 7970 }, { "epoch": 3.7204200700116687, "grad_norm": 0.640625, "learning_rate": 0.00013918489204265078, "loss": 0.0191, "step": 7971 }, { "epoch": 3.720886814469078, "grad_norm": 0.8984375, "learning_rate": 0.0001391713915827302, "loss": 0.034, "step": 7972 }, { "epoch": 3.721353558926488, "grad_norm": 0.58984375, "learning_rate": 0.00013915789027940607, "loss": 0.0148, "step": 7973 }, { "epoch": 3.7218203033838972, "grad_norm": 0.60546875, "learning_rate": 0.00013914438813296913, "loss": 0.0215, "step": 7974 }, { "epoch": 3.722287047841307, "grad_norm": 0.5859375, "learning_rate": 0.0001391308851437101, "loss": 0.0189, "step": 7975 }, { "epoch": 3.7227537922987164, "grad_norm": 0.6796875, "learning_rate": 0.00013911738131191966, "loss": 0.0212, "step": 7976 }, { "epoch": 3.723220536756126, "grad_norm": 0.64453125, "learning_rate": 0.00013910387663788865, "loss": 0.0205, "step": 7977 }, { "epoch": 3.7236872812135355, "grad_norm": 0.59375, "learning_rate": 0.00013909037112190773, "loss": 0.0117, "step": 7978 }, { "epoch": 3.724154025670945, "grad_norm": 0.78125, "learning_rate": 0.0001390768647642678, "loss": 0.027, "step": 7979 }, { "epoch": 3.7246207701283547, "grad_norm": 0.51953125, "learning_rate": 0.0001390633575652596, "loss": 0.0168, "step": 7980 }, { "epoch": 3.7250875145857645, "grad_norm": 0.62890625, "learning_rate": 0.00013904984952517398, "loss": 0.0153, "step": 7981 }, { "epoch": 3.725554259043174, "grad_norm": 0.6484375, "learning_rate": 0.00013903634064430176, "loss": 0.0154, "step": 7982 }, { "epoch": 3.726021003500583, "grad_norm": 0.52734375, "learning_rate": 0.0001390228309229338, "loss": 0.0171, "step": 7983 }, { "epoch": 3.726487747957993, "grad_norm": 0.7578125, "learning_rate": 0.000139009320361361, "loss": 0.0274, "step": 7984 }, { "epoch": 3.7269544924154028, "grad_norm": 0.38671875, "learning_rate": 0.0001389958089598743, "loss": 0.0265, "step": 7985 }, { "epoch": 3.727421236872812, "grad_norm": 0.6171875, "learning_rate": 0.00013898229671876447, "loss": 0.014, "step": 7986 }, { "epoch": 3.7278879813302215, "grad_norm": 0.55859375, "learning_rate": 0.0001389687836383226, "loss": 0.0167, "step": 7987 }, { "epoch": 3.7283547257876313, "grad_norm": 0.71875, "learning_rate": 0.0001389552697188396, "loss": 0.02, "step": 7988 }, { "epoch": 3.728821470245041, "grad_norm": 0.71484375, "learning_rate": 0.00013894175496060638, "loss": 0.0176, "step": 7989 }, { "epoch": 3.7292882147024504, "grad_norm": 0.6875, "learning_rate": 0.000138928239363914, "loss": 0.0272, "step": 7990 }, { "epoch": 3.7297549591598598, "grad_norm": 0.7109375, "learning_rate": 0.00013891472292905342, "loss": 0.0263, "step": 7991 }, { "epoch": 3.7302217036172696, "grad_norm": 0.6875, "learning_rate": 0.00013890120565631566, "loss": 0.0192, "step": 7992 }, { "epoch": 3.7306884480746794, "grad_norm": 0.71484375, "learning_rate": 0.00013888768754599182, "loss": 0.0245, "step": 7993 }, { "epoch": 3.7311551925320887, "grad_norm": 0.69140625, "learning_rate": 0.0001388741685983729, "loss": 0.028, "step": 7994 }, { "epoch": 3.731621936989498, "grad_norm": 0.56640625, "learning_rate": 0.00013886064881375, "loss": 0.009, "step": 7995 }, { "epoch": 3.732088681446908, "grad_norm": 0.70703125, "learning_rate": 0.00013884712819241422, "loss": 0.0182, "step": 7996 }, { "epoch": 3.7325554259043177, "grad_norm": 0.5859375, "learning_rate": 0.00013883360673465665, "loss": 0.0167, "step": 7997 }, { "epoch": 3.733022170361727, "grad_norm": 0.62890625, "learning_rate": 0.00013882008444076843, "loss": 0.0224, "step": 7998 }, { "epoch": 3.7334889148191364, "grad_norm": 0.75, "learning_rate": 0.00013880656131104075, "loss": 0.0229, "step": 7999 }, { "epoch": 3.733955659276546, "grad_norm": 0.6640625, "learning_rate": 0.00013879303734576473, "loss": 0.0193, "step": 8000 }, { "epoch": 3.7344224037339555, "grad_norm": 0.640625, "learning_rate": 0.0001387795125452316, "loss": 0.0173, "step": 8001 }, { "epoch": 3.7348891481913653, "grad_norm": 0.578125, "learning_rate": 0.00013876598690973252, "loss": 0.0156, "step": 8002 }, { "epoch": 3.7353558926487747, "grad_norm": 0.66015625, "learning_rate": 0.00013875246043955876, "loss": 0.0244, "step": 8003 }, { "epoch": 3.7358226371061845, "grad_norm": 0.52734375, "learning_rate": 0.0001387389331350015, "loss": 0.0148, "step": 8004 }, { "epoch": 3.736289381563594, "grad_norm": 0.74609375, "learning_rate": 0.00013872540499635202, "loss": 0.0214, "step": 8005 }, { "epoch": 3.7367561260210036, "grad_norm": 0.333984375, "learning_rate": 0.00013871187602390163, "loss": 0.0085, "step": 8006 }, { "epoch": 3.737222870478413, "grad_norm": 0.68359375, "learning_rate": 0.00013869834621794158, "loss": 0.0252, "step": 8007 }, { "epoch": 3.7376896149358227, "grad_norm": 0.671875, "learning_rate": 0.00013868481557876322, "loss": 0.0203, "step": 8008 }, { "epoch": 3.738156359393232, "grad_norm": 0.5703125, "learning_rate": 0.00013867128410665784, "loss": 0.018, "step": 8009 }, { "epoch": 3.738623103850642, "grad_norm": 0.5703125, "learning_rate": 0.0001386577518019168, "loss": 0.0195, "step": 8010 }, { "epoch": 3.7390898483080512, "grad_norm": 0.79296875, "learning_rate": 0.0001386442186648315, "loss": 0.0267, "step": 8011 }, { "epoch": 3.739556592765461, "grad_norm": 0.78125, "learning_rate": 0.00013863068469569326, "loss": 0.0301, "step": 8012 }, { "epoch": 3.7400233372228704, "grad_norm": 0.80078125, "learning_rate": 0.00013861714989479352, "loss": 0.0283, "step": 8013 }, { "epoch": 3.74049008168028, "grad_norm": 0.50390625, "learning_rate": 0.00013860361426242372, "loss": 0.017, "step": 8014 }, { "epoch": 3.7409568261376895, "grad_norm": 0.7109375, "learning_rate": 0.00013859007779887524, "loss": 0.0235, "step": 8015 }, { "epoch": 3.7414235705950993, "grad_norm": 0.5859375, "learning_rate": 0.0001385765405044396, "loss": 0.0186, "step": 8016 }, { "epoch": 3.7418903150525087, "grad_norm": 0.5234375, "learning_rate": 0.00013856300237940822, "loss": 0.0179, "step": 8017 }, { "epoch": 3.7423570595099185, "grad_norm": 0.60546875, "learning_rate": 0.00013854946342407265, "loss": 0.0192, "step": 8018 }, { "epoch": 3.742823803967328, "grad_norm": 0.53125, "learning_rate": 0.00013853592363872433, "loss": 0.0157, "step": 8019 }, { "epoch": 3.7432905484247376, "grad_norm": 0.59375, "learning_rate": 0.00013852238302365484, "loss": 0.0202, "step": 8020 }, { "epoch": 3.743757292882147, "grad_norm": 0.875, "learning_rate": 0.00013850884157915567, "loss": 0.0282, "step": 8021 }, { "epoch": 3.7442240373395563, "grad_norm": 0.62109375, "learning_rate": 0.00013849529930551842, "loss": 0.0174, "step": 8022 }, { "epoch": 3.744690781796966, "grad_norm": 0.65234375, "learning_rate": 0.00013848175620303468, "loss": 0.0285, "step": 8023 }, { "epoch": 3.745157526254376, "grad_norm": 0.6328125, "learning_rate": 0.00013846821227199603, "loss": 0.0231, "step": 8024 }, { "epoch": 3.7456242707117853, "grad_norm": 0.6953125, "learning_rate": 0.00013845466751269409, "loss": 0.0166, "step": 8025 }, { "epoch": 3.7460910151691946, "grad_norm": 0.7578125, "learning_rate": 0.0001384411219254205, "loss": 0.0306, "step": 8026 }, { "epoch": 3.7465577596266044, "grad_norm": 0.625, "learning_rate": 0.00013842757551046691, "loss": 0.0194, "step": 8027 }, { "epoch": 3.747024504084014, "grad_norm": 0.435546875, "learning_rate": 0.00013841402826812496, "loss": 0.0118, "step": 8028 }, { "epoch": 3.7474912485414236, "grad_norm": 0.60546875, "learning_rate": 0.00013840048019868637, "loss": 0.0211, "step": 8029 }, { "epoch": 3.747957992998833, "grad_norm": 0.59375, "learning_rate": 0.00013838693130244282, "loss": 0.0214, "step": 8030 }, { "epoch": 3.7484247374562427, "grad_norm": 1.0234375, "learning_rate": 0.00013837338157968607, "loss": 0.0331, "step": 8031 }, { "epoch": 3.7488914819136525, "grad_norm": 0.6015625, "learning_rate": 0.00013835983103070783, "loss": 0.0166, "step": 8032 }, { "epoch": 3.749358226371062, "grad_norm": 0.8046875, "learning_rate": 0.00013834627965579984, "loss": 0.0215, "step": 8033 }, { "epoch": 3.749824970828471, "grad_norm": 0.65625, "learning_rate": 0.00013833272745525393, "loss": 0.0163, "step": 8034 }, { "epoch": 3.750291715285881, "grad_norm": 0.74609375, "learning_rate": 0.00013831917442936188, "loss": 0.0261, "step": 8035 }, { "epoch": 3.750758459743291, "grad_norm": 0.63671875, "learning_rate": 0.0001383056205784155, "loss": 0.0263, "step": 8036 }, { "epoch": 3.7512252042007, "grad_norm": 0.64453125, "learning_rate": 0.00013829206590270656, "loss": 0.0181, "step": 8037 }, { "epoch": 3.7516919486581095, "grad_norm": 0.458984375, "learning_rate": 0.00013827851040252697, "loss": 0.0179, "step": 8038 }, { "epoch": 3.7521586931155193, "grad_norm": 0.6328125, "learning_rate": 0.0001382649540781686, "loss": 0.0168, "step": 8039 }, { "epoch": 3.7526254375729287, "grad_norm": 0.8359375, "learning_rate": 0.00013825139692992327, "loss": 0.0214, "step": 8040 }, { "epoch": 3.7526254375729287, "eval_loss": 1.835245966911316, "eval_runtime": 56.4633, "eval_samples_per_second": 31.95, "eval_steps_per_second": 4.003, "step": 8040 }, { "epoch": 3.7530921820303385, "grad_norm": 0.484375, "learning_rate": 0.00013823783895808298, "loss": 0.0152, "step": 8041 }, { "epoch": 3.753558926487748, "grad_norm": 0.65234375, "learning_rate": 0.00013822428016293956, "loss": 0.0214, "step": 8042 }, { "epoch": 3.7540256709451576, "grad_norm": 0.5234375, "learning_rate": 0.000138210720544785, "loss": 0.0086, "step": 8043 }, { "epoch": 3.754492415402567, "grad_norm": 0.4921875, "learning_rate": 0.0001381971601039112, "loss": 0.0161, "step": 8044 }, { "epoch": 3.7549591598599767, "grad_norm": 0.53125, "learning_rate": 0.00013818359884061019, "loss": 0.0157, "step": 8045 }, { "epoch": 3.755425904317386, "grad_norm": 0.6171875, "learning_rate": 0.00013817003675517392, "loss": 0.0177, "step": 8046 }, { "epoch": 3.755892648774796, "grad_norm": 0.96484375, "learning_rate": 0.0001381564738478944, "loss": 0.0166, "step": 8047 }, { "epoch": 3.7563593932322052, "grad_norm": 0.515625, "learning_rate": 0.00013814291011906367, "loss": 0.0166, "step": 8048 }, { "epoch": 3.756826137689615, "grad_norm": 0.55859375, "learning_rate": 0.00013812934556897376, "loss": 0.013, "step": 8049 }, { "epoch": 3.7572928821470244, "grad_norm": 0.59765625, "learning_rate": 0.00013811578019791673, "loss": 0.0159, "step": 8050 }, { "epoch": 3.757759626604434, "grad_norm": 0.7421875, "learning_rate": 0.00013810221400618468, "loss": 0.0256, "step": 8051 }, { "epoch": 3.7582263710618435, "grad_norm": 0.5546875, "learning_rate": 0.0001380886469940697, "loss": 0.0203, "step": 8052 }, { "epoch": 3.7586931155192533, "grad_norm": 0.7109375, "learning_rate": 0.00013807507916186387, "loss": 0.0184, "step": 8053 }, { "epoch": 3.7591598599766627, "grad_norm": 0.60546875, "learning_rate": 0.00013806151050985933, "loss": 0.0258, "step": 8054 }, { "epoch": 3.7596266044340725, "grad_norm": 0.828125, "learning_rate": 0.00013804794103834827, "loss": 0.022, "step": 8055 }, { "epoch": 3.760093348891482, "grad_norm": 0.61328125, "learning_rate": 0.00013803437074762277, "loss": 0.0174, "step": 8056 }, { "epoch": 3.7605600933488916, "grad_norm": 0.5625, "learning_rate": 0.0001380207996379751, "loss": 0.0211, "step": 8057 }, { "epoch": 3.761026837806301, "grad_norm": 0.63671875, "learning_rate": 0.00013800722770969745, "loss": 0.0237, "step": 8058 }, { "epoch": 3.7614935822637108, "grad_norm": 0.6484375, "learning_rate": 0.000137993654963082, "loss": 0.02, "step": 8059 }, { "epoch": 3.76196032672112, "grad_norm": 0.65234375, "learning_rate": 0.00013798008139842097, "loss": 0.0201, "step": 8060 }, { "epoch": 3.7624270711785295, "grad_norm": 0.59765625, "learning_rate": 0.00013796650701600668, "loss": 0.0186, "step": 8061 }, { "epoch": 3.7628938156359393, "grad_norm": 0.86328125, "learning_rate": 0.00013795293181613139, "loss": 0.0189, "step": 8062 }, { "epoch": 3.763360560093349, "grad_norm": 0.75, "learning_rate": 0.00013793935579908733, "loss": 0.0229, "step": 8063 }, { "epoch": 3.7638273045507584, "grad_norm": 0.62890625, "learning_rate": 0.00013792577896516687, "loss": 0.0129, "step": 8064 }, { "epoch": 3.7642940490081678, "grad_norm": 0.69921875, "learning_rate": 0.0001379122013146623, "loss": 0.0179, "step": 8065 }, { "epoch": 3.7647607934655776, "grad_norm": 0.6015625, "learning_rate": 0.00013789862284786596, "loss": 0.0212, "step": 8066 }, { "epoch": 3.7652275379229874, "grad_norm": 0.5234375, "learning_rate": 0.00013788504356507025, "loss": 0.019, "step": 8067 }, { "epoch": 3.7656942823803967, "grad_norm": 0.67578125, "learning_rate": 0.0001378714634665675, "loss": 0.0159, "step": 8068 }, { "epoch": 3.766161026837806, "grad_norm": 0.80078125, "learning_rate": 0.00013785788255265012, "loss": 0.026, "step": 8069 }, { "epoch": 3.766627771295216, "grad_norm": 0.59375, "learning_rate": 0.00013784430082361052, "loss": 0.0195, "step": 8070 }, { "epoch": 3.7670945157526257, "grad_norm": 0.75, "learning_rate": 0.00013783071827974113, "loss": 0.0234, "step": 8071 }, { "epoch": 3.767561260210035, "grad_norm": 0.546875, "learning_rate": 0.00013781713492133445, "loss": 0.0185, "step": 8072 }, { "epoch": 3.7680280046674444, "grad_norm": 0.84375, "learning_rate": 0.00013780355074868283, "loss": 0.0249, "step": 8073 }, { "epoch": 3.768494749124854, "grad_norm": 0.78125, "learning_rate": 0.00013778996576207887, "loss": 0.0276, "step": 8074 }, { "epoch": 3.768961493582264, "grad_norm": 0.9609375, "learning_rate": 0.00013777637996181504, "loss": 0.0296, "step": 8075 }, { "epoch": 3.7694282380396733, "grad_norm": 0.6015625, "learning_rate": 0.00013776279334818376, "loss": 0.0201, "step": 8076 }, { "epoch": 3.7698949824970827, "grad_norm": 0.58203125, "learning_rate": 0.00013774920592147768, "loss": 0.0137, "step": 8077 }, { "epoch": 3.7703617269544925, "grad_norm": 0.51953125, "learning_rate": 0.0001377356176819893, "loss": 0.0129, "step": 8078 }, { "epoch": 3.7708284714119022, "grad_norm": 0.65625, "learning_rate": 0.0001377220286300112, "loss": 0.0251, "step": 8079 }, { "epoch": 3.7712952158693116, "grad_norm": 0.640625, "learning_rate": 0.00013770843876583598, "loss": 0.0164, "step": 8080 }, { "epoch": 3.771761960326721, "grad_norm": 0.60546875, "learning_rate": 0.00013769484808975625, "loss": 0.0127, "step": 8081 }, { "epoch": 3.7722287047841307, "grad_norm": 0.515625, "learning_rate": 0.0001376812566020646, "loss": 0.0205, "step": 8082 }, { "epoch": 3.77269544924154, "grad_norm": 0.62890625, "learning_rate": 0.00013766766430305368, "loss": 0.0215, "step": 8083 }, { "epoch": 3.77316219369895, "grad_norm": 0.94140625, "learning_rate": 0.00013765407119301618, "loss": 0.0359, "step": 8084 }, { "epoch": 3.7736289381563592, "grad_norm": 0.7109375, "learning_rate": 0.00013764047727224475, "loss": 0.0231, "step": 8085 }, { "epoch": 3.774095682613769, "grad_norm": 0.7421875, "learning_rate": 0.00013762688254103205, "loss": 0.0314, "step": 8086 }, { "epoch": 3.7745624270711784, "grad_norm": 0.6796875, "learning_rate": 0.00013761328699967083, "loss": 0.0268, "step": 8087 }, { "epoch": 3.775029171528588, "grad_norm": 0.5390625, "learning_rate": 0.00013759969064845384, "loss": 0.0163, "step": 8088 }, { "epoch": 3.7754959159859975, "grad_norm": 0.48828125, "learning_rate": 0.00013758609348767372, "loss": 0.0162, "step": 8089 }, { "epoch": 3.7759626604434073, "grad_norm": 0.71875, "learning_rate": 0.00013757249551762336, "loss": 0.024, "step": 8090 }, { "epoch": 3.7764294049008167, "grad_norm": 0.67578125, "learning_rate": 0.0001375588967385955, "loss": 0.0357, "step": 8091 }, { "epoch": 3.7768961493582265, "grad_norm": 0.70703125, "learning_rate": 0.00013754529715088288, "loss": 0.0251, "step": 8092 }, { "epoch": 3.777362893815636, "grad_norm": 0.55859375, "learning_rate": 0.0001375316967547784, "loss": 0.0157, "step": 8093 }, { "epoch": 3.7778296382730456, "grad_norm": 0.68359375, "learning_rate": 0.0001375180955505748, "loss": 0.0355, "step": 8094 }, { "epoch": 3.778296382730455, "grad_norm": 0.62890625, "learning_rate": 0.00013750449353856501, "loss": 0.0169, "step": 8095 }, { "epoch": 3.778763127187865, "grad_norm": 0.51171875, "learning_rate": 0.00013749089071904183, "loss": 0.0159, "step": 8096 }, { "epoch": 3.779229871645274, "grad_norm": 0.62109375, "learning_rate": 0.0001374772870922982, "loss": 0.0176, "step": 8097 }, { "epoch": 3.779696616102684, "grad_norm": 0.68359375, "learning_rate": 0.000137463682658627, "loss": 0.0233, "step": 8098 }, { "epoch": 3.7801633605600933, "grad_norm": 0.498046875, "learning_rate": 0.00013745007741832114, "loss": 0.0094, "step": 8099 }, { "epoch": 3.780630105017503, "grad_norm": 0.51953125, "learning_rate": 0.00013743647137167355, "loss": 0.0181, "step": 8100 }, { "epoch": 3.7810968494749124, "grad_norm": 0.60546875, "learning_rate": 0.0001374228645189772, "loss": 0.0138, "step": 8101 }, { "epoch": 3.781563593932322, "grad_norm": 0.8046875, "learning_rate": 0.00013740925686052505, "loss": 0.0323, "step": 8102 }, { "epoch": 3.7820303383897316, "grad_norm": 0.5703125, "learning_rate": 0.00013739564839661012, "loss": 0.0134, "step": 8103 }, { "epoch": 3.782497082847141, "grad_norm": 0.62890625, "learning_rate": 0.00013738203912752538, "loss": 0.0136, "step": 8104 }, { "epoch": 3.7829638273045507, "grad_norm": 0.57421875, "learning_rate": 0.00013736842905356385, "loss": 0.017, "step": 8105 }, { "epoch": 3.7834305717619605, "grad_norm": 0.6640625, "learning_rate": 0.00013735481817501855, "loss": 0.021, "step": 8106 }, { "epoch": 3.78389731621937, "grad_norm": 0.609375, "learning_rate": 0.00013734120649218262, "loss": 0.0198, "step": 8107 }, { "epoch": 3.784364060676779, "grad_norm": 0.578125, "learning_rate": 0.00013732759400534908, "loss": 0.0183, "step": 8108 }, { "epoch": 3.784830805134189, "grad_norm": 0.53125, "learning_rate": 0.000137313980714811, "loss": 0.0147, "step": 8109 }, { "epoch": 3.785297549591599, "grad_norm": 0.82421875, "learning_rate": 0.00013730036662086152, "loss": 0.0276, "step": 8110 }, { "epoch": 3.785764294049008, "grad_norm": 0.5546875, "learning_rate": 0.00013728675172379378, "loss": 0.0169, "step": 8111 }, { "epoch": 3.7862310385064175, "grad_norm": 0.51171875, "learning_rate": 0.0001372731360239009, "loss": 0.0155, "step": 8112 }, { "epoch": 3.7866977829638273, "grad_norm": 0.68359375, "learning_rate": 0.00013725951952147603, "loss": 0.0138, "step": 8113 }, { "epoch": 3.787164527421237, "grad_norm": 0.58203125, "learning_rate": 0.00013724590221681236, "loss": 0.0208, "step": 8114 }, { "epoch": 3.7876312718786465, "grad_norm": 0.5078125, "learning_rate": 0.0001372322841102031, "loss": 0.0129, "step": 8115 }, { "epoch": 3.788098016336056, "grad_norm": 0.6953125, "learning_rate": 0.00013721866520194148, "loss": 0.0189, "step": 8116 }, { "epoch": 3.7885647607934656, "grad_norm": 0.578125, "learning_rate": 0.00013720504549232068, "loss": 0.0138, "step": 8117 }, { "epoch": 3.7890315052508754, "grad_norm": 0.62109375, "learning_rate": 0.00013719142498163394, "loss": 0.0188, "step": 8118 }, { "epoch": 3.7894982497082847, "grad_norm": 0.75390625, "learning_rate": 0.00013717780367017462, "loss": 0.029, "step": 8119 }, { "epoch": 3.789964994165694, "grad_norm": 0.388671875, "learning_rate": 0.0001371641815582359, "loss": 0.0073, "step": 8120 }, { "epoch": 3.790431738623104, "grad_norm": 0.326171875, "learning_rate": 0.00013715055864611111, "loss": 0.0079, "step": 8121 }, { "epoch": 3.7908984830805137, "grad_norm": 0.5390625, "learning_rate": 0.00013713693493409357, "loss": 0.0148, "step": 8122 }, { "epoch": 3.791365227537923, "grad_norm": 0.5234375, "learning_rate": 0.00013712331042247666, "loss": 0.0216, "step": 8123 }, { "epoch": 3.7918319719953324, "grad_norm": 0.4609375, "learning_rate": 0.00013710968511155368, "loss": 0.0111, "step": 8124 }, { "epoch": 3.792298716452742, "grad_norm": 0.82421875, "learning_rate": 0.00013709605900161795, "loss": 0.0159, "step": 8125 }, { "epoch": 3.7927654609101515, "grad_norm": 0.64453125, "learning_rate": 0.00013708243209296293, "loss": 0.0248, "step": 8126 }, { "epoch": 3.7932322053675613, "grad_norm": 0.69140625, "learning_rate": 0.00013706880438588205, "loss": 0.0185, "step": 8127 }, { "epoch": 3.7936989498249707, "grad_norm": 0.76953125, "learning_rate": 0.00013705517588066864, "loss": 0.0218, "step": 8128 }, { "epoch": 3.7941656942823805, "grad_norm": 0.5390625, "learning_rate": 0.0001370415465776162, "loss": 0.013, "step": 8129 }, { "epoch": 3.79463243873979, "grad_norm": 0.86328125, "learning_rate": 0.00013702791647701812, "loss": 0.0154, "step": 8130 }, { "epoch": 3.7950991831971996, "grad_norm": 0.5390625, "learning_rate": 0.00013701428557916795, "loss": 0.0171, "step": 8131 }, { "epoch": 3.795565927654609, "grad_norm": 0.69921875, "learning_rate": 0.00013700065388435917, "loss": 0.0169, "step": 8132 }, { "epoch": 3.796032672112019, "grad_norm": 0.6640625, "learning_rate": 0.00013698702139288522, "loss": 0.0209, "step": 8133 }, { "epoch": 3.796499416569428, "grad_norm": 0.53125, "learning_rate": 0.00013697338810503967, "loss": 0.0176, "step": 8134 }, { "epoch": 3.796966161026838, "grad_norm": 0.65625, "learning_rate": 0.00013695975402111605, "loss": 0.0172, "step": 8135 }, { "epoch": 3.7974329054842473, "grad_norm": 0.484375, "learning_rate": 0.00013694611914140793, "loss": 0.0145, "step": 8136 }, { "epoch": 3.797899649941657, "grad_norm": 0.7109375, "learning_rate": 0.00013693248346620886, "loss": 0.0185, "step": 8137 }, { "epoch": 3.7983663943990664, "grad_norm": 0.486328125, "learning_rate": 0.00013691884699581244, "loss": 0.0106, "step": 8138 }, { "epoch": 3.798833138856476, "grad_norm": 0.609375, "learning_rate": 0.0001369052097305123, "loss": 0.0158, "step": 8139 }, { "epoch": 3.7992998833138856, "grad_norm": 0.64453125, "learning_rate": 0.00013689157167060204, "loss": 0.0188, "step": 8140 }, { "epoch": 3.7997666277712954, "grad_norm": 0.7421875, "learning_rate": 0.00013687793281637532, "loss": 0.0212, "step": 8141 }, { "epoch": 3.8002333722287047, "grad_norm": 0.52734375, "learning_rate": 0.00013686429316812576, "loss": 0.0137, "step": 8142 }, { "epoch": 3.8007001166861145, "grad_norm": 0.6796875, "learning_rate": 0.0001368506527261471, "loss": 0.0146, "step": 8143 }, { "epoch": 3.801166861143524, "grad_norm": 0.6015625, "learning_rate": 0.00013683701149073298, "loss": 0.0099, "step": 8144 }, { "epoch": 3.8016336056009337, "grad_norm": 0.6640625, "learning_rate": 0.00013682336946217713, "loss": 0.0131, "step": 8145 }, { "epoch": 3.802100350058343, "grad_norm": 0.6875, "learning_rate": 0.0001368097266407733, "loss": 0.0216, "step": 8146 }, { "epoch": 3.8025670945157524, "grad_norm": 0.5859375, "learning_rate": 0.0001367960830268152, "loss": 0.015, "step": 8147 }, { "epoch": 3.803033838973162, "grad_norm": 0.6171875, "learning_rate": 0.0001367824386205966, "loss": 0.0198, "step": 8148 }, { "epoch": 3.803500583430572, "grad_norm": 0.67578125, "learning_rate": 0.00013676879342241132, "loss": 0.019, "step": 8149 }, { "epoch": 3.8039673278879813, "grad_norm": 0.8046875, "learning_rate": 0.00013675514743255308, "loss": 0.0224, "step": 8150 }, { "epoch": 3.8044340723453907, "grad_norm": 0.5546875, "learning_rate": 0.00013674150065131574, "loss": 0.0144, "step": 8151 }, { "epoch": 3.8049008168028005, "grad_norm": 0.79296875, "learning_rate": 0.00013672785307899314, "loss": 0.0179, "step": 8152 }, { "epoch": 3.8053675612602103, "grad_norm": 0.66015625, "learning_rate": 0.00013671420471587912, "loss": 0.0164, "step": 8153 }, { "epoch": 3.8058343057176196, "grad_norm": 0.7109375, "learning_rate": 0.0001367005555622675, "loss": 0.029, "step": 8154 }, { "epoch": 3.806301050175029, "grad_norm": 0.56640625, "learning_rate": 0.00013668690561845223, "loss": 0.0167, "step": 8155 }, { "epoch": 3.8067677946324388, "grad_norm": 0.57421875, "learning_rate": 0.00013667325488472717, "loss": 0.0174, "step": 8156 }, { "epoch": 3.8072345390898485, "grad_norm": 0.7421875, "learning_rate": 0.00013665960336138624, "loss": 0.0276, "step": 8157 }, { "epoch": 3.807701283547258, "grad_norm": 0.53515625, "learning_rate": 0.00013664595104872338, "loss": 0.0115, "step": 8158 }, { "epoch": 3.8081680280046672, "grad_norm": 0.67578125, "learning_rate": 0.00013663229794703253, "loss": 0.0178, "step": 8159 }, { "epoch": 3.808634772462077, "grad_norm": 0.61328125, "learning_rate": 0.0001366186440566077, "loss": 0.0279, "step": 8160 }, { "epoch": 3.809101516919487, "grad_norm": 0.5078125, "learning_rate": 0.0001366049893777428, "loss": 0.0161, "step": 8161 }, { "epoch": 3.809568261376896, "grad_norm": 0.5078125, "learning_rate": 0.00013659133391073184, "loss": 0.0136, "step": 8162 }, { "epoch": 3.8100350058343055, "grad_norm": 0.58203125, "learning_rate": 0.0001365776776558689, "loss": 0.0248, "step": 8163 }, { "epoch": 3.8105017502917153, "grad_norm": 0.6875, "learning_rate": 0.00013656402061344792, "loss": 0.0285, "step": 8164 }, { "epoch": 3.8109684947491247, "grad_norm": 0.546875, "learning_rate": 0.00013655036278376309, "loss": 0.0146, "step": 8165 }, { "epoch": 3.8114352392065345, "grad_norm": 0.5859375, "learning_rate": 0.00013653670416710837, "loss": 0.0201, "step": 8166 }, { "epoch": 3.811901983663944, "grad_norm": 0.515625, "learning_rate": 0.00013652304476377783, "loss": 0.0155, "step": 8167 }, { "epoch": 3.8123687281213536, "grad_norm": 0.7421875, "learning_rate": 0.00013650938457406564, "loss": 0.0167, "step": 8168 }, { "epoch": 3.812835472578763, "grad_norm": 0.5625, "learning_rate": 0.00013649572359826587, "loss": 0.0148, "step": 8169 }, { "epoch": 3.813302217036173, "grad_norm": 0.515625, "learning_rate": 0.00013648206183667272, "loss": 0.0117, "step": 8170 }, { "epoch": 3.813768961493582, "grad_norm": 0.6484375, "learning_rate": 0.0001364683992895803, "loss": 0.0141, "step": 8171 }, { "epoch": 3.814235705950992, "grad_norm": 0.76171875, "learning_rate": 0.00013645473595728276, "loss": 0.0209, "step": 8172 }, { "epoch": 3.8147024504084013, "grad_norm": 0.63671875, "learning_rate": 0.00013644107184007428, "loss": 0.0154, "step": 8173 }, { "epoch": 3.815169194865811, "grad_norm": 0.6953125, "learning_rate": 0.00013642740693824913, "loss": 0.0166, "step": 8174 }, { "epoch": 3.8156359393232204, "grad_norm": 0.51171875, "learning_rate": 0.00013641374125210153, "loss": 0.0123, "step": 8175 }, { "epoch": 3.8161026837806302, "grad_norm": 0.56640625, "learning_rate": 0.00013640007478192562, "loss": 0.0121, "step": 8176 }, { "epoch": 3.8165694282380396, "grad_norm": 0.53125, "learning_rate": 0.0001363864075280157, "loss": 0.0109, "step": 8177 }, { "epoch": 3.8170361726954494, "grad_norm": 0.447265625, "learning_rate": 0.00013637273949066608, "loss": 0.0105, "step": 8178 }, { "epoch": 3.8175029171528587, "grad_norm": 0.65234375, "learning_rate": 0.00013635907067017104, "loss": 0.0124, "step": 8179 }, { "epoch": 3.8179696616102685, "grad_norm": 0.74609375, "learning_rate": 0.00013634540106682483, "loss": 0.019, "step": 8180 }, { "epoch": 3.818436406067678, "grad_norm": 0.97265625, "learning_rate": 0.00013633173068092183, "loss": 0.022, "step": 8181 }, { "epoch": 3.8189031505250877, "grad_norm": 0.703125, "learning_rate": 0.00013631805951275637, "loss": 0.017, "step": 8182 }, { "epoch": 3.819369894982497, "grad_norm": 0.58203125, "learning_rate": 0.00013630438756262277, "loss": 0.0145, "step": 8183 }, { "epoch": 3.819836639439907, "grad_norm": 0.49609375, "learning_rate": 0.00013629071483081543, "loss": 0.013, "step": 8184 }, { "epoch": 3.820303383897316, "grad_norm": 0.80078125, "learning_rate": 0.00013627704131762876, "loss": 0.0255, "step": 8185 }, { "epoch": 3.8207701283547255, "grad_norm": 0.8125, "learning_rate": 0.00013626336702335712, "loss": 0.0425, "step": 8186 }, { "epoch": 3.8212368728121353, "grad_norm": 0.5390625, "learning_rate": 0.00013624969194829493, "loss": 0.0193, "step": 8187 }, { "epoch": 3.821703617269545, "grad_norm": 0.478515625, "learning_rate": 0.00013623601609273668, "loss": 0.0128, "step": 8188 }, { "epoch": 3.8221703617269545, "grad_norm": 0.77734375, "learning_rate": 0.0001362223394569768, "loss": 0.0207, "step": 8189 }, { "epoch": 3.822637106184364, "grad_norm": 0.76171875, "learning_rate": 0.00013620866204130976, "loss": 0.0238, "step": 8190 }, { "epoch": 3.8231038506417736, "grad_norm": 0.5625, "learning_rate": 0.00013619498384603005, "loss": 0.014, "step": 8191 }, { "epoch": 3.8235705950991834, "grad_norm": 0.53515625, "learning_rate": 0.00013618130487143217, "loss": 0.0117, "step": 8192 }, { "epoch": 3.8240373395565928, "grad_norm": 0.5234375, "learning_rate": 0.00013616762511781068, "loss": 0.0154, "step": 8193 }, { "epoch": 3.824504084014002, "grad_norm": 0.546875, "learning_rate": 0.00013615394458546006, "loss": 0.0196, "step": 8194 }, { "epoch": 3.824970828471412, "grad_norm": 0.455078125, "learning_rate": 0.00013614026327467494, "loss": 0.0092, "step": 8195 }, { "epoch": 3.8254375729288217, "grad_norm": 0.66796875, "learning_rate": 0.00013612658118574979, "loss": 0.024, "step": 8196 }, { "epoch": 3.825904317386231, "grad_norm": 0.79296875, "learning_rate": 0.00013611289831897932, "loss": 0.0199, "step": 8197 }, { "epoch": 3.8263710618436404, "grad_norm": 0.73828125, "learning_rate": 0.00013609921467465808, "loss": 0.0191, "step": 8198 }, { "epoch": 3.82683780630105, "grad_norm": 0.4140625, "learning_rate": 0.00013608553025308066, "loss": 0.0113, "step": 8199 }, { "epoch": 3.82730455075846, "grad_norm": 0.7109375, "learning_rate": 0.00013607184505454173, "loss": 0.0138, "step": 8200 }, { "epoch": 3.8277712952158693, "grad_norm": 0.609375, "learning_rate": 0.00013605815907933598, "loss": 0.0191, "step": 8201 }, { "epoch": 3.8282380396732787, "grad_norm": 0.55859375, "learning_rate": 0.00013604447232775808, "loss": 0.0149, "step": 8202 }, { "epoch": 3.8287047841306885, "grad_norm": 0.4453125, "learning_rate": 0.00013603078480010265, "loss": 0.0108, "step": 8203 }, { "epoch": 3.8291715285880983, "grad_norm": 0.48046875, "learning_rate": 0.00013601709649666447, "loss": 0.0162, "step": 8204 }, { "epoch": 3.8296382730455076, "grad_norm": 0.5625, "learning_rate": 0.00013600340741773824, "loss": 0.021, "step": 8205 }, { "epoch": 3.830105017502917, "grad_norm": 0.7109375, "learning_rate": 0.0001359897175636187, "loss": 0.0152, "step": 8206 }, { "epoch": 3.830571761960327, "grad_norm": 0.5703125, "learning_rate": 0.0001359760269346006, "loss": 0.0191, "step": 8207 }, { "epoch": 3.831038506417736, "grad_norm": 0.59765625, "learning_rate": 0.00013596233553097876, "loss": 0.0154, "step": 8208 }, { "epoch": 3.831505250875146, "grad_norm": 0.7578125, "learning_rate": 0.0001359486433530479, "loss": 0.0244, "step": 8209 }, { "epoch": 3.8319719953325553, "grad_norm": 0.71484375, "learning_rate": 0.00013593495040110283, "loss": 0.014, "step": 8210 }, { "epoch": 3.832438739789965, "grad_norm": 0.640625, "learning_rate": 0.0001359212566754385, "loss": 0.0131, "step": 8211 }, { "epoch": 3.8329054842473744, "grad_norm": 0.62890625, "learning_rate": 0.0001359075621763496, "loss": 0.0135, "step": 8212 }, { "epoch": 3.8333722287047842, "grad_norm": 0.70703125, "learning_rate": 0.00013589386690413105, "loss": 0.0198, "step": 8213 }, { "epoch": 3.8338389731621936, "grad_norm": 0.58203125, "learning_rate": 0.0001358801708590777, "loss": 0.0122, "step": 8214 }, { "epoch": 3.8343057176196034, "grad_norm": 0.82421875, "learning_rate": 0.0001358664740414845, "loss": 0.0282, "step": 8215 }, { "epoch": 3.8347724620770127, "grad_norm": 0.423828125, "learning_rate": 0.0001358527764516463, "loss": 0.0154, "step": 8216 }, { "epoch": 3.8352392065344225, "grad_norm": 0.6796875, "learning_rate": 0.00013583907808985805, "loss": 0.0204, "step": 8217 }, { "epoch": 3.835705950991832, "grad_norm": 0.6328125, "learning_rate": 0.0001358253789564147, "loss": 0.0223, "step": 8218 }, { "epoch": 3.8361726954492417, "grad_norm": 0.5625, "learning_rate": 0.0001358116790516112, "loss": 0.0162, "step": 8219 }, { "epoch": 3.836639439906651, "grad_norm": 0.59765625, "learning_rate": 0.00013579797837574248, "loss": 0.0192, "step": 8220 }, { "epoch": 3.837106184364061, "grad_norm": 0.6171875, "learning_rate": 0.00013578427692910358, "loss": 0.0163, "step": 8221 }, { "epoch": 3.83757292882147, "grad_norm": 0.6328125, "learning_rate": 0.0001357705747119895, "loss": 0.0163, "step": 8222 }, { "epoch": 3.83803967327888, "grad_norm": 0.52734375, "learning_rate": 0.00013575687172469526, "loss": 0.0141, "step": 8223 }, { "epoch": 3.8385064177362893, "grad_norm": 0.56640625, "learning_rate": 0.0001357431679675159, "loss": 0.0137, "step": 8224 }, { "epoch": 3.838973162193699, "grad_norm": 0.66796875, "learning_rate": 0.00013572946344074646, "loss": 0.0146, "step": 8225 }, { "epoch": 3.8394399066511085, "grad_norm": 0.458984375, "learning_rate": 0.000135715758144682, "loss": 0.0103, "step": 8226 }, { "epoch": 3.8399066511085183, "grad_norm": 0.796875, "learning_rate": 0.00013570205207961771, "loss": 0.0221, "step": 8227 }, { "epoch": 3.8403733955659276, "grad_norm": 0.55078125, "learning_rate": 0.00013568834524584859, "loss": 0.0187, "step": 8228 }, { "epoch": 3.840840140023337, "grad_norm": 0.65625, "learning_rate": 0.0001356746376436698, "loss": 0.0165, "step": 8229 }, { "epoch": 3.8413068844807468, "grad_norm": 0.55859375, "learning_rate": 0.00013566092927337647, "loss": 0.0149, "step": 8230 }, { "epoch": 3.8417736289381565, "grad_norm": 0.6953125, "learning_rate": 0.00013564722013526377, "loss": 0.0157, "step": 8231 }, { "epoch": 3.842240373395566, "grad_norm": 0.58203125, "learning_rate": 0.00013563351022962688, "loss": 0.0181, "step": 8232 }, { "epoch": 3.8427071178529753, "grad_norm": 0.62890625, "learning_rate": 0.00013561979955676099, "loss": 0.0149, "step": 8233 }, { "epoch": 3.843173862310385, "grad_norm": 0.69921875, "learning_rate": 0.0001356060881169613, "loss": 0.0165, "step": 8234 }, { "epoch": 3.843640606767795, "grad_norm": 0.66796875, "learning_rate": 0.000135592375910523, "loss": 0.0134, "step": 8235 }, { "epoch": 3.844107351225204, "grad_norm": 0.5859375, "learning_rate": 0.00013557866293774135, "loss": 0.0173, "step": 8236 }, { "epoch": 3.8445740956826135, "grad_norm": 0.62890625, "learning_rate": 0.00013556494919891163, "loss": 0.0147, "step": 8237 }, { "epoch": 3.8450408401400233, "grad_norm": 0.498046875, "learning_rate": 0.0001355512346943291, "loss": 0.0093, "step": 8238 }, { "epoch": 3.845507584597433, "grad_norm": 0.640625, "learning_rate": 0.00013553751942428904, "loss": 0.0189, "step": 8239 }, { "epoch": 3.8459743290548425, "grad_norm": 0.7265625, "learning_rate": 0.00013552380338908675, "loss": 0.0166, "step": 8240 }, { "epoch": 3.846441073512252, "grad_norm": 0.63671875, "learning_rate": 0.00013551008658901754, "loss": 0.0236, "step": 8241 }, { "epoch": 3.8469078179696616, "grad_norm": 0.63671875, "learning_rate": 0.00013549636902437682, "loss": 0.0214, "step": 8242 }, { "epoch": 3.8473745624270714, "grad_norm": 0.671875, "learning_rate": 0.00013548265069545984, "loss": 0.0159, "step": 8243 }, { "epoch": 3.847841306884481, "grad_norm": 0.6875, "learning_rate": 0.00013546893160256205, "loss": 0.0185, "step": 8244 }, { "epoch": 3.84830805134189, "grad_norm": 0.52734375, "learning_rate": 0.00013545521174597878, "loss": 0.0213, "step": 8245 }, { "epoch": 3.8487747957993, "grad_norm": 0.7421875, "learning_rate": 0.00013544149112600548, "loss": 0.0175, "step": 8246 }, { "epoch": 3.8492415402567097, "grad_norm": 0.75, "learning_rate": 0.00013542776974293758, "loss": 0.0191, "step": 8247 }, { "epoch": 3.849708284714119, "grad_norm": 0.62890625, "learning_rate": 0.00013541404759707045, "loss": 0.0151, "step": 8248 }, { "epoch": 3.8501750291715284, "grad_norm": 0.73046875, "learning_rate": 0.00013540032468869958, "loss": 0.0192, "step": 8249 }, { "epoch": 3.8506417736289382, "grad_norm": 0.6875, "learning_rate": 0.00013538660101812048, "loss": 0.0314, "step": 8250 }, { "epoch": 3.8511085180863476, "grad_norm": 0.7265625, "learning_rate": 0.00013537287658562858, "loss": 0.0158, "step": 8251 }, { "epoch": 3.8515752625437574, "grad_norm": 0.4765625, "learning_rate": 0.00013535915139151937, "loss": 0.0157, "step": 8252 }, { "epoch": 3.8520420070011667, "grad_norm": 0.89453125, "learning_rate": 0.00013534542543608844, "loss": 0.0316, "step": 8253 }, { "epoch": 3.8525087514585765, "grad_norm": 0.5859375, "learning_rate": 0.00013533169871963126, "loss": 0.0172, "step": 8254 }, { "epoch": 3.852975495915986, "grad_norm": 0.5234375, "learning_rate": 0.0001353179712424434, "loss": 0.0156, "step": 8255 }, { "epoch": 3.8534422403733957, "grad_norm": 0.52734375, "learning_rate": 0.00013530424300482044, "loss": 0.0124, "step": 8256 }, { "epoch": 3.853908984830805, "grad_norm": 0.64453125, "learning_rate": 0.000135290514007058, "loss": 0.0212, "step": 8257 }, { "epoch": 3.854375729288215, "grad_norm": 0.54296875, "learning_rate": 0.0001352767842494516, "loss": 0.0095, "step": 8258 }, { "epoch": 3.854842473745624, "grad_norm": 0.796875, "learning_rate": 0.00013526305373229687, "loss": 0.0252, "step": 8259 }, { "epoch": 3.855309218203034, "grad_norm": 0.7421875, "learning_rate": 0.00013524932245588954, "loss": 0.0215, "step": 8260 }, { "epoch": 3.8557759626604433, "grad_norm": 0.78125, "learning_rate": 0.00013523559042052515, "loss": 0.0237, "step": 8261 }, { "epoch": 3.856242707117853, "grad_norm": 0.5390625, "learning_rate": 0.00013522185762649937, "loss": 0.0183, "step": 8262 }, { "epoch": 3.8567094515752625, "grad_norm": 0.63671875, "learning_rate": 0.00013520812407410795, "loss": 0.0209, "step": 8263 }, { "epoch": 3.8571761960326723, "grad_norm": 0.59375, "learning_rate": 0.00013519438976364656, "loss": 0.0159, "step": 8264 }, { "epoch": 3.8576429404900816, "grad_norm": 0.5078125, "learning_rate": 0.00013518065469541086, "loss": 0.0209, "step": 8265 }, { "epoch": 3.8581096849474914, "grad_norm": 0.609375, "learning_rate": 0.0001351669188696967, "loss": 0.0097, "step": 8266 }, { "epoch": 3.8585764294049008, "grad_norm": 0.52734375, "learning_rate": 0.00013515318228679975, "loss": 0.0182, "step": 8267 }, { "epoch": 3.8590431738623105, "grad_norm": 0.66796875, "learning_rate": 0.00013513944494701572, "loss": 0.0196, "step": 8268 }, { "epoch": 3.85950991831972, "grad_norm": 0.61328125, "learning_rate": 0.0001351257068506405, "loss": 0.0201, "step": 8269 }, { "epoch": 3.8599766627771297, "grad_norm": 0.8125, "learning_rate": 0.00013511196799796983, "loss": 0.0329, "step": 8270 }, { "epoch": 3.860443407234539, "grad_norm": 0.78515625, "learning_rate": 0.00013509822838929952, "loss": 0.0202, "step": 8271 }, { "epoch": 3.8609101516919484, "grad_norm": 0.515625, "learning_rate": 0.00013508448802492543, "loss": 0.0136, "step": 8272 }, { "epoch": 3.861376896149358, "grad_norm": 0.52734375, "learning_rate": 0.00013507074690514339, "loss": 0.0146, "step": 8273 }, { "epoch": 3.861843640606768, "grad_norm": 0.58203125, "learning_rate": 0.00013505700503024925, "loss": 0.0187, "step": 8274 }, { "epoch": 3.8623103850641773, "grad_norm": 0.65625, "learning_rate": 0.00013504326240053888, "loss": 0.0199, "step": 8275 }, { "epoch": 3.8627771295215867, "grad_norm": 0.60546875, "learning_rate": 0.0001350295190163082, "loss": 0.0137, "step": 8276 }, { "epoch": 3.8632438739789965, "grad_norm": 0.60546875, "learning_rate": 0.00013501577487785312, "loss": 0.0163, "step": 8277 }, { "epoch": 3.8637106184364063, "grad_norm": 0.56640625, "learning_rate": 0.00013500202998546954, "loss": 0.0213, "step": 8278 }, { "epoch": 3.8641773628938156, "grad_norm": 1.1171875, "learning_rate": 0.00013498828433945343, "loss": 0.0264, "step": 8279 }, { "epoch": 3.864644107351225, "grad_norm": 0.62890625, "learning_rate": 0.00013497453794010072, "loss": 0.0136, "step": 8280 }, { "epoch": 3.865110851808635, "grad_norm": 0.66015625, "learning_rate": 0.00013496079078770742, "loss": 0.0162, "step": 8281 }, { "epoch": 3.8655775962660446, "grad_norm": 0.5703125, "learning_rate": 0.0001349470428825695, "loss": 0.0114, "step": 8282 }, { "epoch": 3.866044340723454, "grad_norm": 0.66015625, "learning_rate": 0.00013493329422498298, "loss": 0.017, "step": 8283 }, { "epoch": 3.8665110851808633, "grad_norm": 0.6953125, "learning_rate": 0.00013491954481524386, "loss": 0.0134, "step": 8284 }, { "epoch": 3.866977829638273, "grad_norm": 0.61328125, "learning_rate": 0.00013490579465364822, "loss": 0.0196, "step": 8285 }, { "epoch": 3.867444574095683, "grad_norm": 0.490234375, "learning_rate": 0.00013489204374049206, "loss": 0.0164, "step": 8286 }, { "epoch": 3.8679113185530922, "grad_norm": 0.62109375, "learning_rate": 0.00013487829207607153, "loss": 0.0233, "step": 8287 }, { "epoch": 3.8683780630105016, "grad_norm": 0.3203125, "learning_rate": 0.00013486453966068263, "loss": 0.0123, "step": 8288 }, { "epoch": 3.8688448074679114, "grad_norm": 0.58984375, "learning_rate": 0.00013485078649462152, "loss": 0.0147, "step": 8289 }, { "epoch": 3.8693115519253207, "grad_norm": 0.49609375, "learning_rate": 0.00013483703257818433, "loss": 0.0108, "step": 8290 }, { "epoch": 3.8697782963827305, "grad_norm": 0.75, "learning_rate": 0.00013482327791166716, "loss": 0.0187, "step": 8291 }, { "epoch": 3.87024504084014, "grad_norm": 0.65234375, "learning_rate": 0.00013480952249536618, "loss": 0.019, "step": 8292 }, { "epoch": 3.8707117852975497, "grad_norm": 0.52734375, "learning_rate": 0.00013479576632957757, "loss": 0.0214, "step": 8293 }, { "epoch": 3.871178529754959, "grad_norm": 0.5859375, "learning_rate": 0.00013478200941459748, "loss": 0.0194, "step": 8294 }, { "epoch": 3.871645274212369, "grad_norm": 0.62890625, "learning_rate": 0.00013476825175072214, "loss": 0.0157, "step": 8295 }, { "epoch": 3.872112018669778, "grad_norm": 0.52734375, "learning_rate": 0.00013475449333824778, "loss": 0.0132, "step": 8296 }, { "epoch": 3.872578763127188, "grad_norm": 0.74609375, "learning_rate": 0.0001347407341774706, "loss": 0.018, "step": 8297 }, { "epoch": 3.8730455075845973, "grad_norm": 0.56640625, "learning_rate": 0.00013472697426868688, "loss": 0.0166, "step": 8298 }, { "epoch": 3.873512252042007, "grad_norm": 0.734375, "learning_rate": 0.00013471321361219288, "loss": 0.0217, "step": 8299 }, { "epoch": 3.8739789964994165, "grad_norm": 0.73046875, "learning_rate": 0.00013469945220828488, "loss": 0.0242, "step": 8300 }, { "epoch": 3.8744457409568263, "grad_norm": 0.412109375, "learning_rate": 0.00013468569005725914, "loss": 0.0111, "step": 8301 }, { "epoch": 3.8749124854142356, "grad_norm": 0.55859375, "learning_rate": 0.00013467192715941204, "loss": 0.0101, "step": 8302 }, { "epoch": 3.8753792298716454, "grad_norm": 0.455078125, "learning_rate": 0.00013465816351503987, "loss": 0.0126, "step": 8303 }, { "epoch": 3.8758459743290548, "grad_norm": 0.59375, "learning_rate": 0.000134644399124439, "loss": 0.0147, "step": 8304 }, { "epoch": 3.8763127187864646, "grad_norm": 0.419921875, "learning_rate": 0.00013463063398790574, "loss": 0.0084, "step": 8305 }, { "epoch": 3.876779463243874, "grad_norm": 0.703125, "learning_rate": 0.00013461686810573655, "loss": 0.0131, "step": 8306 }, { "epoch": 3.8772462077012837, "grad_norm": 0.56640625, "learning_rate": 0.00013460310147822777, "loss": 0.0125, "step": 8307 }, { "epoch": 3.877712952158693, "grad_norm": 0.66796875, "learning_rate": 0.00013458933410567578, "loss": 0.0107, "step": 8308 }, { "epoch": 3.878179696616103, "grad_norm": 0.67578125, "learning_rate": 0.0001345755659883771, "loss": 0.0222, "step": 8309 }, { "epoch": 3.878646441073512, "grad_norm": 0.5078125, "learning_rate": 0.0001345617971266281, "loss": 0.0163, "step": 8310 }, { "epoch": 3.8791131855309215, "grad_norm": 0.57421875, "learning_rate": 0.00013454802752072527, "loss": 0.0193, "step": 8311 }, { "epoch": 3.8795799299883313, "grad_norm": 0.73828125, "learning_rate": 0.0001345342571709651, "loss": 0.0157, "step": 8312 }, { "epoch": 3.880046674445741, "grad_norm": 0.57421875, "learning_rate": 0.00013452048607764402, "loss": 0.0203, "step": 8313 }, { "epoch": 3.8805134189031505, "grad_norm": 0.54296875, "learning_rate": 0.00013450671424105858, "loss": 0.0155, "step": 8314 }, { "epoch": 3.88098016336056, "grad_norm": 0.59375, "learning_rate": 0.0001344929416615053, "loss": 0.0119, "step": 8315 }, { "epoch": 3.8814469078179696, "grad_norm": 0.51171875, "learning_rate": 0.0001344791683392807, "loss": 0.0104, "step": 8316 }, { "epoch": 3.8819136522753794, "grad_norm": 0.63671875, "learning_rate": 0.00013446539427468138, "loss": 0.0208, "step": 8317 }, { "epoch": 3.882380396732789, "grad_norm": 0.5390625, "learning_rate": 0.00013445161946800386, "loss": 0.0122, "step": 8318 }, { "epoch": 3.882847141190198, "grad_norm": 0.54296875, "learning_rate": 0.00013443784391954476, "loss": 0.0155, "step": 8319 }, { "epoch": 3.883313885647608, "grad_norm": 0.5703125, "learning_rate": 0.00013442406762960064, "loss": 0.0146, "step": 8320 }, { "epoch": 3.8837806301050177, "grad_norm": 0.76171875, "learning_rate": 0.0001344102905984682, "loss": 0.023, "step": 8321 }, { "epoch": 3.884247374562427, "grad_norm": 0.578125, "learning_rate": 0.000134396512826444, "loss": 0.0185, "step": 8322 }, { "epoch": 3.8847141190198364, "grad_norm": 0.4453125, "learning_rate": 0.0001343827343138247, "loss": 0.0092, "step": 8323 }, { "epoch": 3.8851808634772462, "grad_norm": 0.515625, "learning_rate": 0.00013436895506090699, "loss": 0.0136, "step": 8324 }, { "epoch": 3.885647607934656, "grad_norm": 0.53515625, "learning_rate": 0.00013435517506798757, "loss": 0.0122, "step": 8325 }, { "epoch": 3.8861143523920654, "grad_norm": 0.443359375, "learning_rate": 0.0001343413943353631, "loss": 0.0092, "step": 8326 }, { "epoch": 3.8865810968494747, "grad_norm": 0.65625, "learning_rate": 0.0001343276128633303, "loss": 0.0245, "step": 8327 }, { "epoch": 3.8870478413068845, "grad_norm": 0.71875, "learning_rate": 0.0001343138306521859, "loss": 0.0163, "step": 8328 }, { "epoch": 3.8875145857642943, "grad_norm": 0.431640625, "learning_rate": 0.00013430004770222665, "loss": 0.0152, "step": 8329 }, { "epoch": 3.8879813302217037, "grad_norm": 0.5703125, "learning_rate": 0.00013428626401374932, "loss": 0.0122, "step": 8330 }, { "epoch": 3.888448074679113, "grad_norm": 0.65234375, "learning_rate": 0.00013427247958705071, "loss": 0.017, "step": 8331 }, { "epoch": 3.888914819136523, "grad_norm": 0.66796875, "learning_rate": 0.00013425869442242757, "loss": 0.0171, "step": 8332 }, { "epoch": 3.889381563593932, "grad_norm": 0.625, "learning_rate": 0.00013424490852017676, "loss": 0.0222, "step": 8333 }, { "epoch": 3.889848308051342, "grad_norm": 0.63671875, "learning_rate": 0.00013423112188059504, "loss": 0.0174, "step": 8334 }, { "epoch": 3.8903150525087513, "grad_norm": 0.470703125, "learning_rate": 0.0001342173345039793, "loss": 0.0091, "step": 8335 }, { "epoch": 3.890781796966161, "grad_norm": 0.796875, "learning_rate": 0.00013420354639062635, "loss": 0.0189, "step": 8336 }, { "epoch": 3.8912485414235705, "grad_norm": 0.55859375, "learning_rate": 0.00013418975754083312, "loss": 0.0137, "step": 8337 }, { "epoch": 3.8917152858809803, "grad_norm": 0.5703125, "learning_rate": 0.00013417596795489652, "loss": 0.0095, "step": 8338 }, { "epoch": 3.8921820303383896, "grad_norm": 0.55078125, "learning_rate": 0.00013416217763311336, "loss": 0.0138, "step": 8339 }, { "epoch": 3.8926487747957994, "grad_norm": 0.71875, "learning_rate": 0.0001341483865757806, "loss": 0.0214, "step": 8340 }, { "epoch": 3.8931155192532088, "grad_norm": 0.435546875, "learning_rate": 0.00013413459478319522, "loss": 0.0118, "step": 8341 }, { "epoch": 3.8935822637106186, "grad_norm": 0.64453125, "learning_rate": 0.00013412080225565413, "loss": 0.0226, "step": 8342 }, { "epoch": 3.894049008168028, "grad_norm": 0.734375, "learning_rate": 0.0001341070089934543, "loss": 0.014, "step": 8343 }, { "epoch": 3.8945157526254377, "grad_norm": 0.5859375, "learning_rate": 0.00013409321499689274, "loss": 0.0128, "step": 8344 }, { "epoch": 3.894982497082847, "grad_norm": 0.6171875, "learning_rate": 0.00013407942026626642, "loss": 0.0142, "step": 8345 }, { "epoch": 3.895449241540257, "grad_norm": 0.546875, "learning_rate": 0.0001340656248018724, "loss": 0.0144, "step": 8346 }, { "epoch": 3.895915985997666, "grad_norm": 0.54296875, "learning_rate": 0.00013405182860400764, "loss": 0.0181, "step": 8347 }, { "epoch": 3.896382730455076, "grad_norm": 0.44140625, "learning_rate": 0.00013403803167296927, "loss": 0.0168, "step": 8348 }, { "epoch": 3.8968494749124853, "grad_norm": 0.40234375, "learning_rate": 0.00013402423400905427, "loss": 0.0156, "step": 8349 }, { "epoch": 3.897316219369895, "grad_norm": 0.61328125, "learning_rate": 0.0001340104356125598, "loss": 0.0128, "step": 8350 }, { "epoch": 3.8977829638273045, "grad_norm": 0.87890625, "learning_rate": 0.00013399663648378288, "loss": 0.0193, "step": 8351 }, { "epoch": 3.8982497082847143, "grad_norm": 0.51953125, "learning_rate": 0.0001339828366230207, "loss": 0.0123, "step": 8352 }, { "epoch": 3.8987164527421236, "grad_norm": 0.69921875, "learning_rate": 0.0001339690360305703, "loss": 0.0151, "step": 8353 }, { "epoch": 3.899183197199533, "grad_norm": 0.73046875, "learning_rate": 0.00013395523470672888, "loss": 0.0172, "step": 8354 }, { "epoch": 3.899649941656943, "grad_norm": 0.43359375, "learning_rate": 0.0001339414326517936, "loss": 0.0092, "step": 8355 }, { "epoch": 3.9001166861143526, "grad_norm": 0.703125, "learning_rate": 0.0001339276298660616, "loss": 0.0193, "step": 8356 }, { "epoch": 3.900583430571762, "grad_norm": 0.6171875, "learning_rate": 0.00013391382634983008, "loss": 0.0206, "step": 8357 }, { "epoch": 3.9010501750291713, "grad_norm": 0.5546875, "learning_rate": 0.00013390002210339626, "loss": 0.0206, "step": 8358 }, { "epoch": 3.901516919486581, "grad_norm": 0.50390625, "learning_rate": 0.00013388621712705735, "loss": 0.0133, "step": 8359 }, { "epoch": 3.901983663943991, "grad_norm": 0.7109375, "learning_rate": 0.00013387241142111058, "loss": 0.0206, "step": 8360 }, { "epoch": 3.9024504084014002, "grad_norm": 0.53125, "learning_rate": 0.00013385860498585323, "loss": 0.0154, "step": 8361 }, { "epoch": 3.9029171528588096, "grad_norm": 0.53515625, "learning_rate": 0.00013384479782158255, "loss": 0.0107, "step": 8362 }, { "epoch": 3.9033838973162194, "grad_norm": 0.6953125, "learning_rate": 0.00013383098992859579, "loss": 0.0177, "step": 8363 }, { "epoch": 3.903850641773629, "grad_norm": 0.515625, "learning_rate": 0.0001338171813071903, "loss": 0.0092, "step": 8364 }, { "epoch": 3.9043173862310385, "grad_norm": 0.51171875, "learning_rate": 0.0001338033719576634, "loss": 0.0103, "step": 8365 }, { "epoch": 3.904784130688448, "grad_norm": 0.8203125, "learning_rate": 0.00013378956188031234, "loss": 0.0228, "step": 8366 }, { "epoch": 3.9052508751458577, "grad_norm": 0.55859375, "learning_rate": 0.00013377575107543455, "loss": 0.0199, "step": 8367 }, { "epoch": 3.9057176196032675, "grad_norm": 0.64453125, "learning_rate": 0.0001337619395433274, "loss": 0.0201, "step": 8368 }, { "epoch": 3.906184364060677, "grad_norm": 0.76953125, "learning_rate": 0.00013374812728428817, "loss": 0.022, "step": 8369 }, { "epoch": 3.906651108518086, "grad_norm": 0.5390625, "learning_rate": 0.00013373431429861436, "loss": 0.0135, "step": 8370 }, { "epoch": 3.907117852975496, "grad_norm": 0.58203125, "learning_rate": 0.0001337205005866033, "loss": 0.0154, "step": 8371 }, { "epoch": 3.9075845974329058, "grad_norm": 0.65234375, "learning_rate": 0.00013370668614855248, "loss": 0.0217, "step": 8372 }, { "epoch": 3.908051341890315, "grad_norm": 0.60546875, "learning_rate": 0.00013369287098475928, "loss": 0.0177, "step": 8373 }, { "epoch": 3.9085180863477245, "grad_norm": 0.5546875, "learning_rate": 0.0001336790550955212, "loss": 0.0167, "step": 8374 }, { "epoch": 3.9089848308051343, "grad_norm": 0.447265625, "learning_rate": 0.00013366523848113573, "loss": 0.0143, "step": 8375 }, { "epoch": 3.9094515752625436, "grad_norm": 0.46484375, "learning_rate": 0.00013365142114190025, "loss": 0.0146, "step": 8376 }, { "epoch": 3.9099183197199534, "grad_norm": 0.44921875, "learning_rate": 0.0001336376030781124, "loss": 0.0103, "step": 8377 }, { "epoch": 3.9103850641773628, "grad_norm": 0.5234375, "learning_rate": 0.0001336237842900696, "loss": 0.0143, "step": 8378 }, { "epoch": 3.9108518086347726, "grad_norm": 0.4453125, "learning_rate": 0.00013360996477806942, "loss": 0.0123, "step": 8379 }, { "epoch": 3.911318553092182, "grad_norm": 0.6015625, "learning_rate": 0.00013359614454240944, "loss": 0.0203, "step": 8380 }, { "epoch": 3.9117852975495917, "grad_norm": 0.33984375, "learning_rate": 0.00013358232358338715, "loss": 0.0104, "step": 8381 }, { "epoch": 3.912252042007001, "grad_norm": 0.640625, "learning_rate": 0.0001335685019013002, "loss": 0.0224, "step": 8382 }, { "epoch": 3.912718786464411, "grad_norm": 0.423828125, "learning_rate": 0.00013355467949644615, "loss": 0.0166, "step": 8383 }, { "epoch": 3.91318553092182, "grad_norm": 0.498046875, "learning_rate": 0.0001335408563691226, "loss": 0.0148, "step": 8384 }, { "epoch": 3.91365227537923, "grad_norm": 0.69140625, "learning_rate": 0.00013352703251962725, "loss": 0.0202, "step": 8385 }, { "epoch": 3.9141190198366393, "grad_norm": 0.51953125, "learning_rate": 0.00013351320794825766, "loss": 0.0136, "step": 8386 }, { "epoch": 3.914585764294049, "grad_norm": 0.58203125, "learning_rate": 0.00013349938265531152, "loss": 0.0211, "step": 8387 }, { "epoch": 3.9150525087514585, "grad_norm": 0.68359375, "learning_rate": 0.0001334855566410865, "loss": 0.0239, "step": 8388 }, { "epoch": 3.9155192532088683, "grad_norm": 0.8828125, "learning_rate": 0.0001334717299058803, "loss": 0.0209, "step": 8389 }, { "epoch": 3.9159859976662776, "grad_norm": 0.5625, "learning_rate": 0.00013345790244999062, "loss": 0.015, "step": 8390 }, { "epoch": 3.9164527421236874, "grad_norm": 0.50390625, "learning_rate": 0.00013344407427371517, "loss": 0.0067, "step": 8391 }, { "epoch": 3.916919486581097, "grad_norm": 0.392578125, "learning_rate": 0.00013343024537735169, "loss": 0.0098, "step": 8392 }, { "epoch": 3.9173862310385066, "grad_norm": 0.55078125, "learning_rate": 0.00013341641576119796, "loss": 0.017, "step": 8393 }, { "epoch": 3.917852975495916, "grad_norm": 0.38671875, "learning_rate": 0.0001334025854255517, "loss": 0.0122, "step": 8394 }, { "epoch": 3.9183197199533257, "grad_norm": 0.52734375, "learning_rate": 0.0001333887543707107, "loss": 0.0168, "step": 8395 }, { "epoch": 3.918786464410735, "grad_norm": 0.55859375, "learning_rate": 0.0001333749225969728, "loss": 0.0097, "step": 8396 }, { "epoch": 3.9192532088681444, "grad_norm": 0.50390625, "learning_rate": 0.00013336109010463577, "loss": 0.0169, "step": 8397 }, { "epoch": 3.9197199533255542, "grad_norm": 0.73046875, "learning_rate": 0.00013334725689399747, "loss": 0.0274, "step": 8398 }, { "epoch": 3.920186697782964, "grad_norm": 0.427734375, "learning_rate": 0.0001333334229653557, "loss": 0.0134, "step": 8399 }, { "epoch": 3.9206534422403734, "grad_norm": 0.75390625, "learning_rate": 0.00013331958831900838, "loss": 0.02, "step": 8400 }, { "epoch": 3.9211201866977827, "grad_norm": 0.333984375, "learning_rate": 0.00013330575295525336, "loss": 0.0079, "step": 8401 }, { "epoch": 3.9215869311551925, "grad_norm": 0.486328125, "learning_rate": 0.00013329191687438851, "loss": 0.0203, "step": 8402 }, { "epoch": 3.9220536756126023, "grad_norm": 0.58203125, "learning_rate": 0.00013327808007671178, "loss": 0.0109, "step": 8403 }, { "epoch": 3.9225204200700117, "grad_norm": 0.66015625, "learning_rate": 0.000133264242562521, "loss": 0.016, "step": 8404 }, { "epoch": 3.922987164527421, "grad_norm": 0.4765625, "learning_rate": 0.0001332504043321142, "loss": 0.0119, "step": 8405 }, { "epoch": 3.923453908984831, "grad_norm": 0.5859375, "learning_rate": 0.00013323656538578933, "loss": 0.0148, "step": 8406 }, { "epoch": 3.9239206534422406, "grad_norm": 0.396484375, "learning_rate": 0.0001332227257238443, "loss": 0.0086, "step": 8407 }, { "epoch": 3.92438739789965, "grad_norm": 0.703125, "learning_rate": 0.00013320888534657717, "loss": 0.019, "step": 8408 }, { "epoch": 3.9248541423570593, "grad_norm": 0.369140625, "learning_rate": 0.00013319504425428585, "loss": 0.0081, "step": 8409 }, { "epoch": 3.925320886814469, "grad_norm": 0.56640625, "learning_rate": 0.0001331812024472684, "loss": 0.0157, "step": 8410 }, { "epoch": 3.925787631271879, "grad_norm": 0.52734375, "learning_rate": 0.00013316735992582282, "loss": 0.0116, "step": 8411 }, { "epoch": 3.9262543757292883, "grad_norm": 0.48828125, "learning_rate": 0.00013315351669024726, "loss": 0.0093, "step": 8412 }, { "epoch": 3.9267211201866976, "grad_norm": 0.58203125, "learning_rate": 0.00013313967274083965, "loss": 0.0153, "step": 8413 }, { "epoch": 3.9271878646441074, "grad_norm": 0.8984375, "learning_rate": 0.0001331258280778981, "loss": 0.0222, "step": 8414 }, { "epoch": 3.9276546091015168, "grad_norm": 0.7265625, "learning_rate": 0.00013311198270172074, "loss": 0.0207, "step": 8415 }, { "epoch": 3.9281213535589266, "grad_norm": 0.341796875, "learning_rate": 0.00013309813661260567, "loss": 0.0067, "step": 8416 }, { "epoch": 3.928588098016336, "grad_norm": 0.4765625, "learning_rate": 0.00013308428981085096, "loss": 0.0081, "step": 8417 }, { "epoch": 3.9290548424737457, "grad_norm": 0.58984375, "learning_rate": 0.0001330704422967548, "loss": 0.0149, "step": 8418 }, { "epoch": 3.929521586931155, "grad_norm": 0.68359375, "learning_rate": 0.0001330565940706153, "loss": 0.0187, "step": 8419 }, { "epoch": 3.929988331388565, "grad_norm": 0.6484375, "learning_rate": 0.0001330427451327307, "loss": 0.0128, "step": 8420 }, { "epoch": 3.930455075845974, "grad_norm": 0.490234375, "learning_rate": 0.00013302889548339914, "loss": 0.0082, "step": 8421 }, { "epoch": 3.930921820303384, "grad_norm": 0.55859375, "learning_rate": 0.00013301504512291875, "loss": 0.0115, "step": 8422 }, { "epoch": 3.9313885647607933, "grad_norm": 0.5703125, "learning_rate": 0.00013300119405158786, "loss": 0.013, "step": 8423 }, { "epoch": 3.931855309218203, "grad_norm": 0.5625, "learning_rate": 0.00013298734226970465, "loss": 0.014, "step": 8424 }, { "epoch": 3.9323220536756125, "grad_norm": 0.60546875, "learning_rate": 0.00013297348977756731, "loss": 0.0112, "step": 8425 }, { "epoch": 3.9327887981330223, "grad_norm": 0.44921875, "learning_rate": 0.00013295963657547418, "loss": 0.0096, "step": 8426 }, { "epoch": 3.9332555425904316, "grad_norm": 0.62890625, "learning_rate": 0.00013294578266372353, "loss": 0.0219, "step": 8427 }, { "epoch": 3.9337222870478414, "grad_norm": 0.52734375, "learning_rate": 0.00013293192804261357, "loss": 0.0153, "step": 8428 }, { "epoch": 3.934189031505251, "grad_norm": 0.640625, "learning_rate": 0.00013291807271244272, "loss": 0.0095, "step": 8429 }, { "epoch": 3.9346557759626606, "grad_norm": 0.5859375, "learning_rate": 0.00013290421667350923, "loss": 0.0138, "step": 8430 }, { "epoch": 3.93512252042007, "grad_norm": 0.59765625, "learning_rate": 0.00013289035992611144, "loss": 0.0098, "step": 8431 }, { "epoch": 3.9355892648774797, "grad_norm": 0.703125, "learning_rate": 0.00013287650247054765, "loss": 0.0165, "step": 8432 }, { "epoch": 3.936056009334889, "grad_norm": 0.59765625, "learning_rate": 0.00013286264430711637, "loss": 0.0121, "step": 8433 }, { "epoch": 3.936522753792299, "grad_norm": 0.77734375, "learning_rate": 0.00013284878543611589, "loss": 0.0228, "step": 8434 }, { "epoch": 3.9369894982497082, "grad_norm": 0.5703125, "learning_rate": 0.00013283492585784457, "loss": 0.0148, "step": 8435 }, { "epoch": 3.9374562427071176, "grad_norm": 0.6015625, "learning_rate": 0.00013282106557260088, "loss": 0.016, "step": 8436 }, { "epoch": 3.9379229871645274, "grad_norm": 0.57421875, "learning_rate": 0.00013280720458068324, "loss": 0.0113, "step": 8437 }, { "epoch": 3.938389731621937, "grad_norm": 0.53515625, "learning_rate": 0.00013279334288239007, "loss": 0.0154, "step": 8438 }, { "epoch": 3.9388564760793465, "grad_norm": 0.5625, "learning_rate": 0.00013277948047801984, "loss": 0.0176, "step": 8439 }, { "epoch": 3.939323220536756, "grad_norm": 0.84375, "learning_rate": 0.00013276561736787107, "loss": 0.0229, "step": 8440 }, { "epoch": 3.9397899649941657, "grad_norm": 0.466796875, "learning_rate": 0.00013275175355224213, "loss": 0.0103, "step": 8441 }, { "epoch": 3.9402567094515755, "grad_norm": 0.9765625, "learning_rate": 0.00013273788903143166, "loss": 0.0351, "step": 8442 }, { "epoch": 3.940723453908985, "grad_norm": 0.5703125, "learning_rate": 0.0001327240238057381, "loss": 0.0092, "step": 8443 }, { "epoch": 3.941190198366394, "grad_norm": 0.53125, "learning_rate": 0.00013271015787545996, "loss": 0.0102, "step": 8444 }, { "epoch": 3.941656942823804, "grad_norm": 0.8671875, "learning_rate": 0.00013269629124089587, "loss": 0.0202, "step": 8445 }, { "epoch": 3.9421236872812138, "grad_norm": 0.78515625, "learning_rate": 0.00013268242390234437, "loss": 0.012, "step": 8446 }, { "epoch": 3.942590431738623, "grad_norm": 0.427734375, "learning_rate": 0.00013266855586010397, "loss": 0.0066, "step": 8447 }, { "epoch": 3.9430571761960325, "grad_norm": 0.5546875, "learning_rate": 0.00013265468711447335, "loss": 0.0109, "step": 8448 }, { "epoch": 3.9435239206534423, "grad_norm": 0.4921875, "learning_rate": 0.00013264081766575108, "loss": 0.0104, "step": 8449 }, { "epoch": 3.943990665110852, "grad_norm": 0.318359375, "learning_rate": 0.0001326269475142358, "loss": 0.0081, "step": 8450 }, { "epoch": 3.9444574095682614, "grad_norm": 0.498046875, "learning_rate": 0.0001326130766602261, "loss": 0.0131, "step": 8451 }, { "epoch": 3.9449241540256708, "grad_norm": 0.546875, "learning_rate": 0.00013259920510402072, "loss": 0.009, "step": 8452 }, { "epoch": 3.9453908984830806, "grad_norm": 0.75390625, "learning_rate": 0.00013258533284591826, "loss": 0.0197, "step": 8453 }, { "epoch": 3.9458576429404904, "grad_norm": 0.57421875, "learning_rate": 0.0001325714598862174, "loss": 0.0131, "step": 8454 }, { "epoch": 3.9463243873978997, "grad_norm": 0.48828125, "learning_rate": 0.00013255758622521694, "loss": 0.0127, "step": 8455 }, { "epoch": 3.946791131855309, "grad_norm": 0.6328125, "learning_rate": 0.00013254371186321548, "loss": 0.0184, "step": 8456 }, { "epoch": 3.947257876312719, "grad_norm": 0.6796875, "learning_rate": 0.00013252983680051182, "loss": 0.0183, "step": 8457 }, { "epoch": 3.947724620770128, "grad_norm": 0.60546875, "learning_rate": 0.00013251596103740467, "loss": 0.0222, "step": 8458 }, { "epoch": 3.948191365227538, "grad_norm": 0.6953125, "learning_rate": 0.0001325020845741928, "loss": 0.0162, "step": 8459 }, { "epoch": 3.9486581096849473, "grad_norm": 0.53515625, "learning_rate": 0.00013248820741117498, "loss": 0.0151, "step": 8460 }, { "epoch": 3.949124854142357, "grad_norm": 0.54296875, "learning_rate": 0.00013247432954865005, "loss": 0.0141, "step": 8461 }, { "epoch": 3.9495915985997665, "grad_norm": 0.5625, "learning_rate": 0.00013246045098691672, "loss": 0.0133, "step": 8462 }, { "epoch": 3.9500583430571763, "grad_norm": 0.84765625, "learning_rate": 0.0001324465717262739, "loss": 0.0164, "step": 8463 }, { "epoch": 3.9505250875145856, "grad_norm": 0.6953125, "learning_rate": 0.00013243269176702036, "loss": 0.0142, "step": 8464 }, { "epoch": 3.9509918319719954, "grad_norm": 0.494140625, "learning_rate": 0.000132418811109455, "loss": 0.0106, "step": 8465 }, { "epoch": 3.951458576429405, "grad_norm": 0.49609375, "learning_rate": 0.00013240492975387667, "loss": 0.013, "step": 8466 }, { "epoch": 3.9519253208868146, "grad_norm": 0.5703125, "learning_rate": 0.00013239104770058425, "loss": 0.0112, "step": 8467 }, { "epoch": 3.952392065344224, "grad_norm": 0.53515625, "learning_rate": 0.00013237716494987663, "loss": 0.0181, "step": 8468 }, { "epoch": 3.9528588098016337, "grad_norm": 0.52734375, "learning_rate": 0.0001323632815020527, "loss": 0.0127, "step": 8469 }, { "epoch": 3.953325554259043, "grad_norm": 0.63671875, "learning_rate": 0.00013234939735741146, "loss": 0.019, "step": 8470 }, { "epoch": 3.953792298716453, "grad_norm": 0.69140625, "learning_rate": 0.00013233551251625177, "loss": 0.027, "step": 8471 }, { "epoch": 3.9542590431738622, "grad_norm": 0.76953125, "learning_rate": 0.0001323216269788726, "loss": 0.0181, "step": 8472 }, { "epoch": 3.954725787631272, "grad_norm": 0.44921875, "learning_rate": 0.00013230774074557297, "loss": 0.0088, "step": 8473 }, { "epoch": 3.9551925320886814, "grad_norm": 0.51171875, "learning_rate": 0.00013229385381665184, "loss": 0.0128, "step": 8474 }, { "epoch": 3.955659276546091, "grad_norm": 0.59375, "learning_rate": 0.00013227996619240818, "loss": 0.0125, "step": 8475 }, { "epoch": 3.9561260210035005, "grad_norm": 0.609375, "learning_rate": 0.00013226607787314107, "loss": 0.0149, "step": 8476 }, { "epoch": 3.9565927654609103, "grad_norm": 0.796875, "learning_rate": 0.00013225218885914948, "loss": 0.0219, "step": 8477 }, { "epoch": 3.9570595099183197, "grad_norm": 0.67578125, "learning_rate": 0.0001322382991507325, "loss": 0.0198, "step": 8478 }, { "epoch": 3.957526254375729, "grad_norm": 0.61328125, "learning_rate": 0.00013222440874818914, "loss": 0.0142, "step": 8479 }, { "epoch": 3.957992998833139, "grad_norm": 0.8125, "learning_rate": 0.0001322105176518185, "loss": 0.023, "step": 8480 }, { "epoch": 3.9584597432905486, "grad_norm": 0.5859375, "learning_rate": 0.00013219662586191972, "loss": 0.0095, "step": 8481 }, { "epoch": 3.958926487747958, "grad_norm": 0.47265625, "learning_rate": 0.00013218273337879186, "loss": 0.0105, "step": 8482 }, { "epoch": 3.9593932322053673, "grad_norm": 0.62890625, "learning_rate": 0.000132168840202734, "loss": 0.0214, "step": 8483 }, { "epoch": 3.959859976662777, "grad_norm": 0.5546875, "learning_rate": 0.00013215494633404536, "loss": 0.0215, "step": 8484 }, { "epoch": 3.960326721120187, "grad_norm": 0.68359375, "learning_rate": 0.00013214105177302503, "loss": 0.0197, "step": 8485 }, { "epoch": 3.9607934655775963, "grad_norm": 0.50390625, "learning_rate": 0.00013212715651997217, "loss": 0.0151, "step": 8486 }, { "epoch": 3.9612602100350056, "grad_norm": 0.4375, "learning_rate": 0.000132113260575186, "loss": 0.0123, "step": 8487 }, { "epoch": 3.9617269544924154, "grad_norm": 0.52734375, "learning_rate": 0.00013209936393896574, "loss": 0.0156, "step": 8488 }, { "epoch": 3.962193698949825, "grad_norm": 0.74609375, "learning_rate": 0.0001320854666116105, "loss": 0.0209, "step": 8489 }, { "epoch": 3.9626604434072346, "grad_norm": 0.384765625, "learning_rate": 0.0001320715685934196, "loss": 0.0119, "step": 8490 }, { "epoch": 3.963127187864644, "grad_norm": 0.8359375, "learning_rate": 0.00013205766988469225, "loss": 0.0206, "step": 8491 }, { "epoch": 3.9635939323220537, "grad_norm": 0.54296875, "learning_rate": 0.00013204377048572766, "loss": 0.0102, "step": 8492 }, { "epoch": 3.9640606767794635, "grad_norm": 0.5390625, "learning_rate": 0.00013202987039682515, "loss": 0.016, "step": 8493 }, { "epoch": 3.964527421236873, "grad_norm": 0.5546875, "learning_rate": 0.00013201596961828402, "loss": 0.0159, "step": 8494 }, { "epoch": 3.964994165694282, "grad_norm": 0.53125, "learning_rate": 0.00013200206815040355, "loss": 0.0159, "step": 8495 }, { "epoch": 3.965460910151692, "grad_norm": 0.51953125, "learning_rate": 0.000131988165993483, "loss": 0.0111, "step": 8496 }, { "epoch": 3.965927654609102, "grad_norm": 0.59375, "learning_rate": 0.00013197426314782176, "loss": 0.0163, "step": 8497 }, { "epoch": 3.966394399066511, "grad_norm": 0.5625, "learning_rate": 0.00013196035961371918, "loss": 0.0165, "step": 8498 }, { "epoch": 3.9668611435239205, "grad_norm": 0.83203125, "learning_rate": 0.00013194645539147458, "loss": 0.0233, "step": 8499 }, { "epoch": 3.9673278879813303, "grad_norm": 0.357421875, "learning_rate": 0.00013193255048138736, "loss": 0.0085, "step": 8500 }, { "epoch": 3.9677946324387396, "grad_norm": 0.7109375, "learning_rate": 0.00013191864488375688, "loss": 0.0133, "step": 8501 }, { "epoch": 3.9682613768961494, "grad_norm": 0.6484375, "learning_rate": 0.00013190473859888256, "loss": 0.0197, "step": 8502 }, { "epoch": 3.968728121353559, "grad_norm": 0.67578125, "learning_rate": 0.00013189083162706383, "loss": 0.021, "step": 8503 }, { "epoch": 3.9691948658109686, "grad_norm": 0.56640625, "learning_rate": 0.00013187692396860015, "loss": 0.0156, "step": 8504 }, { "epoch": 3.969661610268378, "grad_norm": 0.859375, "learning_rate": 0.0001318630156237909, "loss": 0.0165, "step": 8505 }, { "epoch": 3.9701283547257877, "grad_norm": 0.50390625, "learning_rate": 0.00013184910659293553, "loss": 0.0115, "step": 8506 }, { "epoch": 3.970595099183197, "grad_norm": 0.6015625, "learning_rate": 0.0001318351968763336, "loss": 0.0149, "step": 8507 }, { "epoch": 3.971061843640607, "grad_norm": 0.49609375, "learning_rate": 0.00013182128647428457, "loss": 0.0135, "step": 8508 }, { "epoch": 3.9715285880980162, "grad_norm": 0.546875, "learning_rate": 0.0001318073753870879, "loss": 0.0131, "step": 8509 }, { "epoch": 3.971995332555426, "grad_norm": 1.015625, "learning_rate": 0.00013179346361504317, "loss": 0.0212, "step": 8510 }, { "epoch": 3.9724620770128354, "grad_norm": 0.65625, "learning_rate": 0.00013177955115844988, "loss": 0.0212, "step": 8511 }, { "epoch": 3.972928821470245, "grad_norm": 0.6171875, "learning_rate": 0.00013176563801760763, "loss": 0.0152, "step": 8512 }, { "epoch": 3.9733955659276545, "grad_norm": 0.421875, "learning_rate": 0.0001317517241928159, "loss": 0.0172, "step": 8513 }, { "epoch": 3.9738623103850643, "grad_norm": 0.515625, "learning_rate": 0.00013173780968437436, "loss": 0.0126, "step": 8514 }, { "epoch": 3.9743290548424737, "grad_norm": 0.455078125, "learning_rate": 0.00013172389449258253, "loss": 0.0169, "step": 8515 }, { "epoch": 3.9747957992998835, "grad_norm": 0.578125, "learning_rate": 0.00013170997861774005, "loss": 0.0187, "step": 8516 }, { "epoch": 3.975262543757293, "grad_norm": 0.4296875, "learning_rate": 0.00013169606206014654, "loss": 0.008, "step": 8517 }, { "epoch": 3.975729288214702, "grad_norm": 0.70703125, "learning_rate": 0.00013168214482010167, "loss": 0.016, "step": 8518 }, { "epoch": 3.976196032672112, "grad_norm": 0.404296875, "learning_rate": 0.00013166822689790507, "loss": 0.0131, "step": 8519 }, { "epoch": 3.9766627771295218, "grad_norm": 0.70703125, "learning_rate": 0.00013165430829385644, "loss": 0.021, "step": 8520 }, { "epoch": 3.977129521586931, "grad_norm": 0.58984375, "learning_rate": 0.00013164038900825538, "loss": 0.0199, "step": 8521 }, { "epoch": 3.9775962660443405, "grad_norm": 0.54296875, "learning_rate": 0.00013162646904140167, "loss": 0.0099, "step": 8522 }, { "epoch": 3.9780630105017503, "grad_norm": 0.53125, "learning_rate": 0.00013161254839359497, "loss": 0.0179, "step": 8523 }, { "epoch": 3.97852975495916, "grad_norm": 0.625, "learning_rate": 0.00013159862706513503, "loss": 0.0141, "step": 8524 }, { "epoch": 3.9789964994165694, "grad_norm": 0.470703125, "learning_rate": 0.00013158470505632161, "loss": 0.0122, "step": 8525 }, { "epoch": 3.9794632438739788, "grad_norm": 0.48828125, "learning_rate": 0.00013157078236745442, "loss": 0.0122, "step": 8526 }, { "epoch": 3.9799299883313886, "grad_norm": 0.609375, "learning_rate": 0.00013155685899883329, "loss": 0.0164, "step": 8527 }, { "epoch": 3.9803967327887984, "grad_norm": 0.69921875, "learning_rate": 0.00013154293495075794, "loss": 0.0131, "step": 8528 }, { "epoch": 3.9808634772462077, "grad_norm": 0.486328125, "learning_rate": 0.0001315290102235282, "loss": 0.0135, "step": 8529 }, { "epoch": 3.981330221703617, "grad_norm": 0.76953125, "learning_rate": 0.0001315150848174439, "loss": 0.02, "step": 8530 }, { "epoch": 3.981796966161027, "grad_norm": 0.46484375, "learning_rate": 0.0001315011587328049, "loss": 0.0092, "step": 8531 }, { "epoch": 3.9822637106184366, "grad_norm": 0.59765625, "learning_rate": 0.00013148723196991095, "loss": 0.0114, "step": 8532 }, { "epoch": 3.982730455075846, "grad_norm": 0.7578125, "learning_rate": 0.00013147330452906197, "loss": 0.0229, "step": 8533 }, { "epoch": 3.9831971995332554, "grad_norm": 0.40234375, "learning_rate": 0.00013145937641055784, "loss": 0.0102, "step": 8534 }, { "epoch": 3.983663943990665, "grad_norm": 0.61328125, "learning_rate": 0.00013144544761469843, "loss": 0.0143, "step": 8535 }, { "epoch": 3.984130688448075, "grad_norm": 0.78515625, "learning_rate": 0.0001314315181417836, "loss": 0.02, "step": 8536 }, { "epoch": 3.9845974329054843, "grad_norm": 0.25390625, "learning_rate": 0.00013141758799211338, "loss": 0.0117, "step": 8537 }, { "epoch": 3.9850641773628936, "grad_norm": 0.5859375, "learning_rate": 0.00013140365716598758, "loss": 0.0158, "step": 8538 }, { "epoch": 3.9855309218203034, "grad_norm": 0.55078125, "learning_rate": 0.0001313897256637062, "loss": 0.0107, "step": 8539 }, { "epoch": 3.985997666277713, "grad_norm": 0.46875, "learning_rate": 0.0001313757934855692, "loss": 0.0118, "step": 8540 }, { "epoch": 3.9864644107351226, "grad_norm": 0.51953125, "learning_rate": 0.0001313618606318766, "loss": 0.009, "step": 8541 }, { "epoch": 3.986931155192532, "grad_norm": 0.8515625, "learning_rate": 0.00013134792710292828, "loss": 0.0123, "step": 8542 }, { "epoch": 3.9873978996499417, "grad_norm": 0.330078125, "learning_rate": 0.00013133399289902436, "loss": 0.0061, "step": 8543 }, { "epoch": 3.987864644107351, "grad_norm": 0.70703125, "learning_rate": 0.00013132005802046477, "loss": 0.0093, "step": 8544 }, { "epoch": 3.988331388564761, "grad_norm": 0.4765625, "learning_rate": 0.00013130612246754962, "loss": 0.0096, "step": 8545 }, { "epoch": 3.9887981330221702, "grad_norm": 0.6953125, "learning_rate": 0.00013129218624057887, "loss": 0.0154, "step": 8546 }, { "epoch": 3.98926487747958, "grad_norm": 0.65625, "learning_rate": 0.00013127824933985264, "loss": 0.0166, "step": 8547 }, { "epoch": 3.9897316219369894, "grad_norm": 0.498046875, "learning_rate": 0.00013126431176567103, "loss": 0.0104, "step": 8548 }, { "epoch": 3.990198366394399, "grad_norm": 0.6171875, "learning_rate": 0.00013125037351833407, "loss": 0.0172, "step": 8549 }, { "epoch": 3.9906651108518085, "grad_norm": 0.6015625, "learning_rate": 0.0001312364345981419, "loss": 0.0134, "step": 8550 }, { "epoch": 3.9911318553092183, "grad_norm": 0.65625, "learning_rate": 0.00013122249500539468, "loss": 0.0148, "step": 8551 }, { "epoch": 3.9915985997666277, "grad_norm": 0.4453125, "learning_rate": 0.0001312085547403924, "loss": 0.0118, "step": 8552 }, { "epoch": 3.9920653442240375, "grad_norm": 0.458984375, "learning_rate": 0.00013119461380343544, "loss": 0.013, "step": 8553 }, { "epoch": 3.992532088681447, "grad_norm": 0.51171875, "learning_rate": 0.00013118067219482378, "loss": 0.0104, "step": 8554 }, { "epoch": 3.9929988331388566, "grad_norm": 0.63671875, "learning_rate": 0.0001311667299148576, "loss": 0.0138, "step": 8555 }, { "epoch": 3.993465577596266, "grad_norm": 0.63671875, "learning_rate": 0.0001311527869638372, "loss": 0.0142, "step": 8556 }, { "epoch": 3.9939323220536758, "grad_norm": 0.625, "learning_rate": 0.00013113884334206273, "loss": 0.011, "step": 8557 }, { "epoch": 3.994399066511085, "grad_norm": 0.59375, "learning_rate": 0.00013112489904983438, "loss": 0.0159, "step": 8558 }, { "epoch": 3.994865810968495, "grad_norm": 0.70703125, "learning_rate": 0.00013111095408745244, "loss": 0.013, "step": 8559 }, { "epoch": 3.9953325554259043, "grad_norm": 0.490234375, "learning_rate": 0.00013109700845521715, "loss": 0.0115, "step": 8560 }, { "epoch": 3.9957992998833136, "grad_norm": 0.7421875, "learning_rate": 0.00013108306215342877, "loss": 0.0231, "step": 8561 }, { "epoch": 3.9962660443407234, "grad_norm": 0.640625, "learning_rate": 0.00013106911518238754, "loss": 0.015, "step": 8562 }, { "epoch": 3.996732788798133, "grad_norm": 0.5625, "learning_rate": 0.0001310551675423938, "loss": 0.0244, "step": 8563 }, { "epoch": 3.9971995332555426, "grad_norm": 0.4296875, "learning_rate": 0.0001310412192337479, "loss": 0.009, "step": 8564 }, { "epoch": 3.997666277712952, "grad_norm": 0.51953125, "learning_rate": 0.00013102727025675, "loss": 0.0139, "step": 8565 }, { "epoch": 3.9981330221703617, "grad_norm": 0.51953125, "learning_rate": 0.00013101332061170063, "loss": 0.0125, "step": 8566 }, { "epoch": 3.9985997666277715, "grad_norm": 0.625, "learning_rate": 0.00013099937029890005, "loss": 0.0176, "step": 8567 }, { "epoch": 3.999066511085181, "grad_norm": 0.65234375, "learning_rate": 0.00013098541931864862, "loss": 0.0176, "step": 8568 }, { "epoch": 3.99953325554259, "grad_norm": 0.435546875, "learning_rate": 0.00013097146767124672, "loss": 0.0079, "step": 8569 }, { "epoch": 4.0, "grad_norm": 0.474609375, "learning_rate": 0.00013095751535699478, "loss": 0.0104, "step": 8570 }, { "epoch": 4.00046674445741, "grad_norm": 0.51171875, "learning_rate": 0.00013094356237619318, "loss": 0.0116, "step": 8571 }, { "epoch": 4.000933488914819, "grad_norm": 0.3359375, "learning_rate": 0.00013092960872914232, "loss": 0.0063, "step": 8572 }, { "epoch": 4.0014002333722285, "grad_norm": 0.482421875, "learning_rate": 0.0001309156544161427, "loss": 0.0091, "step": 8573 }, { "epoch": 4.001866977829638, "grad_norm": 0.48828125, "learning_rate": 0.00013090169943749476, "loss": 0.0108, "step": 8574 }, { "epoch": 4.002333722287048, "grad_norm": 0.64453125, "learning_rate": 0.0001308877437934989, "loss": 0.0154, "step": 8575 }, { "epoch": 4.002800466744457, "grad_norm": 0.48828125, "learning_rate": 0.00013087378748445567, "loss": 0.0134, "step": 8576 }, { "epoch": 4.002800466744457, "eval_loss": 1.9588065147399902, "eval_runtime": 55.9281, "eval_samples_per_second": 32.256, "eval_steps_per_second": 4.041, "step": 8576 }, { "epoch": 4.003267211201867, "grad_norm": 0.5703125, "learning_rate": 0.00013085983051066554, "loss": 0.0153, "step": 8577 }, { "epoch": 4.003733955659277, "grad_norm": 0.310546875, "learning_rate": 0.00013084587287242902, "loss": 0.0075, "step": 8578 }, { "epoch": 4.004200700116686, "grad_norm": 0.55078125, "learning_rate": 0.00013083191457004663, "loss": 0.0146, "step": 8579 }, { "epoch": 4.004667444574095, "grad_norm": 0.6171875, "learning_rate": 0.0001308179556038189, "loss": 0.0105, "step": 8580 }, { "epoch": 4.005134189031505, "grad_norm": 0.70703125, "learning_rate": 0.0001308039959740464, "loss": 0.0206, "step": 8581 }, { "epoch": 4.005600933488915, "grad_norm": 0.427734375, "learning_rate": 0.0001307900356810297, "loss": 0.0049, "step": 8582 }, { "epoch": 4.006067677946325, "grad_norm": 0.474609375, "learning_rate": 0.00013077607472506938, "loss": 0.0154, "step": 8583 }, { "epoch": 4.006534422403734, "grad_norm": 0.44140625, "learning_rate": 0.00013076211310646599, "loss": 0.0071, "step": 8584 }, { "epoch": 4.007001166861143, "grad_norm": 0.55859375, "learning_rate": 0.00013074815082552015, "loss": 0.0124, "step": 8585 }, { "epoch": 4.007467911318553, "grad_norm": 0.63671875, "learning_rate": 0.00013073418788253256, "loss": 0.0116, "step": 8586 }, { "epoch": 4.007934655775963, "grad_norm": 0.380859375, "learning_rate": 0.0001307202242778038, "loss": 0.0046, "step": 8587 }, { "epoch": 4.008401400233372, "grad_norm": 0.50390625, "learning_rate": 0.0001307062600116345, "loss": 0.0066, "step": 8588 }, { "epoch": 4.008868144690782, "grad_norm": 0.55859375, "learning_rate": 0.00013069229508432537, "loss": 0.0205, "step": 8589 }, { "epoch": 4.0093348891481915, "grad_norm": 0.640625, "learning_rate": 0.0001306783294961771, "loss": 0.0109, "step": 8590 }, { "epoch": 4.009801633605601, "grad_norm": 0.34375, "learning_rate": 0.0001306643632474903, "loss": 0.007, "step": 8591 }, { "epoch": 4.01026837806301, "grad_norm": 0.5703125, "learning_rate": 0.00013065039633856579, "loss": 0.0132, "step": 8592 }, { "epoch": 4.01073512252042, "grad_norm": 0.447265625, "learning_rate": 0.0001306364287697042, "loss": 0.0073, "step": 8593 }, { "epoch": 4.01120186697783, "grad_norm": 0.53515625, "learning_rate": 0.00013062246054120632, "loss": 0.0086, "step": 8594 }, { "epoch": 4.01166861143524, "grad_norm": 0.59375, "learning_rate": 0.00013060849165337287, "loss": 0.0124, "step": 8595 }, { "epoch": 4.0121353558926485, "grad_norm": 0.5, "learning_rate": 0.00013059452210650467, "loss": 0.0092, "step": 8596 }, { "epoch": 4.012602100350058, "grad_norm": 0.44921875, "learning_rate": 0.00013058055190090246, "loss": 0.0086, "step": 8597 }, { "epoch": 4.013068844807468, "grad_norm": 0.546875, "learning_rate": 0.000130566581036867, "loss": 0.0099, "step": 8598 }, { "epoch": 4.013535589264878, "grad_norm": 0.421875, "learning_rate": 0.00013055260951469916, "loss": 0.0106, "step": 8599 }, { "epoch": 4.014002333722287, "grad_norm": 0.400390625, "learning_rate": 0.00013053863733469978, "loss": 0.0102, "step": 8600 }, { "epoch": 4.014469078179697, "grad_norm": 0.42578125, "learning_rate": 0.0001305246644971696, "loss": 0.0108, "step": 8601 }, { "epoch": 4.014935822637106, "grad_norm": 0.255859375, "learning_rate": 0.00013051069100240956, "loss": 0.0038, "step": 8602 }, { "epoch": 4.015402567094516, "grad_norm": 0.41015625, "learning_rate": 0.00013049671685072046, "loss": 0.0073, "step": 8603 }, { "epoch": 4.015869311551925, "grad_norm": 0.494140625, "learning_rate": 0.0001304827420424032, "loss": 0.0077, "step": 8604 }, { "epoch": 4.016336056009335, "grad_norm": 0.5, "learning_rate": 0.00013046876657775872, "loss": 0.0104, "step": 8605 }, { "epoch": 4.016802800466745, "grad_norm": 0.71484375, "learning_rate": 0.0001304547904570879, "loss": 0.0232, "step": 8606 }, { "epoch": 4.0172695449241544, "grad_norm": 0.21875, "learning_rate": 0.0001304408136806916, "loss": 0.0029, "step": 8607 }, { "epoch": 4.017736289381563, "grad_norm": 0.4921875, "learning_rate": 0.00013042683624887085, "loss": 0.0099, "step": 8608 }, { "epoch": 4.018203033838973, "grad_norm": 0.328125, "learning_rate": 0.00013041285816192653, "loss": 0.0059, "step": 8609 }, { "epoch": 4.018669778296383, "grad_norm": 0.458984375, "learning_rate": 0.00013039887942015965, "loss": 0.0104, "step": 8610 }, { "epoch": 4.019136522753793, "grad_norm": 0.474609375, "learning_rate": 0.00013038490002387115, "loss": 0.0083, "step": 8611 }, { "epoch": 4.019603267211202, "grad_norm": 0.396484375, "learning_rate": 0.00013037091997336204, "loss": 0.0106, "step": 8612 }, { "epoch": 4.020070011668611, "grad_norm": 0.373046875, "learning_rate": 0.00013035693926893334, "loss": 0.0092, "step": 8613 }, { "epoch": 4.020536756126021, "grad_norm": 0.5859375, "learning_rate": 0.000130342957910886, "loss": 0.0135, "step": 8614 }, { "epoch": 4.02100350058343, "grad_norm": 0.66796875, "learning_rate": 0.0001303289758995212, "loss": 0.014, "step": 8615 }, { "epoch": 4.02147024504084, "grad_norm": 0.4609375, "learning_rate": 0.00013031499323513986, "loss": 0.0092, "step": 8616 }, { "epoch": 4.02193698949825, "grad_norm": 0.57421875, "learning_rate": 0.00013030100991804307, "loss": 0.0099, "step": 8617 }, { "epoch": 4.0224037339556595, "grad_norm": 0.5078125, "learning_rate": 0.0001302870259485319, "loss": 0.0095, "step": 8618 }, { "epoch": 4.022870478413068, "grad_norm": 0.458984375, "learning_rate": 0.0001302730413269075, "loss": 0.0083, "step": 8619 }, { "epoch": 4.023337222870478, "grad_norm": 0.75390625, "learning_rate": 0.00013025905605347092, "loss": 0.0142, "step": 8620 }, { "epoch": 4.023803967327888, "grad_norm": 0.3515625, "learning_rate": 0.00013024507012852327, "loss": 0.0087, "step": 8621 }, { "epoch": 4.024270711785298, "grad_norm": 0.38671875, "learning_rate": 0.00013023108355236573, "loss": 0.0081, "step": 8622 }, { "epoch": 4.024737456242707, "grad_norm": 0.36328125, "learning_rate": 0.00013021709632529943, "loss": 0.0041, "step": 8623 }, { "epoch": 4.0252042007001165, "grad_norm": 0.40234375, "learning_rate": 0.00013020310844762543, "loss": 0.0079, "step": 8624 }, { "epoch": 4.025670945157526, "grad_norm": 0.296875, "learning_rate": 0.0001301891199196451, "loss": 0.0187, "step": 8625 }, { "epoch": 4.026137689614936, "grad_norm": 0.39453125, "learning_rate": 0.0001301751307416595, "loss": 0.0051, "step": 8626 }, { "epoch": 4.026604434072345, "grad_norm": 0.578125, "learning_rate": 0.0001301611409139698, "loss": 0.0111, "step": 8627 }, { "epoch": 4.027071178529755, "grad_norm": 0.4296875, "learning_rate": 0.00013014715043687734, "loss": 0.0062, "step": 8628 }, { "epoch": 4.027537922987165, "grad_norm": 0.408203125, "learning_rate": 0.00013013315931068324, "loss": 0.0068, "step": 8629 }, { "epoch": 4.028004667444574, "grad_norm": 0.50390625, "learning_rate": 0.0001301191675356888, "loss": 0.0062, "step": 8630 }, { "epoch": 4.028471411901983, "grad_norm": 0.54296875, "learning_rate": 0.00013010517511219527, "loss": 0.0134, "step": 8631 }, { "epoch": 4.028938156359393, "grad_norm": 0.4140625, "learning_rate": 0.0001300911820405039, "loss": 0.0115, "step": 8632 }, { "epoch": 4.029404900816803, "grad_norm": 0.453125, "learning_rate": 0.000130077188320916, "loss": 0.007, "step": 8633 }, { "epoch": 4.029871645274213, "grad_norm": 0.326171875, "learning_rate": 0.00013006319395373285, "loss": 0.0074, "step": 8634 }, { "epoch": 4.030338389731622, "grad_norm": 0.5390625, "learning_rate": 0.00013004919893925583, "loss": 0.01, "step": 8635 }, { "epoch": 4.030805134189031, "grad_norm": 0.4296875, "learning_rate": 0.00013003520327778618, "loss": 0.0094, "step": 8636 }, { "epoch": 4.031271878646441, "grad_norm": 0.400390625, "learning_rate": 0.00013002120696962524, "loss": 0.0073, "step": 8637 }, { "epoch": 4.031738623103851, "grad_norm": 0.369140625, "learning_rate": 0.00013000721001507447, "loss": 0.0058, "step": 8638 }, { "epoch": 4.03220536756126, "grad_norm": 0.328125, "learning_rate": 0.00012999321241443515, "loss": 0.0078, "step": 8639 }, { "epoch": 4.03267211201867, "grad_norm": 0.5234375, "learning_rate": 0.0001299792141680087, "loss": 0.0093, "step": 8640 }, { "epoch": 4.0331388564760795, "grad_norm": 0.546875, "learning_rate": 0.0001299652152760965, "loss": 0.0109, "step": 8641 }, { "epoch": 4.033605600933489, "grad_norm": 0.43359375, "learning_rate": 0.00012995121573899996, "loss": 0.0065, "step": 8642 }, { "epoch": 4.034072345390898, "grad_norm": 0.158203125, "learning_rate": 0.00012993721555702053, "loss": 0.0022, "step": 8643 }, { "epoch": 4.034539089848308, "grad_norm": 0.353515625, "learning_rate": 0.00012992321473045962, "loss": 0.0064, "step": 8644 }, { "epoch": 4.035005834305718, "grad_norm": 0.310546875, "learning_rate": 0.0001299092132596187, "loss": 0.0072, "step": 8645 }, { "epoch": 4.035472578763128, "grad_norm": 0.400390625, "learning_rate": 0.00012989521114479922, "loss": 0.0082, "step": 8646 }, { "epoch": 4.0359393232205365, "grad_norm": 0.5078125, "learning_rate": 0.0001298812083863027, "loss": 0.008, "step": 8647 }, { "epoch": 4.036406067677946, "grad_norm": 0.50390625, "learning_rate": 0.0001298672049844306, "loss": 0.0156, "step": 8648 }, { "epoch": 4.036872812135356, "grad_norm": 0.431640625, "learning_rate": 0.00012985320093948446, "loss": 0.0111, "step": 8649 }, { "epoch": 4.037339556592766, "grad_norm": 0.515625, "learning_rate": 0.00012983919625176574, "loss": 0.0095, "step": 8650 }, { "epoch": 4.037806301050175, "grad_norm": 0.53515625, "learning_rate": 0.00012982519092157605, "loss": 0.011, "step": 8651 }, { "epoch": 4.038273045507585, "grad_norm": 0.59765625, "learning_rate": 0.00012981118494921688, "loss": 0.0093, "step": 8652 }, { "epoch": 4.038739789964994, "grad_norm": 0.52734375, "learning_rate": 0.0001297971783349898, "loss": 0.0077, "step": 8653 }, { "epoch": 4.039206534422403, "grad_norm": 0.58984375, "learning_rate": 0.00012978317107919645, "loss": 0.0169, "step": 8654 }, { "epoch": 4.039673278879813, "grad_norm": 0.54296875, "learning_rate": 0.00012976916318213839, "loss": 0.0119, "step": 8655 }, { "epoch": 4.040140023337223, "grad_norm": 0.49609375, "learning_rate": 0.00012975515464411718, "loss": 0.0139, "step": 8656 }, { "epoch": 4.040606767794633, "grad_norm": 0.86328125, "learning_rate": 0.00012974114546543451, "loss": 0.0143, "step": 8657 }, { "epoch": 4.041073512252042, "grad_norm": 0.5625, "learning_rate": 0.000129727135646392, "loss": 0.0121, "step": 8658 }, { "epoch": 4.041540256709451, "grad_norm": 0.435546875, "learning_rate": 0.00012971312518729125, "loss": 0.0075, "step": 8659 }, { "epoch": 4.042007001166861, "grad_norm": 0.6328125, "learning_rate": 0.00012969911408843392, "loss": 0.0133, "step": 8660 }, { "epoch": 4.042473745624271, "grad_norm": 0.50390625, "learning_rate": 0.00012968510235012172, "loss": 0.0089, "step": 8661 }, { "epoch": 4.04294049008168, "grad_norm": 0.431640625, "learning_rate": 0.00012967108997265635, "loss": 0.0063, "step": 8662 }, { "epoch": 4.04340723453909, "grad_norm": 0.330078125, "learning_rate": 0.00012965707695633948, "loss": 0.0106, "step": 8663 }, { "epoch": 4.0438739789964995, "grad_norm": 0.41015625, "learning_rate": 0.00012964306330147287, "loss": 0.0065, "step": 8664 }, { "epoch": 4.044340723453909, "grad_norm": 0.427734375, "learning_rate": 0.00012962904900835816, "loss": 0.0128, "step": 8665 }, { "epoch": 4.044807467911318, "grad_norm": 0.458984375, "learning_rate": 0.00012961503407729722, "loss": 0.0079, "step": 8666 }, { "epoch": 4.045274212368728, "grad_norm": 0.451171875, "learning_rate": 0.00012960101850859168, "loss": 0.0112, "step": 8667 }, { "epoch": 4.045740956826138, "grad_norm": 0.4140625, "learning_rate": 0.0001295870023025434, "loss": 0.0096, "step": 8668 }, { "epoch": 4.046207701283548, "grad_norm": 0.349609375, "learning_rate": 0.00012957298545945414, "loss": 0.0045, "step": 8669 }, { "epoch": 4.0466744457409565, "grad_norm": 0.287109375, "learning_rate": 0.00012955896797962562, "loss": 0.0078, "step": 8670 }, { "epoch": 4.047141190198366, "grad_norm": 0.36328125, "learning_rate": 0.0001295449498633598, "loss": 0.0117, "step": 8671 }, { "epoch": 4.047607934655776, "grad_norm": 0.63671875, "learning_rate": 0.00012953093111095842, "loss": 0.009, "step": 8672 }, { "epoch": 4.048074679113186, "grad_norm": 0.6015625, "learning_rate": 0.00012951691172272334, "loss": 0.01, "step": 8673 }, { "epoch": 4.048541423570595, "grad_norm": 0.52734375, "learning_rate": 0.00012950289169895638, "loss": 0.0086, "step": 8674 }, { "epoch": 4.049008168028005, "grad_norm": 0.193359375, "learning_rate": 0.0001294888710399594, "loss": 0.0029, "step": 8675 }, { "epoch": 4.049474912485414, "grad_norm": 0.53515625, "learning_rate": 0.00012947484974603433, "loss": 0.0082, "step": 8676 }, { "epoch": 4.049941656942824, "grad_norm": 0.318359375, "learning_rate": 0.00012946082781748304, "loss": 0.0051, "step": 8677 }, { "epoch": 4.050408401400233, "grad_norm": 0.408203125, "learning_rate": 0.00012944680525460745, "loss": 0.0105, "step": 8678 }, { "epoch": 4.050875145857643, "grad_norm": 0.53515625, "learning_rate": 0.00012943278205770946, "loss": 0.011, "step": 8679 }, { "epoch": 4.051341890315053, "grad_norm": 0.4296875, "learning_rate": 0.00012941875822709102, "loss": 0.0105, "step": 8680 }, { "epoch": 4.0518086347724624, "grad_norm": 0.1728515625, "learning_rate": 0.0001294047337630541, "loss": 0.0057, "step": 8681 }, { "epoch": 4.052275379229871, "grad_norm": 0.5078125, "learning_rate": 0.00012939070866590057, "loss": 0.01, "step": 8682 }, { "epoch": 4.052742123687281, "grad_norm": 0.61328125, "learning_rate": 0.00012937668293593253, "loss": 0.0084, "step": 8683 }, { "epoch": 4.053208868144691, "grad_norm": 0.421875, "learning_rate": 0.0001293626565734519, "loss": 0.0118, "step": 8684 }, { "epoch": 4.053675612602101, "grad_norm": 0.38671875, "learning_rate": 0.0001293486295787607, "loss": 0.0092, "step": 8685 }, { "epoch": 4.05414235705951, "grad_norm": 0.6015625, "learning_rate": 0.0001293346019521609, "loss": 0.0112, "step": 8686 }, { "epoch": 4.0546091015169194, "grad_norm": 0.57421875, "learning_rate": 0.00012932057369395463, "loss": 0.0179, "step": 8687 }, { "epoch": 4.055075845974329, "grad_norm": 0.66015625, "learning_rate": 0.00012930654480444387, "loss": 0.0121, "step": 8688 }, { "epoch": 4.055542590431739, "grad_norm": 0.51171875, "learning_rate": 0.00012929251528393066, "loss": 0.015, "step": 8689 }, { "epoch": 4.056009334889148, "grad_norm": 0.326171875, "learning_rate": 0.0001292784851327171, "loss": 0.0065, "step": 8690 }, { "epoch": 4.056476079346558, "grad_norm": 0.6640625, "learning_rate": 0.00012926445435110527, "loss": 0.0132, "step": 8691 }, { "epoch": 4.0569428238039675, "grad_norm": 0.6015625, "learning_rate": 0.00012925042293939726, "loss": 0.0123, "step": 8692 }, { "epoch": 4.057409568261377, "grad_norm": 0.451171875, "learning_rate": 0.0001292363908978952, "loss": 0.011, "step": 8693 }, { "epoch": 4.057876312718786, "grad_norm": 0.34765625, "learning_rate": 0.00012922235822690123, "loss": 0.0045, "step": 8694 }, { "epoch": 4.058343057176196, "grad_norm": 0.4375, "learning_rate": 0.00012920832492671744, "loss": 0.0064, "step": 8695 }, { "epoch": 4.058809801633606, "grad_norm": 0.478515625, "learning_rate": 0.00012919429099764602, "loss": 0.0111, "step": 8696 }, { "epoch": 4.059276546091015, "grad_norm": 0.56640625, "learning_rate": 0.00012918025643998912, "loss": 0.0107, "step": 8697 }, { "epoch": 4.0597432905484245, "grad_norm": 0.419921875, "learning_rate": 0.00012916622125404889, "loss": 0.0063, "step": 8698 }, { "epoch": 4.060210035005834, "grad_norm": 0.3515625, "learning_rate": 0.00012915218544012758, "loss": 0.0052, "step": 8699 }, { "epoch": 4.060676779463244, "grad_norm": 0.36328125, "learning_rate": 0.0001291381489985274, "loss": 0.0051, "step": 8700 }, { "epoch": 4.061143523920653, "grad_norm": 0.33984375, "learning_rate": 0.00012912411192955053, "loss": 0.0056, "step": 8701 }, { "epoch": 4.061610268378063, "grad_norm": 0.453125, "learning_rate": 0.0001291100742334992, "loss": 0.0054, "step": 8702 }, { "epoch": 4.062077012835473, "grad_norm": 0.73046875, "learning_rate": 0.00012909603591067565, "loss": 0.0065, "step": 8703 }, { "epoch": 4.062543757292882, "grad_norm": 0.373046875, "learning_rate": 0.0001290819969613822, "loss": 0.0062, "step": 8704 }, { "epoch": 4.063010501750291, "grad_norm": 0.56640625, "learning_rate": 0.00012906795738592107, "loss": 0.0089, "step": 8705 }, { "epoch": 4.063477246207701, "grad_norm": 0.5234375, "learning_rate": 0.0001290539171845946, "loss": 0.0112, "step": 8706 }, { "epoch": 4.063943990665111, "grad_norm": 0.39453125, "learning_rate": 0.00012903987635770503, "loss": 0.0077, "step": 8707 }, { "epoch": 4.064410735122521, "grad_norm": 0.396484375, "learning_rate": 0.0001290258349055547, "loss": 0.0069, "step": 8708 }, { "epoch": 4.06487747957993, "grad_norm": 0.283203125, "learning_rate": 0.00012901179282844594, "loss": 0.0035, "step": 8709 }, { "epoch": 4.065344224037339, "grad_norm": 0.515625, "learning_rate": 0.0001289977501266811, "loss": 0.0093, "step": 8710 }, { "epoch": 4.065810968494749, "grad_norm": 0.6328125, "learning_rate": 0.00012898370680056252, "loss": 0.0138, "step": 8711 }, { "epoch": 4.066277712952159, "grad_norm": 0.3359375, "learning_rate": 0.00012896966285039257, "loss": 0.0054, "step": 8712 }, { "epoch": 4.066744457409568, "grad_norm": 0.57421875, "learning_rate": 0.00012895561827647365, "loss": 0.0126, "step": 8713 }, { "epoch": 4.067211201866978, "grad_norm": 0.453125, "learning_rate": 0.00012894157307910813, "loss": 0.0053, "step": 8714 }, { "epoch": 4.0676779463243875, "grad_norm": 0.421875, "learning_rate": 0.00012892752725859842, "loss": 0.0054, "step": 8715 }, { "epoch": 4.068144690781797, "grad_norm": 0.361328125, "learning_rate": 0.00012891348081524697, "loss": 0.0085, "step": 8716 }, { "epoch": 4.068611435239206, "grad_norm": 0.416015625, "learning_rate": 0.0001288994337493562, "loss": 0.0087, "step": 8717 }, { "epoch": 4.069078179696616, "grad_norm": 0.419921875, "learning_rate": 0.00012888538606122856, "loss": 0.0067, "step": 8718 }, { "epoch": 4.069544924154026, "grad_norm": 0.4296875, "learning_rate": 0.0001288713377511665, "loss": 0.0049, "step": 8719 }, { "epoch": 4.070011668611436, "grad_norm": 0.375, "learning_rate": 0.00012885728881947253, "loss": 0.0051, "step": 8720 }, { "epoch": 4.0704784130688445, "grad_norm": 0.50390625, "learning_rate": 0.00012884323926644906, "loss": 0.0062, "step": 8721 }, { "epoch": 4.070945157526254, "grad_norm": 0.41796875, "learning_rate": 0.0001288291890923987, "loss": 0.007, "step": 8722 }, { "epoch": 4.071411901983664, "grad_norm": 0.37890625, "learning_rate": 0.00012881513829762387, "loss": 0.0059, "step": 8723 }, { "epoch": 4.071878646441074, "grad_norm": 0.462890625, "learning_rate": 0.00012880108688242717, "loss": 0.0107, "step": 8724 }, { "epoch": 4.072345390898483, "grad_norm": 0.375, "learning_rate": 0.0001287870348471111, "loss": 0.0069, "step": 8725 }, { "epoch": 4.072812135355893, "grad_norm": 0.451171875, "learning_rate": 0.00012877298219197825, "loss": 0.0154, "step": 8726 }, { "epoch": 4.073278879813302, "grad_norm": 0.4140625, "learning_rate": 0.00012875892891733113, "loss": 0.0066, "step": 8727 }, { "epoch": 4.073745624270712, "grad_norm": 0.453125, "learning_rate": 0.00012874487502347238, "loss": 0.0112, "step": 8728 }, { "epoch": 4.074212368728121, "grad_norm": 0.5390625, "learning_rate": 0.0001287308205107046, "loss": 0.0095, "step": 8729 }, { "epoch": 4.074679113185531, "grad_norm": 0.466796875, "learning_rate": 0.00012871676537933038, "loss": 0.0082, "step": 8730 }, { "epoch": 4.075145857642941, "grad_norm": 0.494140625, "learning_rate": 0.00012870270962965228, "loss": 0.0152, "step": 8731 }, { "epoch": 4.0756126021003505, "grad_norm": 0.30859375, "learning_rate": 0.00012868865326197307, "loss": 0.0048, "step": 8732 }, { "epoch": 4.076079346557759, "grad_norm": 0.50390625, "learning_rate": 0.00012867459627659527, "loss": 0.0059, "step": 8733 }, { "epoch": 4.076546091015169, "grad_norm": 0.50390625, "learning_rate": 0.00012866053867382164, "loss": 0.0093, "step": 8734 }, { "epoch": 4.077012835472579, "grad_norm": 0.396484375, "learning_rate": 0.00012864648045395475, "loss": 0.0126, "step": 8735 }, { "epoch": 4.077479579929989, "grad_norm": 0.8046875, "learning_rate": 0.00012863242161729741, "loss": 0.013, "step": 8736 }, { "epoch": 4.077946324387398, "grad_norm": 0.55859375, "learning_rate": 0.00012861836216415226, "loss": 0.007, "step": 8737 }, { "epoch": 4.0784130688448075, "grad_norm": 0.546875, "learning_rate": 0.000128604302094822, "loss": 0.0119, "step": 8738 }, { "epoch": 4.078879813302217, "grad_norm": 0.408203125, "learning_rate": 0.00012859024140960938, "loss": 0.0058, "step": 8739 }, { "epoch": 4.079346557759626, "grad_norm": 0.6328125, "learning_rate": 0.00012857618010881717, "loss": 0.0109, "step": 8740 }, { "epoch": 4.079813302217036, "grad_norm": 0.41015625, "learning_rate": 0.00012856211819274805, "loss": 0.0089, "step": 8741 }, { "epoch": 4.080280046674446, "grad_norm": 0.53125, "learning_rate": 0.00012854805566170485, "loss": 0.0108, "step": 8742 }, { "epoch": 4.080746791131856, "grad_norm": 0.4921875, "learning_rate": 0.00012853399251599033, "loss": 0.0132, "step": 8743 }, { "epoch": 4.0812135355892645, "grad_norm": 0.53125, "learning_rate": 0.00012851992875590734, "loss": 0.0055, "step": 8744 }, { "epoch": 4.081680280046674, "grad_norm": 0.6484375, "learning_rate": 0.0001285058643817586, "loss": 0.0143, "step": 8745 }, { "epoch": 4.082147024504084, "grad_norm": 0.376953125, "learning_rate": 0.000128491799393847, "loss": 0.0101, "step": 8746 }, { "epoch": 4.082613768961494, "grad_norm": 0.515625, "learning_rate": 0.00012847773379247532, "loss": 0.0112, "step": 8747 }, { "epoch": 4.083080513418903, "grad_norm": 0.2890625, "learning_rate": 0.00012846366757794642, "loss": 0.0049, "step": 8748 }, { "epoch": 4.083547257876313, "grad_norm": 0.330078125, "learning_rate": 0.00012844960075056322, "loss": 0.0041, "step": 8749 }, { "epoch": 4.084014002333722, "grad_norm": 0.6328125, "learning_rate": 0.00012843553331062855, "loss": 0.014, "step": 8750 }, { "epoch": 4.084480746791132, "grad_norm": 0.5390625, "learning_rate": 0.00012842146525844522, "loss": 0.0063, "step": 8751 }, { "epoch": 4.084947491248541, "grad_norm": 0.404296875, "learning_rate": 0.00012840739659431629, "loss": 0.0066, "step": 8752 }, { "epoch": 4.085414235705951, "grad_norm": 0.431640625, "learning_rate": 0.00012839332731854456, "loss": 0.0098, "step": 8753 }, { "epoch": 4.085880980163361, "grad_norm": 0.48046875, "learning_rate": 0.000128379257431433, "loss": 0.0059, "step": 8754 }, { "epoch": 4.0863477246207704, "grad_norm": 0.205078125, "learning_rate": 0.00012836518693328455, "loss": 0.003, "step": 8755 }, { "epoch": 4.086814469078179, "grad_norm": 0.6015625, "learning_rate": 0.00012835111582440214, "loss": 0.0112, "step": 8756 }, { "epoch": 4.087281213535589, "grad_norm": 0.33203125, "learning_rate": 0.00012833704410508874, "loss": 0.0055, "step": 8757 }, { "epoch": 4.087747957992999, "grad_norm": 0.52734375, "learning_rate": 0.00012832297177564733, "loss": 0.0095, "step": 8758 }, { "epoch": 4.088214702450409, "grad_norm": 0.412109375, "learning_rate": 0.00012830889883638094, "loss": 0.0066, "step": 8759 }, { "epoch": 4.088681446907818, "grad_norm": 0.703125, "learning_rate": 0.00012829482528759257, "loss": 0.0158, "step": 8760 }, { "epoch": 4.0891481913652274, "grad_norm": 0.283203125, "learning_rate": 0.00012828075112958516, "loss": 0.0059, "step": 8761 }, { "epoch": 4.089614935822637, "grad_norm": 0.44921875, "learning_rate": 0.00012826667636266185, "loss": 0.0066, "step": 8762 }, { "epoch": 4.090081680280047, "grad_norm": 0.49609375, "learning_rate": 0.0001282526009871256, "loss": 0.0084, "step": 8763 }, { "epoch": 4.090548424737456, "grad_norm": 0.6015625, "learning_rate": 0.00012823852500327954, "loss": 0.0101, "step": 8764 }, { "epoch": 4.091015169194866, "grad_norm": 0.30859375, "learning_rate": 0.0001282244484114267, "loss": 0.0077, "step": 8765 }, { "epoch": 4.0914819136522755, "grad_norm": 0.6015625, "learning_rate": 0.00012821037121187013, "loss": 0.0133, "step": 8766 }, { "epoch": 4.091948658109685, "grad_norm": 0.466796875, "learning_rate": 0.000128196293404913, "loss": 0.0092, "step": 8767 }, { "epoch": 4.092415402567094, "grad_norm": 0.5546875, "learning_rate": 0.0001281822149908584, "loss": 0.0165, "step": 8768 }, { "epoch": 4.092882147024504, "grad_norm": 0.5234375, "learning_rate": 0.00012816813597000943, "loss": 0.0082, "step": 8769 }, { "epoch": 4.093348891481914, "grad_norm": 0.5234375, "learning_rate": 0.00012815405634266922, "loss": 0.008, "step": 8770 }, { "epoch": 4.093815635939324, "grad_norm": 0.42578125, "learning_rate": 0.00012813997610914096, "loss": 0.009, "step": 8771 }, { "epoch": 4.0942823803967325, "grad_norm": 0.474609375, "learning_rate": 0.00012812589526972777, "loss": 0.0078, "step": 8772 }, { "epoch": 4.094749124854142, "grad_norm": 0.279296875, "learning_rate": 0.0001281118138247329, "loss": 0.0033, "step": 8773 }, { "epoch": 4.095215869311552, "grad_norm": 0.451171875, "learning_rate": 0.00012809773177445942, "loss": 0.0059, "step": 8774 }, { "epoch": 4.095682613768962, "grad_norm": 0.439453125, "learning_rate": 0.00012808364911921064, "loss": 0.0086, "step": 8775 }, { "epoch": 4.096149358226371, "grad_norm": 0.326171875, "learning_rate": 0.00012806956585928976, "loss": 0.0057, "step": 8776 }, { "epoch": 4.096616102683781, "grad_norm": 0.34375, "learning_rate": 0.00012805548199499992, "loss": 0.0055, "step": 8777 }, { "epoch": 4.09708284714119, "grad_norm": 0.3203125, "learning_rate": 0.00012804139752664446, "loss": 0.0034, "step": 8778 }, { "epoch": 4.0975495915986, "grad_norm": 0.53515625, "learning_rate": 0.0001280273124545266, "loss": 0.0075, "step": 8779 }, { "epoch": 4.098016336056009, "grad_norm": 0.447265625, "learning_rate": 0.0001280132267789496, "loss": 0.0125, "step": 8780 }, { "epoch": 4.098483080513419, "grad_norm": 0.62109375, "learning_rate": 0.00012799914050021672, "loss": 0.0105, "step": 8781 }, { "epoch": 4.098949824970829, "grad_norm": 0.5390625, "learning_rate": 0.00012798505361863135, "loss": 0.008, "step": 8782 }, { "epoch": 4.099416569428238, "grad_norm": 0.75390625, "learning_rate": 0.0001279709661344967, "loss": 0.0098, "step": 8783 }, { "epoch": 4.099883313885647, "grad_norm": 0.31640625, "learning_rate": 0.00012795687804811605, "loss": 0.0058, "step": 8784 }, { "epoch": 4.100350058343057, "grad_norm": 0.318359375, "learning_rate": 0.00012794278935979284, "loss": 0.0071, "step": 8785 }, { "epoch": 4.100816802800467, "grad_norm": 0.53515625, "learning_rate": 0.0001279287000698304, "loss": 0.0098, "step": 8786 }, { "epoch": 4.101283547257876, "grad_norm": 0.337890625, "learning_rate": 0.00012791461017853202, "loss": 0.0061, "step": 8787 }, { "epoch": 4.101750291715286, "grad_norm": 0.30859375, "learning_rate": 0.00012790051968620113, "loss": 0.012, "step": 8788 }, { "epoch": 4.1022170361726955, "grad_norm": 0.5625, "learning_rate": 0.0001278864285931411, "loss": 0.0086, "step": 8789 }, { "epoch": 4.102683780630105, "grad_norm": 0.5234375, "learning_rate": 0.0001278723368996553, "loss": 0.0106, "step": 8790 }, { "epoch": 4.103150525087514, "grad_norm": 0.42578125, "learning_rate": 0.00012785824460604718, "loss": 0.0062, "step": 8791 }, { "epoch": 4.103617269544924, "grad_norm": 0.451171875, "learning_rate": 0.00012784415171262017, "loss": 0.0104, "step": 8792 }, { "epoch": 4.104084014002334, "grad_norm": 0.419921875, "learning_rate": 0.00012783005821967764, "loss": 0.0087, "step": 8793 }, { "epoch": 4.104550758459744, "grad_norm": 0.41796875, "learning_rate": 0.00012781596412752308, "loss": 0.0072, "step": 8794 }, { "epoch": 4.1050175029171525, "grad_norm": 0.6640625, "learning_rate": 0.00012780186943645995, "loss": 0.0109, "step": 8795 }, { "epoch": 4.105484247374562, "grad_norm": 0.330078125, "learning_rate": 0.00012778777414679173, "loss": 0.0057, "step": 8796 }, { "epoch": 4.105950991831972, "grad_norm": 0.447265625, "learning_rate": 0.0001277736782588219, "loss": 0.0094, "step": 8797 }, { "epoch": 4.106417736289382, "grad_norm": 0.453125, "learning_rate": 0.00012775958177285398, "loss": 0.0047, "step": 8798 }, { "epoch": 4.106884480746791, "grad_norm": 0.447265625, "learning_rate": 0.00012774548468919144, "loss": 0.0061, "step": 8799 }, { "epoch": 4.107351225204201, "grad_norm": 0.337890625, "learning_rate": 0.00012773138700813784, "loss": 0.0081, "step": 8800 }, { "epoch": 4.10781796966161, "grad_norm": 0.3984375, "learning_rate": 0.0001277172887299967, "loss": 0.0062, "step": 8801 }, { "epoch": 4.10828471411902, "grad_norm": 0.33203125, "learning_rate": 0.0001277031898550716, "loss": 0.0039, "step": 8802 }, { "epoch": 4.108751458576429, "grad_norm": 0.65234375, "learning_rate": 0.00012768909038366608, "loss": 0.0126, "step": 8803 }, { "epoch": 4.109218203033839, "grad_norm": 0.3515625, "learning_rate": 0.00012767499031608373, "loss": 0.006, "step": 8804 }, { "epoch": 4.109684947491249, "grad_norm": 0.494140625, "learning_rate": 0.00012766088965262813, "loss": 0.0071, "step": 8805 }, { "epoch": 4.1101516919486585, "grad_norm": 0.66015625, "learning_rate": 0.00012764678839360285, "loss": 0.012, "step": 8806 }, { "epoch": 4.110618436406067, "grad_norm": 0.58203125, "learning_rate": 0.00012763268653931157, "loss": 0.0174, "step": 8807 }, { "epoch": 4.111085180863477, "grad_norm": 0.427734375, "learning_rate": 0.0001276185840900579, "loss": 0.0044, "step": 8808 }, { "epoch": 4.111551925320887, "grad_norm": 0.4453125, "learning_rate": 0.00012760448104614546, "loss": 0.0053, "step": 8809 }, { "epoch": 4.112018669778297, "grad_norm": 0.56640625, "learning_rate": 0.0001275903774078779, "loss": 0.0056, "step": 8810 }, { "epoch": 4.112485414235706, "grad_norm": 0.77734375, "learning_rate": 0.00012757627317555893, "loss": 0.0129, "step": 8811 }, { "epoch": 4.1129521586931155, "grad_norm": 0.4765625, "learning_rate": 0.0001275621683494922, "loss": 0.0103, "step": 8812 }, { "epoch": 4.113418903150525, "grad_norm": 0.2890625, "learning_rate": 0.00012754806292998135, "loss": 0.005, "step": 8813 }, { "epoch": 4.113885647607935, "grad_norm": 0.49609375, "learning_rate": 0.00012753395691733022, "loss": 0.0142, "step": 8814 }, { "epoch": 4.114352392065344, "grad_norm": 0.3359375, "learning_rate": 0.0001275198503118424, "loss": 0.0044, "step": 8815 }, { "epoch": 4.114819136522754, "grad_norm": 0.412109375, "learning_rate": 0.00012750574311382168, "loss": 0.0068, "step": 8816 }, { "epoch": 4.115285880980164, "grad_norm": 0.470703125, "learning_rate": 0.00012749163532357183, "loss": 0.007, "step": 8817 }, { "epoch": 4.115752625437573, "grad_norm": 0.640625, "learning_rate": 0.00012747752694139652, "loss": 0.0161, "step": 8818 }, { "epoch": 4.116219369894982, "grad_norm": 0.73828125, "learning_rate": 0.00012746341796759957, "loss": 0.0089, "step": 8819 }, { "epoch": 4.116686114352392, "grad_norm": 0.361328125, "learning_rate": 0.00012744930840248478, "loss": 0.0046, "step": 8820 }, { "epoch": 4.117152858809802, "grad_norm": 0.2314453125, "learning_rate": 0.00012743519824635592, "loss": 0.0059, "step": 8821 }, { "epoch": 4.117619603267211, "grad_norm": 0.345703125, "learning_rate": 0.00012742108749951679, "loss": 0.005, "step": 8822 }, { "epoch": 4.118086347724621, "grad_norm": 0.3046875, "learning_rate": 0.0001274069761622712, "loss": 0.0077, "step": 8823 }, { "epoch": 4.11855309218203, "grad_norm": 0.578125, "learning_rate": 0.00012739286423492304, "loss": 0.0138, "step": 8824 }, { "epoch": 4.11901983663944, "grad_norm": 0.474609375, "learning_rate": 0.00012737875171777615, "loss": 0.0063, "step": 8825 }, { "epoch": 4.119486581096849, "grad_norm": 0.6328125, "learning_rate": 0.00012736463861113428, "loss": 0.0126, "step": 8826 }, { "epoch": 4.119953325554259, "grad_norm": 0.263671875, "learning_rate": 0.00012735052491530144, "loss": 0.0043, "step": 8827 }, { "epoch": 4.120420070011669, "grad_norm": 0.361328125, "learning_rate": 0.00012733641063058143, "loss": 0.0066, "step": 8828 }, { "epoch": 4.1208868144690785, "grad_norm": 0.55859375, "learning_rate": 0.00012732229575727818, "loss": 0.0092, "step": 8829 }, { "epoch": 4.121353558926487, "grad_norm": 0.421875, "learning_rate": 0.00012730818029569558, "loss": 0.0074, "step": 8830 }, { "epoch": 4.121820303383897, "grad_norm": 0.490234375, "learning_rate": 0.00012729406424613754, "loss": 0.0078, "step": 8831 }, { "epoch": 4.122287047841307, "grad_norm": 0.451171875, "learning_rate": 0.00012727994760890806, "loss": 0.0082, "step": 8832 }, { "epoch": 4.122753792298717, "grad_norm": 0.27734375, "learning_rate": 0.000127265830384311, "loss": 0.0042, "step": 8833 }, { "epoch": 4.123220536756126, "grad_norm": 0.353515625, "learning_rate": 0.00012725171257265037, "loss": 0.0057, "step": 8834 }, { "epoch": 4.1236872812135354, "grad_norm": 0.396484375, "learning_rate": 0.00012723759417423012, "loss": 0.0064, "step": 8835 }, { "epoch": 4.124154025670945, "grad_norm": 0.302734375, "learning_rate": 0.00012722347518935427, "loss": 0.0058, "step": 8836 }, { "epoch": 4.124620770128355, "grad_norm": 0.388671875, "learning_rate": 0.00012720935561832679, "loss": 0.0046, "step": 8837 }, { "epoch": 4.125087514585764, "grad_norm": 0.515625, "learning_rate": 0.0001271952354614517, "loss": 0.0093, "step": 8838 }, { "epoch": 4.125554259043174, "grad_norm": 0.5078125, "learning_rate": 0.00012718111471903297, "loss": 0.007, "step": 8839 }, { "epoch": 4.1260210035005835, "grad_norm": 0.6640625, "learning_rate": 0.00012716699339137476, "loss": 0.0168, "step": 8840 }, { "epoch": 4.126487747957993, "grad_norm": 0.61328125, "learning_rate": 0.00012715287147878102, "loss": 0.0103, "step": 8841 }, { "epoch": 4.126954492415402, "grad_norm": 0.427734375, "learning_rate": 0.0001271387489815558, "loss": 0.0087, "step": 8842 }, { "epoch": 4.127421236872812, "grad_norm": 0.79296875, "learning_rate": 0.0001271246259000032, "loss": 0.0152, "step": 8843 }, { "epoch": 4.127887981330222, "grad_norm": 0.4609375, "learning_rate": 0.00012711050223442733, "loss": 0.0143, "step": 8844 }, { "epoch": 4.128354725787632, "grad_norm": 0.58203125, "learning_rate": 0.00012709637798513225, "loss": 0.0098, "step": 8845 }, { "epoch": 4.1288214702450405, "grad_norm": 0.498046875, "learning_rate": 0.0001270822531524221, "loss": 0.0124, "step": 8846 }, { "epoch": 4.12928821470245, "grad_norm": 0.56640625, "learning_rate": 0.00012706812773660098, "loss": 0.0063, "step": 8847 }, { "epoch": 4.12975495915986, "grad_norm": 0.359375, "learning_rate": 0.00012705400173797305, "loss": 0.0045, "step": 8848 }, { "epoch": 4.13022170361727, "grad_norm": 0.578125, "learning_rate": 0.00012703987515684244, "loss": 0.0086, "step": 8849 }, { "epoch": 4.130688448074679, "grad_norm": 1.171875, "learning_rate": 0.00012702574799351333, "loss": 0.0146, "step": 8850 }, { "epoch": 4.131155192532089, "grad_norm": 0.640625, "learning_rate": 0.00012701162024828988, "loss": 0.0111, "step": 8851 }, { "epoch": 4.131621936989498, "grad_norm": 0.734375, "learning_rate": 0.00012699749192147624, "loss": 0.0105, "step": 8852 }, { "epoch": 4.132088681446908, "grad_norm": 0.2470703125, "learning_rate": 0.00012698336301337668, "loss": 0.0031, "step": 8853 }, { "epoch": 4.132555425904317, "grad_norm": 0.447265625, "learning_rate": 0.00012696923352429538, "loss": 0.0098, "step": 8854 }, { "epoch": 4.133022170361727, "grad_norm": 0.54296875, "learning_rate": 0.00012695510345453652, "loss": 0.0169, "step": 8855 }, { "epoch": 4.133488914819137, "grad_norm": 0.52734375, "learning_rate": 0.0001269409728044044, "loss": 0.0116, "step": 8856 }, { "epoch": 4.1339556592765465, "grad_norm": 0.462890625, "learning_rate": 0.00012692684157420325, "loss": 0.006, "step": 8857 }, { "epoch": 4.134422403733955, "grad_norm": 0.51171875, "learning_rate": 0.0001269127097642373, "loss": 0.0073, "step": 8858 }, { "epoch": 4.134889148191365, "grad_norm": 0.55078125, "learning_rate": 0.00012689857737481088, "loss": 0.012, "step": 8859 }, { "epoch": 4.135355892648775, "grad_norm": 0.451171875, "learning_rate": 0.00012688444440622825, "loss": 0.0092, "step": 8860 }, { "epoch": 4.135822637106184, "grad_norm": 0.62890625, "learning_rate": 0.00012687031085879369, "loss": 0.0106, "step": 8861 }, { "epoch": 4.136289381563594, "grad_norm": 0.3359375, "learning_rate": 0.0001268561767328115, "loss": 0.0058, "step": 8862 }, { "epoch": 4.1367561260210035, "grad_norm": 0.3984375, "learning_rate": 0.00012684204202858605, "loss": 0.0052, "step": 8863 }, { "epoch": 4.137222870478413, "grad_norm": 0.486328125, "learning_rate": 0.00012682790674642166, "loss": 0.0058, "step": 8864 }, { "epoch": 4.137689614935822, "grad_norm": 0.408203125, "learning_rate": 0.00012681377088662266, "loss": 0.0092, "step": 8865 }, { "epoch": 4.138156359393232, "grad_norm": 0.447265625, "learning_rate": 0.00012679963444949343, "loss": 0.012, "step": 8866 }, { "epoch": 4.138623103850642, "grad_norm": 0.37890625, "learning_rate": 0.00012678549743533834, "loss": 0.0079, "step": 8867 }, { "epoch": 4.139089848308052, "grad_norm": 0.416015625, "learning_rate": 0.00012677135984446173, "loss": 0.0102, "step": 8868 }, { "epoch": 4.1395565927654605, "grad_norm": 0.71875, "learning_rate": 0.00012675722167716807, "loss": 0.0084, "step": 8869 }, { "epoch": 4.14002333722287, "grad_norm": 0.263671875, "learning_rate": 0.00012674308293376176, "loss": 0.0035, "step": 8870 }, { "epoch": 4.14049008168028, "grad_norm": 0.54296875, "learning_rate": 0.00012672894361454717, "loss": 0.0064, "step": 8871 }, { "epoch": 4.14095682613769, "grad_norm": 0.4453125, "learning_rate": 0.00012671480371982877, "loss": 0.0071, "step": 8872 }, { "epoch": 4.141423570595099, "grad_norm": 0.2294921875, "learning_rate": 0.00012670066324991104, "loss": 0.0077, "step": 8873 }, { "epoch": 4.141890315052509, "grad_norm": 0.53125, "learning_rate": 0.00012668652220509838, "loss": 0.0073, "step": 8874 }, { "epoch": 4.142357059509918, "grad_norm": 0.51953125, "learning_rate": 0.0001266723805856953, "loss": 0.0107, "step": 8875 }, { "epoch": 4.142823803967328, "grad_norm": 0.490234375, "learning_rate": 0.00012665823839200626, "loss": 0.0092, "step": 8876 }, { "epoch": 4.143290548424737, "grad_norm": 0.482421875, "learning_rate": 0.00012664409562433576, "loss": 0.0146, "step": 8877 }, { "epoch": 4.143757292882147, "grad_norm": 0.35546875, "learning_rate": 0.00012662995228298831, "loss": 0.0052, "step": 8878 }, { "epoch": 4.144224037339557, "grad_norm": 0.349609375, "learning_rate": 0.00012661580836826846, "loss": 0.0045, "step": 8879 }, { "epoch": 4.1446907817969665, "grad_norm": 0.462890625, "learning_rate": 0.00012660166388048073, "loss": 0.0071, "step": 8880 }, { "epoch": 4.145157526254375, "grad_norm": 0.4140625, "learning_rate": 0.00012658751881992967, "loss": 0.0094, "step": 8881 }, { "epoch": 4.145624270711785, "grad_norm": 0.5, "learning_rate": 0.0001265733731869198, "loss": 0.0123, "step": 8882 }, { "epoch": 4.146091015169195, "grad_norm": 0.498046875, "learning_rate": 0.00012655922698175576, "loss": 0.0063, "step": 8883 }, { "epoch": 4.146557759626605, "grad_norm": 0.92578125, "learning_rate": 0.00012654508020474206, "loss": 0.017, "step": 8884 }, { "epoch": 4.147024504084014, "grad_norm": 0.52734375, "learning_rate": 0.00012653093285618333, "loss": 0.0096, "step": 8885 }, { "epoch": 4.1474912485414235, "grad_norm": 0.30078125, "learning_rate": 0.00012651678493638422, "loss": 0.0039, "step": 8886 }, { "epoch": 4.147957992998833, "grad_norm": 0.384765625, "learning_rate": 0.0001265026364456493, "loss": 0.0059, "step": 8887 }, { "epoch": 4.148424737456243, "grad_norm": 0.5390625, "learning_rate": 0.00012648848738428317, "loss": 0.0067, "step": 8888 }, { "epoch": 4.148891481913652, "grad_norm": 0.50390625, "learning_rate": 0.00012647433775259055, "loss": 0.0097, "step": 8889 }, { "epoch": 4.149358226371062, "grad_norm": 0.515625, "learning_rate": 0.00012646018755087607, "loss": 0.0085, "step": 8890 }, { "epoch": 4.149824970828472, "grad_norm": 0.474609375, "learning_rate": 0.00012644603677944437, "loss": 0.0085, "step": 8891 }, { "epoch": 4.150291715285881, "grad_norm": 0.474609375, "learning_rate": 0.0001264318854386002, "loss": 0.0061, "step": 8892 }, { "epoch": 4.15075845974329, "grad_norm": 0.361328125, "learning_rate": 0.00012641773352864817, "loss": 0.0066, "step": 8893 }, { "epoch": 4.1512252042007, "grad_norm": 0.4375, "learning_rate": 0.00012640358104989302, "loss": 0.0075, "step": 8894 }, { "epoch": 4.15169194865811, "grad_norm": 0.373046875, "learning_rate": 0.0001263894280026395, "loss": 0.0086, "step": 8895 }, { "epoch": 4.15215869311552, "grad_norm": 0.296875, "learning_rate": 0.0001263752743871923, "loss": 0.0052, "step": 8896 }, { "epoch": 4.152625437572929, "grad_norm": 0.4453125, "learning_rate": 0.00012636112020385616, "loss": 0.0092, "step": 8897 }, { "epoch": 4.153092182030338, "grad_norm": 0.4140625, "learning_rate": 0.00012634696545293589, "loss": 0.0113, "step": 8898 }, { "epoch": 4.153558926487748, "grad_norm": 0.53515625, "learning_rate": 0.0001263328101347362, "loss": 0.0091, "step": 8899 }, { "epoch": 4.154025670945158, "grad_norm": 0.486328125, "learning_rate": 0.0001263186542495619, "loss": 0.0081, "step": 8900 }, { "epoch": 4.154492415402567, "grad_norm": 0.41796875, "learning_rate": 0.00012630449779771775, "loss": 0.0089, "step": 8901 }, { "epoch": 4.154959159859977, "grad_norm": 0.439453125, "learning_rate": 0.0001262903407795086, "loss": 0.0085, "step": 8902 }, { "epoch": 4.1554259043173865, "grad_norm": 0.42578125, "learning_rate": 0.00012627618319523926, "loss": 0.0118, "step": 8903 }, { "epoch": 4.155892648774795, "grad_norm": 0.451171875, "learning_rate": 0.00012626202504521452, "loss": 0.0062, "step": 8904 }, { "epoch": 4.156359393232205, "grad_norm": 0.4296875, "learning_rate": 0.00012624786632973924, "loss": 0.0107, "step": 8905 }, { "epoch": 4.156826137689615, "grad_norm": 0.62109375, "learning_rate": 0.00012623370704911826, "loss": 0.0203, "step": 8906 }, { "epoch": 4.157292882147025, "grad_norm": 0.4921875, "learning_rate": 0.00012621954720365652, "loss": 0.0103, "step": 8907 }, { "epoch": 4.157759626604434, "grad_norm": 0.265625, "learning_rate": 0.0001262053867936588, "loss": 0.0109, "step": 8908 }, { "epoch": 4.1582263710618435, "grad_norm": 0.349609375, "learning_rate": 0.00012619122581943002, "loss": 0.0055, "step": 8909 }, { "epoch": 4.158693115519253, "grad_norm": 0.298828125, "learning_rate": 0.0001261770642812751, "loss": 0.006, "step": 8910 }, { "epoch": 4.159159859976663, "grad_norm": 0.55078125, "learning_rate": 0.00012616290217949894, "loss": 0.0103, "step": 8911 }, { "epoch": 4.159626604434072, "grad_norm": 0.5, "learning_rate": 0.00012614873951440647, "loss": 0.0067, "step": 8912 }, { "epoch": 4.160093348891482, "grad_norm": 0.369140625, "learning_rate": 0.00012613457628630264, "loss": 0.0072, "step": 8913 }, { "epoch": 4.1605600933488915, "grad_norm": 0.50390625, "learning_rate": 0.00012612041249549235, "loss": 0.0096, "step": 8914 }, { "epoch": 4.161026837806301, "grad_norm": 0.53125, "learning_rate": 0.0001261062481422806, "loss": 0.0073, "step": 8915 }, { "epoch": 4.16149358226371, "grad_norm": 0.3984375, "learning_rate": 0.0001260920832269724, "loss": 0.0051, "step": 8916 }, { "epoch": 4.16196032672112, "grad_norm": 0.6953125, "learning_rate": 0.00012607791774987264, "loss": 0.0093, "step": 8917 }, { "epoch": 4.16242707117853, "grad_norm": 0.451171875, "learning_rate": 0.00012606375171128641, "loss": 0.0071, "step": 8918 }, { "epoch": 4.16289381563594, "grad_norm": 0.515625, "learning_rate": 0.0001260495851115187, "loss": 0.0089, "step": 8919 }, { "epoch": 4.1633605600933485, "grad_norm": 0.40625, "learning_rate": 0.0001260354179508745, "loss": 0.0106, "step": 8920 }, { "epoch": 4.163827304550758, "grad_norm": 0.39453125, "learning_rate": 0.00012602125022965888, "loss": 0.009, "step": 8921 }, { "epoch": 4.164294049008168, "grad_norm": 0.5234375, "learning_rate": 0.00012600708194817687, "loss": 0.0063, "step": 8922 }, { "epoch": 4.164760793465578, "grad_norm": 0.66015625, "learning_rate": 0.00012599291310673354, "loss": 0.011, "step": 8923 }, { "epoch": 4.165227537922987, "grad_norm": 0.734375, "learning_rate": 0.00012597874370563392, "loss": 0.0126, "step": 8924 }, { "epoch": 4.165694282380397, "grad_norm": 0.4453125, "learning_rate": 0.00012596457374518315, "loss": 0.0043, "step": 8925 }, { "epoch": 4.166161026837806, "grad_norm": 0.34375, "learning_rate": 0.0001259504032256863, "loss": 0.0093, "step": 8926 }, { "epoch": 4.166627771295216, "grad_norm": 0.625, "learning_rate": 0.00012593623214744842, "loss": 0.0123, "step": 8927 }, { "epoch": 4.167094515752625, "grad_norm": 0.546875, "learning_rate": 0.00012592206051077474, "loss": 0.0077, "step": 8928 }, { "epoch": 4.167561260210035, "grad_norm": 0.5078125, "learning_rate": 0.00012590788831597037, "loss": 0.0078, "step": 8929 }, { "epoch": 4.168028004667445, "grad_norm": 0.51953125, "learning_rate": 0.00012589371556334035, "loss": 0.0175, "step": 8930 }, { "epoch": 4.1684947491248545, "grad_norm": 0.482421875, "learning_rate": 0.00012587954225318993, "loss": 0.0075, "step": 8931 }, { "epoch": 4.168961493582263, "grad_norm": 0.578125, "learning_rate": 0.00012586536838582425, "loss": 0.009, "step": 8932 }, { "epoch": 4.169428238039673, "grad_norm": 0.51953125, "learning_rate": 0.0001258511939615485, "loss": 0.0119, "step": 8933 }, { "epoch": 4.169894982497083, "grad_norm": 0.72265625, "learning_rate": 0.00012583701898066785, "loss": 0.0116, "step": 8934 }, { "epoch": 4.170361726954493, "grad_norm": 0.51953125, "learning_rate": 0.00012582284344348754, "loss": 0.0106, "step": 8935 }, { "epoch": 4.170828471411902, "grad_norm": 0.267578125, "learning_rate": 0.00012580866735031272, "loss": 0.0036, "step": 8936 }, { "epoch": 4.1712952158693115, "grad_norm": 0.60546875, "learning_rate": 0.0001257944907014487, "loss": 0.0097, "step": 8937 }, { "epoch": 4.171761960326721, "grad_norm": 0.486328125, "learning_rate": 0.00012578031349720064, "loss": 0.0101, "step": 8938 }, { "epoch": 4.172228704784131, "grad_norm": 0.46875, "learning_rate": 0.00012576613573787382, "loss": 0.0087, "step": 8939 }, { "epoch": 4.17269544924154, "grad_norm": 0.6328125, "learning_rate": 0.00012575195742377355, "loss": 0.0145, "step": 8940 }, { "epoch": 4.17316219369895, "grad_norm": 0.447265625, "learning_rate": 0.00012573777855520504, "loss": 0.0088, "step": 8941 }, { "epoch": 4.17362893815636, "grad_norm": 0.50390625, "learning_rate": 0.0001257235991324736, "loss": 0.0085, "step": 8942 }, { "epoch": 4.174095682613769, "grad_norm": 0.421875, "learning_rate": 0.00012570941915588453, "loss": 0.0157, "step": 8943 }, { "epoch": 4.174562427071178, "grad_norm": 0.6484375, "learning_rate": 0.00012569523862574316, "loss": 0.0107, "step": 8944 }, { "epoch": 4.175029171528588, "grad_norm": 0.41015625, "learning_rate": 0.00012568105754235477, "loss": 0.0095, "step": 8945 }, { "epoch": 4.175495915985998, "grad_norm": 0.3984375, "learning_rate": 0.00012566687590602474, "loss": 0.0056, "step": 8946 }, { "epoch": 4.175962660443407, "grad_norm": 0.5078125, "learning_rate": 0.00012565269371705835, "loss": 0.008, "step": 8947 }, { "epoch": 4.176429404900817, "grad_norm": 0.42578125, "learning_rate": 0.00012563851097576102, "loss": 0.0091, "step": 8948 }, { "epoch": 4.176896149358226, "grad_norm": 0.376953125, "learning_rate": 0.00012562432768243814, "loss": 0.0078, "step": 8949 }, { "epoch": 4.177362893815636, "grad_norm": 0.400390625, "learning_rate": 0.000125610143837395, "loss": 0.0082, "step": 8950 }, { "epoch": 4.177829638273045, "grad_norm": 0.6875, "learning_rate": 0.0001255959594409371, "loss": 0.0127, "step": 8951 }, { "epoch": 4.178296382730455, "grad_norm": 0.484375, "learning_rate": 0.00012558177449336976, "loss": 0.0134, "step": 8952 }, { "epoch": 4.178763127187865, "grad_norm": 0.357421875, "learning_rate": 0.0001255675889949984, "loss": 0.0053, "step": 8953 }, { "epoch": 4.1792298716452745, "grad_norm": 0.40625, "learning_rate": 0.00012555340294612854, "loss": 0.0065, "step": 8954 }, { "epoch": 4.179696616102683, "grad_norm": 0.36328125, "learning_rate": 0.00012553921634706557, "loss": 0.0052, "step": 8955 }, { "epoch": 4.180163360560093, "grad_norm": 0.609375, "learning_rate": 0.0001255250291981149, "loss": 0.0086, "step": 8956 }, { "epoch": 4.180630105017503, "grad_norm": 0.65234375, "learning_rate": 0.00012551084149958202, "loss": 0.0096, "step": 8957 }, { "epoch": 4.181096849474913, "grad_norm": 0.369140625, "learning_rate": 0.00012549665325177248, "loss": 0.0077, "step": 8958 }, { "epoch": 4.181563593932322, "grad_norm": 0.357421875, "learning_rate": 0.00012548246445499167, "loss": 0.0071, "step": 8959 }, { "epoch": 4.1820303383897315, "grad_norm": 0.33984375, "learning_rate": 0.00012546827510954512, "loss": 0.0048, "step": 8960 }, { "epoch": 4.182497082847141, "grad_norm": 0.30078125, "learning_rate": 0.00012545408521573832, "loss": 0.0044, "step": 8961 }, { "epoch": 4.182963827304551, "grad_norm": 0.3828125, "learning_rate": 0.00012543989477387688, "loss": 0.0055, "step": 8962 }, { "epoch": 4.18343057176196, "grad_norm": 0.353515625, "learning_rate": 0.00012542570378426624, "loss": 0.0054, "step": 8963 }, { "epoch": 4.18389731621937, "grad_norm": 0.75390625, "learning_rate": 0.000125411512247212, "loss": 0.0101, "step": 8964 }, { "epoch": 4.18436406067678, "grad_norm": 0.287109375, "learning_rate": 0.00012539732016301973, "loss": 0.0047, "step": 8965 }, { "epoch": 4.184830805134189, "grad_norm": 0.4140625, "learning_rate": 0.00012538312753199494, "loss": 0.0044, "step": 8966 }, { "epoch": 4.185297549591598, "grad_norm": 0.2890625, "learning_rate": 0.00012536893435444326, "loss": 0.0053, "step": 8967 }, { "epoch": 4.185764294049008, "grad_norm": 0.625, "learning_rate": 0.0001253547406306703, "loss": 0.0118, "step": 8968 }, { "epoch": 4.186231038506418, "grad_norm": 0.578125, "learning_rate": 0.0001253405463609816, "loss": 0.014, "step": 8969 }, { "epoch": 4.186697782963828, "grad_norm": 0.640625, "learning_rate": 0.00012532635154568288, "loss": 0.0125, "step": 8970 }, { "epoch": 4.187164527421237, "grad_norm": 0.58984375, "learning_rate": 0.0001253121561850797, "loss": 0.0132, "step": 8971 }, { "epoch": 4.187631271878646, "grad_norm": 0.5390625, "learning_rate": 0.00012529796027947767, "loss": 0.0085, "step": 8972 }, { "epoch": 4.188098016336056, "grad_norm": 0.37109375, "learning_rate": 0.00012528376382918248, "loss": 0.0036, "step": 8973 }, { "epoch": 4.188564760793466, "grad_norm": 0.322265625, "learning_rate": 0.00012526956683449984, "loss": 0.0081, "step": 8974 }, { "epoch": 4.189031505250875, "grad_norm": 0.390625, "learning_rate": 0.00012525536929573538, "loss": 0.0066, "step": 8975 }, { "epoch": 4.189498249708285, "grad_norm": 0.28515625, "learning_rate": 0.00012524117121319477, "loss": 0.0034, "step": 8976 }, { "epoch": 4.1899649941656945, "grad_norm": 0.578125, "learning_rate": 0.0001252269725871838, "loss": 0.0054, "step": 8977 }, { "epoch": 4.190431738623104, "grad_norm": 0.447265625, "learning_rate": 0.00012521277341800808, "loss": 0.0082, "step": 8978 }, { "epoch": 4.190898483080513, "grad_norm": 0.484375, "learning_rate": 0.00012519857370597336, "loss": 0.0096, "step": 8979 }, { "epoch": 4.191365227537923, "grad_norm": 0.314453125, "learning_rate": 0.00012518437345138543, "loss": 0.0052, "step": 8980 }, { "epoch": 4.191831971995333, "grad_norm": 0.53125, "learning_rate": 0.00012517017265454997, "loss": 0.0062, "step": 8981 }, { "epoch": 4.1922987164527425, "grad_norm": 0.462890625, "learning_rate": 0.0001251559713157728, "loss": 0.0099, "step": 8982 }, { "epoch": 4.1927654609101515, "grad_norm": 0.56640625, "learning_rate": 0.0001251417694353596, "loss": 0.0125, "step": 8983 }, { "epoch": 4.193232205367561, "grad_norm": 0.486328125, "learning_rate": 0.00012512756701361626, "loss": 0.0078, "step": 8984 }, { "epoch": 4.193698949824971, "grad_norm": 0.5703125, "learning_rate": 0.0001251133640508485, "loss": 0.0124, "step": 8985 }, { "epoch": 4.194165694282381, "grad_norm": 0.62109375, "learning_rate": 0.00012509916054736215, "loss": 0.0112, "step": 8986 }, { "epoch": 4.19463243873979, "grad_norm": 0.61328125, "learning_rate": 0.00012508495650346302, "loss": 0.009, "step": 8987 }, { "epoch": 4.1950991831971995, "grad_norm": 0.55859375, "learning_rate": 0.00012507075191945695, "loss": 0.0093, "step": 8988 }, { "epoch": 4.195565927654609, "grad_norm": 0.80859375, "learning_rate": 0.00012505654679564976, "loss": 0.0167, "step": 8989 }, { "epoch": 4.196032672112018, "grad_norm": 0.609375, "learning_rate": 0.00012504234113234735, "loss": 0.015, "step": 8990 }, { "epoch": 4.196499416569428, "grad_norm": 0.494140625, "learning_rate": 0.00012502813492985552, "loss": 0.0073, "step": 8991 }, { "epoch": 4.196966161026838, "grad_norm": 0.462890625, "learning_rate": 0.00012501392818848017, "loss": 0.0085, "step": 8992 }, { "epoch": 4.197432905484248, "grad_norm": 0.494140625, "learning_rate": 0.00012499972090852722, "loss": 0.0077, "step": 8993 }, { "epoch": 4.1978996499416565, "grad_norm": 0.359375, "learning_rate": 0.00012498551309030254, "loss": 0.0067, "step": 8994 }, { "epoch": 4.198366394399066, "grad_norm": 0.34765625, "learning_rate": 0.000124971304734112, "loss": 0.0043, "step": 8995 }, { "epoch": 4.198833138856476, "grad_norm": 0.5625, "learning_rate": 0.00012495709584026162, "loss": 0.0069, "step": 8996 }, { "epoch": 4.199299883313886, "grad_norm": 0.7109375, "learning_rate": 0.00012494288640905725, "loss": 0.0127, "step": 8997 }, { "epoch": 4.199766627771295, "grad_norm": 0.6328125, "learning_rate": 0.00012492867644080485, "loss": 0.0143, "step": 8998 }, { "epoch": 4.200233372228705, "grad_norm": 0.451171875, "learning_rate": 0.0001249144659358104, "loss": 0.0076, "step": 8999 }, { "epoch": 4.200700116686114, "grad_norm": 0.369140625, "learning_rate": 0.00012490025489437985, "loss": 0.0073, "step": 9000 }, { "epoch": 4.201166861143524, "grad_norm": 0.4609375, "learning_rate": 0.0001248860433168192, "loss": 0.009, "step": 9001 }, { "epoch": 4.201633605600933, "grad_norm": 0.4140625, "learning_rate": 0.0001248718312034344, "loss": 0.0112, "step": 9002 }, { "epoch": 4.202100350058343, "grad_norm": 0.47265625, "learning_rate": 0.00012485761855453152, "loss": 0.0079, "step": 9003 }, { "epoch": 4.202567094515753, "grad_norm": 0.455078125, "learning_rate": 0.0001248434053704165, "loss": 0.0061, "step": 9004 }, { "epoch": 4.2030338389731625, "grad_norm": 0.369140625, "learning_rate": 0.0001248291916513954, "loss": 0.0081, "step": 9005 }, { "epoch": 4.203500583430571, "grad_norm": 0.75, "learning_rate": 0.00012481497739777423, "loss": 0.0092, "step": 9006 }, { "epoch": 4.203967327887981, "grad_norm": 0.6953125, "learning_rate": 0.00012480076260985913, "loss": 0.0136, "step": 9007 }, { "epoch": 4.204434072345391, "grad_norm": 0.443359375, "learning_rate": 0.00012478654728795605, "loss": 0.0079, "step": 9008 }, { "epoch": 4.204900816802801, "grad_norm": 0.3203125, "learning_rate": 0.00012477233143237112, "loss": 0.0067, "step": 9009 }, { "epoch": 4.20536756126021, "grad_norm": 0.6015625, "learning_rate": 0.00012475811504341039, "loss": 0.0089, "step": 9010 }, { "epoch": 4.2058343057176195, "grad_norm": 0.36328125, "learning_rate": 0.00012474389812138002, "loss": 0.0048, "step": 9011 }, { "epoch": 4.206301050175029, "grad_norm": 0.275390625, "learning_rate": 0.000124729680666586, "loss": 0.004, "step": 9012 }, { "epoch": 4.206767794632439, "grad_norm": 0.47265625, "learning_rate": 0.00012471546267933456, "loss": 0.0059, "step": 9013 }, { "epoch": 4.207234539089848, "grad_norm": 0.80078125, "learning_rate": 0.00012470124415993179, "loss": 0.0181, "step": 9014 }, { "epoch": 4.207701283547258, "grad_norm": 0.50390625, "learning_rate": 0.0001246870251086838, "loss": 0.0078, "step": 9015 }, { "epoch": 4.208168028004668, "grad_norm": 0.44140625, "learning_rate": 0.0001246728055258968, "loss": 0.0072, "step": 9016 }, { "epoch": 4.208634772462077, "grad_norm": 0.435546875, "learning_rate": 0.0001246585854118769, "loss": 0.0064, "step": 9017 }, { "epoch": 4.209101516919486, "grad_norm": 0.34765625, "learning_rate": 0.0001246443647669303, "loss": 0.0042, "step": 9018 }, { "epoch": 4.209568261376896, "grad_norm": 0.466796875, "learning_rate": 0.00012463014359136317, "loss": 0.0096, "step": 9019 }, { "epoch": 4.210035005834306, "grad_norm": 0.375, "learning_rate": 0.00012461592188548176, "loss": 0.0056, "step": 9020 }, { "epoch": 4.210501750291716, "grad_norm": 0.400390625, "learning_rate": 0.0001246016996495922, "loss": 0.0053, "step": 9021 }, { "epoch": 4.210968494749125, "grad_norm": 0.384765625, "learning_rate": 0.00012458747688400077, "loss": 0.0069, "step": 9022 }, { "epoch": 4.211435239206534, "grad_norm": 0.41796875, "learning_rate": 0.0001245732535890137, "loss": 0.0057, "step": 9023 }, { "epoch": 4.211901983663944, "grad_norm": 0.388671875, "learning_rate": 0.00012455902976493718, "loss": 0.0045, "step": 9024 }, { "epoch": 4.212368728121354, "grad_norm": 0.34765625, "learning_rate": 0.0001245448054120775, "loss": 0.0049, "step": 9025 }, { "epoch": 4.212835472578763, "grad_norm": 0.5859375, "learning_rate": 0.00012453058053074094, "loss": 0.0107, "step": 9026 }, { "epoch": 4.213302217036173, "grad_norm": 0.53125, "learning_rate": 0.0001245163551212338, "loss": 0.0077, "step": 9027 }, { "epoch": 4.2137689614935825, "grad_norm": 0.408203125, "learning_rate": 0.00012450212918386225, "loss": 0.0092, "step": 9028 }, { "epoch": 4.214235705950992, "grad_norm": 0.66015625, "learning_rate": 0.00012448790271893273, "loss": 0.0129, "step": 9029 }, { "epoch": 4.214702450408401, "grad_norm": 0.51171875, "learning_rate": 0.00012447367572675152, "loss": 0.008, "step": 9030 }, { "epoch": 4.215169194865811, "grad_norm": 0.5546875, "learning_rate": 0.00012445944820762487, "loss": 0.0068, "step": 9031 }, { "epoch": 4.215635939323221, "grad_norm": 0.498046875, "learning_rate": 0.00012444522016185917, "loss": 0.0096, "step": 9032 }, { "epoch": 4.21610268378063, "grad_norm": 0.3203125, "learning_rate": 0.0001244309915897608, "loss": 0.0058, "step": 9033 }, { "epoch": 4.2165694282380395, "grad_norm": 0.326171875, "learning_rate": 0.00012441676249163602, "loss": 0.0037, "step": 9034 }, { "epoch": 4.217036172695449, "grad_norm": 0.41015625, "learning_rate": 0.00012440253286779125, "loss": 0.0065, "step": 9035 }, { "epoch": 4.217502917152859, "grad_norm": 0.28515625, "learning_rate": 0.0001243883027185329, "loss": 0.0042, "step": 9036 }, { "epoch": 4.217969661610268, "grad_norm": 0.498046875, "learning_rate": 0.00012437407204416733, "loss": 0.0127, "step": 9037 }, { "epoch": 4.218436406067678, "grad_norm": 0.482421875, "learning_rate": 0.00012435984084500093, "loss": 0.0056, "step": 9038 }, { "epoch": 4.218903150525088, "grad_norm": 0.53515625, "learning_rate": 0.00012434560912134013, "loss": 0.0186, "step": 9039 }, { "epoch": 4.219369894982497, "grad_norm": 0.43359375, "learning_rate": 0.00012433137687349138, "loss": 0.0081, "step": 9040 }, { "epoch": 4.219836639439906, "grad_norm": 0.2255859375, "learning_rate": 0.00012431714410176108, "loss": 0.0026, "step": 9041 }, { "epoch": 4.220303383897316, "grad_norm": 0.35546875, "learning_rate": 0.0001243029108064557, "loss": 0.0055, "step": 9042 }, { "epoch": 4.220770128354726, "grad_norm": 0.7109375, "learning_rate": 0.00012428867698788165, "loss": 0.0083, "step": 9043 }, { "epoch": 4.221236872812136, "grad_norm": 0.40234375, "learning_rate": 0.00012427444264634545, "loss": 0.0112, "step": 9044 }, { "epoch": 4.221703617269545, "grad_norm": 0.474609375, "learning_rate": 0.00012426020778215358, "loss": 0.0121, "step": 9045 }, { "epoch": 4.222170361726954, "grad_norm": 0.6015625, "learning_rate": 0.0001242459723956125, "loss": 0.0108, "step": 9046 }, { "epoch": 4.222637106184364, "grad_norm": 0.443359375, "learning_rate": 0.00012423173648702876, "loss": 0.0063, "step": 9047 }, { "epoch": 4.223103850641774, "grad_norm": 0.6171875, "learning_rate": 0.0001242175000567088, "loss": 0.0092, "step": 9048 }, { "epoch": 4.223570595099183, "grad_norm": 0.5, "learning_rate": 0.00012420326310495924, "loss": 0.0091, "step": 9049 }, { "epoch": 4.224037339556593, "grad_norm": 0.451171875, "learning_rate": 0.00012418902563208657, "loss": 0.0095, "step": 9050 }, { "epoch": 4.2245040840140025, "grad_norm": 0.431640625, "learning_rate": 0.0001241747876383973, "loss": 0.006, "step": 9051 }, { "epoch": 4.224970828471412, "grad_norm": 0.56640625, "learning_rate": 0.00012416054912419805, "loss": 0.0122, "step": 9052 }, { "epoch": 4.225437572928821, "grad_norm": 0.44921875, "learning_rate": 0.0001241463100897954, "loss": 0.0065, "step": 9053 }, { "epoch": 4.225904317386231, "grad_norm": 0.369140625, "learning_rate": 0.00012413207053549584, "loss": 0.0073, "step": 9054 }, { "epoch": 4.226371061843641, "grad_norm": 0.7421875, "learning_rate": 0.0001241178304616061, "loss": 0.0185, "step": 9055 }, { "epoch": 4.2268378063010505, "grad_norm": 0.30078125, "learning_rate": 0.0001241035898684327, "loss": 0.0059, "step": 9056 }, { "epoch": 4.2273045507584595, "grad_norm": 0.357421875, "learning_rate": 0.00012408934875628223, "loss": 0.0071, "step": 9057 }, { "epoch": 4.227771295215869, "grad_norm": 0.65234375, "learning_rate": 0.00012407510712546135, "loss": 0.0061, "step": 9058 }, { "epoch": 4.228238039673279, "grad_norm": 0.6015625, "learning_rate": 0.00012406086497627675, "loss": 0.0142, "step": 9059 }, { "epoch": 4.228704784130689, "grad_norm": 0.232421875, "learning_rate": 0.00012404662230903498, "loss": 0.0055, "step": 9060 }, { "epoch": 4.229171528588098, "grad_norm": 0.470703125, "learning_rate": 0.00012403237912404276, "loss": 0.0146, "step": 9061 }, { "epoch": 4.2296382730455075, "grad_norm": 0.671875, "learning_rate": 0.00012401813542160675, "loss": 0.0133, "step": 9062 }, { "epoch": 4.230105017502917, "grad_norm": 0.7890625, "learning_rate": 0.0001240038912020337, "loss": 0.0175, "step": 9063 }, { "epoch": 4.230571761960327, "grad_norm": 0.3203125, "learning_rate": 0.00012398964646563013, "loss": 0.0042, "step": 9064 }, { "epoch": 4.231038506417736, "grad_norm": 0.369140625, "learning_rate": 0.00012397540121270294, "loss": 0.0049, "step": 9065 }, { "epoch": 4.231505250875146, "grad_norm": 0.396484375, "learning_rate": 0.00012396115544355873, "loss": 0.0082, "step": 9066 }, { "epoch": 4.231971995332556, "grad_norm": 0.59765625, "learning_rate": 0.00012394690915850425, "loss": 0.0118, "step": 9067 }, { "epoch": 4.2324387397899645, "grad_norm": 0.5078125, "learning_rate": 0.00012393266235784627, "loss": 0.0066, "step": 9068 }, { "epoch": 4.232905484247374, "grad_norm": 0.40625, "learning_rate": 0.0001239184150418915, "loss": 0.0064, "step": 9069 }, { "epoch": 4.233372228704784, "grad_norm": 0.53125, "learning_rate": 0.00012390416721094673, "loss": 0.0082, "step": 9070 }, { "epoch": 4.233838973162194, "grad_norm": 0.375, "learning_rate": 0.00012388991886531868, "loss": 0.0063, "step": 9071 }, { "epoch": 4.234305717619603, "grad_norm": 0.298828125, "learning_rate": 0.0001238756700053142, "loss": 0.0055, "step": 9072 }, { "epoch": 4.234772462077013, "grad_norm": 0.484375, "learning_rate": 0.00012386142063124008, "loss": 0.0118, "step": 9073 }, { "epoch": 4.235239206534422, "grad_norm": 0.52734375, "learning_rate": 0.00012384717074340307, "loss": 0.0139, "step": 9074 }, { "epoch": 4.235705950991832, "grad_norm": 0.5, "learning_rate": 0.00012383292034211, "loss": 0.0054, "step": 9075 }, { "epoch": 4.236172695449241, "grad_norm": 0.4296875, "learning_rate": 0.00012381866942766774, "loss": 0.0068, "step": 9076 }, { "epoch": 4.236639439906651, "grad_norm": 0.5078125, "learning_rate": 0.0001238044180003831, "loss": 0.0081, "step": 9077 }, { "epoch": 4.237106184364061, "grad_norm": 0.447265625, "learning_rate": 0.0001237901660605629, "loss": 0.0085, "step": 9078 }, { "epoch": 4.2375729288214705, "grad_norm": 0.73046875, "learning_rate": 0.00012377591360851408, "loss": 0.0174, "step": 9079 }, { "epoch": 4.238039673278879, "grad_norm": 0.52734375, "learning_rate": 0.00012376166064454342, "loss": 0.0105, "step": 9080 }, { "epoch": 4.238506417736289, "grad_norm": 0.28125, "learning_rate": 0.00012374740716895784, "loss": 0.0041, "step": 9081 }, { "epoch": 4.238973162193699, "grad_norm": 0.33984375, "learning_rate": 0.00012373315318206426, "loss": 0.0039, "step": 9082 }, { "epoch": 4.239439906651109, "grad_norm": 0.443359375, "learning_rate": 0.00012371889868416952, "loss": 0.0046, "step": 9083 }, { "epoch": 4.239906651108518, "grad_norm": 0.478515625, "learning_rate": 0.00012370464367558062, "loss": 0.0062, "step": 9084 }, { "epoch": 4.2403733955659275, "grad_norm": 0.57421875, "learning_rate": 0.0001236903881566044, "loss": 0.0122, "step": 9085 }, { "epoch": 4.240840140023337, "grad_norm": 0.48046875, "learning_rate": 0.0001236761321275479, "loss": 0.0138, "step": 9086 }, { "epoch": 4.241306884480747, "grad_norm": 0.306640625, "learning_rate": 0.00012366187558871794, "loss": 0.006, "step": 9087 }, { "epoch": 4.241773628938156, "grad_norm": 0.48046875, "learning_rate": 0.00012364761854042158, "loss": 0.0058, "step": 9088 }, { "epoch": 4.242240373395566, "grad_norm": 0.412109375, "learning_rate": 0.00012363336098296574, "loss": 0.0066, "step": 9089 }, { "epoch": 4.242707117852976, "grad_norm": 0.4765625, "learning_rate": 0.00012361910291665742, "loss": 0.0097, "step": 9090 }, { "epoch": 4.243173862310385, "grad_norm": 0.375, "learning_rate": 0.0001236048443418036, "loss": 0.0072, "step": 9091 }, { "epoch": 4.243640606767794, "grad_norm": 0.3984375, "learning_rate": 0.00012359058525871131, "loss": 0.0063, "step": 9092 }, { "epoch": 4.244107351225204, "grad_norm": 0.5625, "learning_rate": 0.00012357632566768753, "loss": 0.0065, "step": 9093 }, { "epoch": 4.244574095682614, "grad_norm": 0.39453125, "learning_rate": 0.00012356206556903931, "loss": 0.007, "step": 9094 }, { "epoch": 4.245040840140024, "grad_norm": 0.34375, "learning_rate": 0.00012354780496307368, "loss": 0.0064, "step": 9095 }, { "epoch": 4.245507584597433, "grad_norm": 0.388671875, "learning_rate": 0.00012353354385009769, "loss": 0.0074, "step": 9096 }, { "epoch": 4.245974329054842, "grad_norm": 0.64453125, "learning_rate": 0.00012351928223041836, "loss": 0.021, "step": 9097 }, { "epoch": 4.246441073512252, "grad_norm": 0.578125, "learning_rate": 0.0001235050201043428, "loss": 0.0119, "step": 9098 }, { "epoch": 4.246907817969662, "grad_norm": 0.357421875, "learning_rate": 0.00012349075747217808, "loss": 0.0057, "step": 9099 }, { "epoch": 4.247374562427071, "grad_norm": 0.37890625, "learning_rate": 0.00012347649433423129, "loss": 0.0072, "step": 9100 }, { "epoch": 4.247841306884481, "grad_norm": 0.32421875, "learning_rate": 0.00012346223069080952, "loss": 0.0061, "step": 9101 }, { "epoch": 4.2483080513418905, "grad_norm": 0.373046875, "learning_rate": 0.0001234479665422199, "loss": 0.0047, "step": 9102 }, { "epoch": 4.2487747957993, "grad_norm": 0.5234375, "learning_rate": 0.00012343370188876954, "loss": 0.0106, "step": 9103 }, { "epoch": 4.249241540256709, "grad_norm": 0.3671875, "learning_rate": 0.00012341943673076558, "loss": 0.0035, "step": 9104 }, { "epoch": 4.249708284714119, "grad_norm": 0.416015625, "learning_rate": 0.0001234051710685152, "loss": 0.008, "step": 9105 }, { "epoch": 4.250175029171529, "grad_norm": 0.68359375, "learning_rate": 0.00012339090490232546, "loss": 0.0118, "step": 9106 }, { "epoch": 4.250641773628939, "grad_norm": 0.431640625, "learning_rate": 0.00012337663823250362, "loss": 0.0081, "step": 9107 }, { "epoch": 4.2511085180863475, "grad_norm": 0.26171875, "learning_rate": 0.0001233623710593568, "loss": 0.0044, "step": 9108 }, { "epoch": 4.251575262543757, "grad_norm": 0.28515625, "learning_rate": 0.00012334810338319224, "loss": 0.0042, "step": 9109 }, { "epoch": 4.252042007001167, "grad_norm": 0.365234375, "learning_rate": 0.00012333383520431708, "loss": 0.0053, "step": 9110 }, { "epoch": 4.252508751458576, "grad_norm": 0.337890625, "learning_rate": 0.00012331956652303857, "loss": 0.0042, "step": 9111 }, { "epoch": 4.252975495915986, "grad_norm": 0.58203125, "learning_rate": 0.00012330529733966392, "loss": 0.0108, "step": 9112 }, { "epoch": 4.252975495915986, "eval_loss": 1.994627833366394, "eval_runtime": 55.4701, "eval_samples_per_second": 32.522, "eval_steps_per_second": 4.074, "step": 9112 }, { "epoch": 4.253442240373396, "grad_norm": 0.515625, "learning_rate": 0.00012329102765450034, "loss": 0.0114, "step": 9113 }, { "epoch": 4.253908984830805, "grad_norm": 0.474609375, "learning_rate": 0.0001232767574678551, "loss": 0.0087, "step": 9114 }, { "epoch": 4.254375729288215, "grad_norm": 0.421875, "learning_rate": 0.00012326248678003548, "loss": 0.0071, "step": 9115 }, { "epoch": 4.254842473745624, "grad_norm": 0.494140625, "learning_rate": 0.00012324821559134866, "loss": 0.0048, "step": 9116 }, { "epoch": 4.255309218203034, "grad_norm": 0.27734375, "learning_rate": 0.000123233943902102, "loss": 0.0049, "step": 9117 }, { "epoch": 4.255775962660444, "grad_norm": 0.359375, "learning_rate": 0.00012321967171260276, "loss": 0.0066, "step": 9118 }, { "epoch": 4.256242707117853, "grad_norm": 0.9375, "learning_rate": 0.0001232053990231582, "loss": 0.0143, "step": 9119 }, { "epoch": 4.256709451575262, "grad_norm": 0.267578125, "learning_rate": 0.00012319112583407564, "loss": 0.0034, "step": 9120 }, { "epoch": 4.257176196032672, "grad_norm": 0.5703125, "learning_rate": 0.00012317685214566242, "loss": 0.0146, "step": 9121 }, { "epoch": 4.257642940490082, "grad_norm": 0.4453125, "learning_rate": 0.0001231625779582259, "loss": 0.009, "step": 9122 }, { "epoch": 4.258109684947491, "grad_norm": 0.486328125, "learning_rate": 0.00012314830327207331, "loss": 0.0072, "step": 9123 }, { "epoch": 4.258576429404901, "grad_norm": 0.291015625, "learning_rate": 0.0001231340280875121, "loss": 0.0145, "step": 9124 }, { "epoch": 4.2590431738623105, "grad_norm": 0.421875, "learning_rate": 0.00012311975240484962, "loss": 0.0093, "step": 9125 }, { "epoch": 4.25950991831972, "grad_norm": 0.28515625, "learning_rate": 0.00012310547622439318, "loss": 0.0031, "step": 9126 }, { "epoch": 4.259976662777129, "grad_norm": 0.734375, "learning_rate": 0.00012309119954645022, "loss": 0.0175, "step": 9127 }, { "epoch": 4.260443407234539, "grad_norm": 0.474609375, "learning_rate": 0.00012307692237132812, "loss": 0.011, "step": 9128 }, { "epoch": 4.260910151691949, "grad_norm": 0.439453125, "learning_rate": 0.00012306264469933426, "loss": 0.0061, "step": 9129 }, { "epoch": 4.2613768961493586, "grad_norm": 0.58203125, "learning_rate": 0.00012304836653077609, "loss": 0.0158, "step": 9130 }, { "epoch": 4.2618436406067675, "grad_norm": 0.26171875, "learning_rate": 0.00012303408786596104, "loss": 0.0068, "step": 9131 }, { "epoch": 4.262310385064177, "grad_norm": 0.65625, "learning_rate": 0.00012301980870519646, "loss": 0.0114, "step": 9132 }, { "epoch": 4.262777129521587, "grad_norm": 0.3671875, "learning_rate": 0.0001230055290487899, "loss": 0.0067, "step": 9133 }, { "epoch": 4.263243873978997, "grad_norm": 0.55078125, "learning_rate": 0.00012299124889704873, "loss": 0.0114, "step": 9134 }, { "epoch": 4.263710618436406, "grad_norm": 0.4609375, "learning_rate": 0.0001229769682502805, "loss": 0.0118, "step": 9135 }, { "epoch": 4.2641773628938155, "grad_norm": 0.44140625, "learning_rate": 0.00012296268710879264, "loss": 0.0073, "step": 9136 }, { "epoch": 4.264644107351225, "grad_norm": 0.56640625, "learning_rate": 0.00012294840547289262, "loss": 0.0098, "step": 9137 }, { "epoch": 4.265110851808635, "grad_norm": 0.6796875, "learning_rate": 0.00012293412334288804, "loss": 0.0163, "step": 9138 }, { "epoch": 4.265577596266044, "grad_norm": 0.404296875, "learning_rate": 0.00012291984071908626, "loss": 0.0064, "step": 9139 }, { "epoch": 4.266044340723454, "grad_norm": 0.31640625, "learning_rate": 0.00012290555760179495, "loss": 0.0067, "step": 9140 }, { "epoch": 4.266511085180864, "grad_norm": 0.267578125, "learning_rate": 0.00012289127399132152, "loss": 0.0056, "step": 9141 }, { "epoch": 4.266977829638273, "grad_norm": 0.6328125, "learning_rate": 0.00012287698988797357, "loss": 0.0076, "step": 9142 }, { "epoch": 4.267444574095682, "grad_norm": 0.5390625, "learning_rate": 0.00012286270529205865, "loss": 0.0097, "step": 9143 }, { "epoch": 4.267911318553092, "grad_norm": 0.37890625, "learning_rate": 0.00012284842020388433, "loss": 0.0051, "step": 9144 }, { "epoch": 4.268378063010502, "grad_norm": 0.46484375, "learning_rate": 0.00012283413462375816, "loss": 0.0098, "step": 9145 }, { "epoch": 4.268844807467912, "grad_norm": 0.64453125, "learning_rate": 0.00012281984855198774, "loss": 0.0149, "step": 9146 }, { "epoch": 4.269311551925321, "grad_norm": 0.51171875, "learning_rate": 0.00012280556198888067, "loss": 0.0061, "step": 9147 }, { "epoch": 4.26977829638273, "grad_norm": 0.4453125, "learning_rate": 0.00012279127493474454, "loss": 0.0081, "step": 9148 }, { "epoch": 4.27024504084014, "grad_norm": 0.2314453125, "learning_rate": 0.00012277698738988695, "loss": 0.0044, "step": 9149 }, { "epoch": 4.27071178529755, "grad_norm": 0.2294921875, "learning_rate": 0.00012276269935461562, "loss": 0.0046, "step": 9150 }, { "epoch": 4.271178529754959, "grad_norm": 0.482421875, "learning_rate": 0.00012274841082923806, "loss": 0.0073, "step": 9151 }, { "epoch": 4.271645274212369, "grad_norm": 0.5546875, "learning_rate": 0.00012273412181406198, "loss": 0.009, "step": 9152 }, { "epoch": 4.2721120186697785, "grad_norm": 0.330078125, "learning_rate": 0.00012271983230939504, "loss": 0.0045, "step": 9153 }, { "epoch": 4.272578763127187, "grad_norm": 0.66796875, "learning_rate": 0.00012270554231554493, "loss": 0.0136, "step": 9154 }, { "epoch": 4.273045507584597, "grad_norm": 0.43359375, "learning_rate": 0.00012269125183281924, "loss": 0.006, "step": 9155 }, { "epoch": 4.273512252042007, "grad_norm": 0.4453125, "learning_rate": 0.0001226769608615258, "loss": 0.0073, "step": 9156 }, { "epoch": 4.273978996499417, "grad_norm": 0.37890625, "learning_rate": 0.00012266266940197217, "loss": 0.0073, "step": 9157 }, { "epoch": 4.274445740956826, "grad_norm": 0.283203125, "learning_rate": 0.00012264837745446612, "loss": 0.0073, "step": 9158 }, { "epoch": 4.2749124854142355, "grad_norm": 0.5, "learning_rate": 0.0001226340850193154, "loss": 0.0105, "step": 9159 }, { "epoch": 4.275379229871645, "grad_norm": 0.51953125, "learning_rate": 0.00012261979209682772, "loss": 0.0141, "step": 9160 }, { "epoch": 4.275845974329055, "grad_norm": 0.41796875, "learning_rate": 0.00012260549868731082, "loss": 0.0099, "step": 9161 }, { "epoch": 4.276312718786464, "grad_norm": 0.447265625, "learning_rate": 0.0001225912047910724, "loss": 0.0046, "step": 9162 }, { "epoch": 4.276779463243874, "grad_norm": 0.46484375, "learning_rate": 0.00012257691040842034, "loss": 0.0076, "step": 9163 }, { "epoch": 4.277246207701284, "grad_norm": 0.5546875, "learning_rate": 0.00012256261553966234, "loss": 0.01, "step": 9164 }, { "epoch": 4.277712952158693, "grad_norm": 0.41015625, "learning_rate": 0.00012254832018510615, "loss": 0.0082, "step": 9165 }, { "epoch": 4.278179696616102, "grad_norm": 0.6328125, "learning_rate": 0.00012253402434505965, "loss": 0.0149, "step": 9166 }, { "epoch": 4.278646441073512, "grad_norm": 0.435546875, "learning_rate": 0.00012251972801983062, "loss": 0.0113, "step": 9167 }, { "epoch": 4.279113185530922, "grad_norm": 0.51171875, "learning_rate": 0.00012250543120972683, "loss": 0.0062, "step": 9168 }, { "epoch": 4.279579929988332, "grad_norm": 0.30078125, "learning_rate": 0.00012249113391505612, "loss": 0.0156, "step": 9169 }, { "epoch": 4.280046674445741, "grad_norm": 0.443359375, "learning_rate": 0.00012247683613612637, "loss": 0.0067, "step": 9170 }, { "epoch": 4.28051341890315, "grad_norm": 0.462890625, "learning_rate": 0.0001224625378732454, "loss": 0.0059, "step": 9171 }, { "epoch": 4.28098016336056, "grad_norm": 0.400390625, "learning_rate": 0.00012244823912672102, "loss": 0.0042, "step": 9172 }, { "epoch": 4.28144690781797, "grad_norm": 0.8671875, "learning_rate": 0.0001224339398968612, "loss": 0.0078, "step": 9173 }, { "epoch": 4.281913652275379, "grad_norm": 0.380859375, "learning_rate": 0.00012241964018397375, "loss": 0.0062, "step": 9174 }, { "epoch": 4.282380396732789, "grad_norm": 0.6875, "learning_rate": 0.00012240533998836658, "loss": 0.0114, "step": 9175 }, { "epoch": 4.2828471411901985, "grad_norm": 0.546875, "learning_rate": 0.00012239103931034758, "loss": 0.0097, "step": 9176 }, { "epoch": 4.283313885647608, "grad_norm": 0.345703125, "learning_rate": 0.00012237673815022467, "loss": 0.0062, "step": 9177 }, { "epoch": 4.283780630105017, "grad_norm": 0.3046875, "learning_rate": 0.0001223624365083057, "loss": 0.0048, "step": 9178 }, { "epoch": 4.284247374562427, "grad_norm": 0.43359375, "learning_rate": 0.0001223481343848987, "loss": 0.0083, "step": 9179 }, { "epoch": 4.284714119019837, "grad_norm": 0.470703125, "learning_rate": 0.0001223338317803116, "loss": 0.0051, "step": 9180 }, { "epoch": 4.285180863477247, "grad_norm": 0.703125, "learning_rate": 0.0001223195286948523, "loss": 0.0077, "step": 9181 }, { "epoch": 4.2856476079346555, "grad_norm": 0.419921875, "learning_rate": 0.00012230522512882876, "loss": 0.0105, "step": 9182 }, { "epoch": 4.286114352392065, "grad_norm": 0.447265625, "learning_rate": 0.000122290921082549, "loss": 0.0072, "step": 9183 }, { "epoch": 4.286581096849475, "grad_norm": 0.6875, "learning_rate": 0.00012227661655632096, "loss": 0.0124, "step": 9184 }, { "epoch": 4.287047841306885, "grad_norm": 0.462890625, "learning_rate": 0.00012226231155045263, "loss": 0.005, "step": 9185 }, { "epoch": 4.287514585764294, "grad_norm": 0.255859375, "learning_rate": 0.00012224800606525207, "loss": 0.0034, "step": 9186 }, { "epoch": 4.287981330221704, "grad_norm": 0.34765625, "learning_rate": 0.00012223370010102725, "loss": 0.0092, "step": 9187 }, { "epoch": 4.288448074679113, "grad_norm": 0.404296875, "learning_rate": 0.00012221939365808618, "loss": 0.0117, "step": 9188 }, { "epoch": 4.288914819136522, "grad_norm": 0.2578125, "learning_rate": 0.00012220508673673695, "loss": 0.0028, "step": 9189 }, { "epoch": 4.289381563593932, "grad_norm": 0.53515625, "learning_rate": 0.00012219077933728754, "loss": 0.0087, "step": 9190 }, { "epoch": 4.289848308051342, "grad_norm": 0.48046875, "learning_rate": 0.00012217647146004602, "loss": 0.0089, "step": 9191 }, { "epoch": 4.290315052508752, "grad_norm": 0.52734375, "learning_rate": 0.00012216216310532047, "loss": 0.0135, "step": 9192 }, { "epoch": 4.2907817969661615, "grad_norm": 0.703125, "learning_rate": 0.00012214785427341898, "loss": 0.0079, "step": 9193 }, { "epoch": 4.29124854142357, "grad_norm": 0.49609375, "learning_rate": 0.0001221335449646496, "loss": 0.0073, "step": 9194 }, { "epoch": 4.29171528588098, "grad_norm": 0.53515625, "learning_rate": 0.0001221192351793204, "loss": 0.0095, "step": 9195 }, { "epoch": 4.29218203033839, "grad_norm": 0.3984375, "learning_rate": 0.00012210492491773956, "loss": 0.0058, "step": 9196 }, { "epoch": 4.292648774795799, "grad_norm": 0.447265625, "learning_rate": 0.00012209061418021517, "loss": 0.0057, "step": 9197 }, { "epoch": 4.293115519253209, "grad_norm": 0.515625, "learning_rate": 0.00012207630296705535, "loss": 0.0086, "step": 9198 }, { "epoch": 4.2935822637106185, "grad_norm": 0.73046875, "learning_rate": 0.00012206199127856823, "loss": 0.0084, "step": 9199 }, { "epoch": 4.294049008168028, "grad_norm": 0.33203125, "learning_rate": 0.00012204767911506196, "loss": 0.0054, "step": 9200 }, { "epoch": 4.294515752625437, "grad_norm": 0.3046875, "learning_rate": 0.00012203336647684467, "loss": 0.0049, "step": 9201 }, { "epoch": 4.294982497082847, "grad_norm": 0.404296875, "learning_rate": 0.00012201905336422456, "loss": 0.0058, "step": 9202 }, { "epoch": 4.295449241540257, "grad_norm": 0.625, "learning_rate": 0.00012200473977750983, "loss": 0.0101, "step": 9203 }, { "epoch": 4.2959159859976666, "grad_norm": 0.353515625, "learning_rate": 0.0001219904257170086, "loss": 0.0046, "step": 9204 }, { "epoch": 4.2963827304550755, "grad_norm": 0.404296875, "learning_rate": 0.00012197611118302915, "loss": 0.0065, "step": 9205 }, { "epoch": 4.296849474912485, "grad_norm": 0.439453125, "learning_rate": 0.00012196179617587961, "loss": 0.0051, "step": 9206 }, { "epoch": 4.297316219369895, "grad_norm": 0.53515625, "learning_rate": 0.0001219474806958682, "loss": 0.0087, "step": 9207 }, { "epoch": 4.297782963827305, "grad_norm": 0.31640625, "learning_rate": 0.00012193316474330325, "loss": 0.0043, "step": 9208 }, { "epoch": 4.298249708284714, "grad_norm": 0.54296875, "learning_rate": 0.00012191884831849289, "loss": 0.0102, "step": 9209 }, { "epoch": 4.2987164527421236, "grad_norm": 0.609375, "learning_rate": 0.00012190453142174543, "loss": 0.0167, "step": 9210 }, { "epoch": 4.299183197199533, "grad_norm": 0.5390625, "learning_rate": 0.00012189021405336906, "loss": 0.0068, "step": 9211 }, { "epoch": 4.299649941656943, "grad_norm": 0.5, "learning_rate": 0.00012187589621367213, "loss": 0.009, "step": 9212 }, { "epoch": 4.300116686114352, "grad_norm": 0.73046875, "learning_rate": 0.00012186157790296289, "loss": 0.0122, "step": 9213 }, { "epoch": 4.300583430571762, "grad_norm": 0.52734375, "learning_rate": 0.00012184725912154959, "loss": 0.01, "step": 9214 }, { "epoch": 4.301050175029172, "grad_norm": 0.55859375, "learning_rate": 0.00012183293986974058, "loss": 0.0087, "step": 9215 }, { "epoch": 4.301516919486581, "grad_norm": 0.7265625, "learning_rate": 0.00012181862014784415, "loss": 0.0124, "step": 9216 }, { "epoch": 4.30198366394399, "grad_norm": 0.69921875, "learning_rate": 0.0001218042999561686, "loss": 0.0105, "step": 9217 }, { "epoch": 4.3024504084014, "grad_norm": 0.640625, "learning_rate": 0.0001217899792950223, "loss": 0.011, "step": 9218 }, { "epoch": 4.30291715285881, "grad_norm": 0.5078125, "learning_rate": 0.00012177565816471357, "loss": 0.0051, "step": 9219 }, { "epoch": 4.30338389731622, "grad_norm": 0.55078125, "learning_rate": 0.00012176133656555078, "loss": 0.0085, "step": 9220 }, { "epoch": 4.303850641773629, "grad_norm": 0.5234375, "learning_rate": 0.00012174701449784223, "loss": 0.0069, "step": 9221 }, { "epoch": 4.304317386231038, "grad_norm": 0.4765625, "learning_rate": 0.00012173269196189636, "loss": 0.008, "step": 9222 }, { "epoch": 4.304784130688448, "grad_norm": 0.60546875, "learning_rate": 0.00012171836895802149, "loss": 0.0115, "step": 9223 }, { "epoch": 4.305250875145858, "grad_norm": 0.64453125, "learning_rate": 0.00012170404548652606, "loss": 0.0114, "step": 9224 }, { "epoch": 4.305717619603267, "grad_norm": 0.6015625, "learning_rate": 0.00012168972154771844, "loss": 0.0113, "step": 9225 }, { "epoch": 4.306184364060677, "grad_norm": 0.41015625, "learning_rate": 0.00012167539714190705, "loss": 0.0048, "step": 9226 }, { "epoch": 4.3066511085180865, "grad_norm": 0.470703125, "learning_rate": 0.00012166107226940032, "loss": 0.0058, "step": 9227 }, { "epoch": 4.307117852975496, "grad_norm": 0.234375, "learning_rate": 0.00012164674693050666, "loss": 0.0033, "step": 9228 }, { "epoch": 4.307584597432905, "grad_norm": 0.5, "learning_rate": 0.00012163242112553453, "loss": 0.0068, "step": 9229 }, { "epoch": 4.308051341890315, "grad_norm": 0.50390625, "learning_rate": 0.00012161809485479234, "loss": 0.0093, "step": 9230 }, { "epoch": 4.308518086347725, "grad_norm": 0.56640625, "learning_rate": 0.00012160376811858863, "loss": 0.0128, "step": 9231 }, { "epoch": 4.308984830805134, "grad_norm": 0.345703125, "learning_rate": 0.0001215894409172318, "loss": 0.0043, "step": 9232 }, { "epoch": 4.3094515752625435, "grad_norm": 0.58984375, "learning_rate": 0.00012157511325103033, "loss": 0.0108, "step": 9233 }, { "epoch": 4.309918319719953, "grad_norm": 0.4765625, "learning_rate": 0.00012156078512029276, "loss": 0.0095, "step": 9234 }, { "epoch": 4.310385064177363, "grad_norm": 0.61328125, "learning_rate": 0.00012154645652532757, "loss": 0.0125, "step": 9235 }, { "epoch": 4.310851808634773, "grad_norm": 0.4453125, "learning_rate": 0.00012153212746644327, "loss": 0.0113, "step": 9236 }, { "epoch": 4.311318553092182, "grad_norm": 0.486328125, "learning_rate": 0.00012151779794394835, "loss": 0.0087, "step": 9237 }, { "epoch": 4.311785297549592, "grad_norm": 0.45703125, "learning_rate": 0.00012150346795815137, "loss": 0.0091, "step": 9238 }, { "epoch": 4.312252042007001, "grad_norm": 0.369140625, "learning_rate": 0.00012148913750936089, "loss": 0.006, "step": 9239 }, { "epoch": 4.31271878646441, "grad_norm": 0.953125, "learning_rate": 0.00012147480659788542, "loss": 0.0075, "step": 9240 }, { "epoch": 4.31318553092182, "grad_norm": 0.484375, "learning_rate": 0.00012146047522403354, "loss": 0.0072, "step": 9241 }, { "epoch": 4.31365227537923, "grad_norm": 0.6015625, "learning_rate": 0.00012144614338811382, "loss": 0.0111, "step": 9242 }, { "epoch": 4.31411901983664, "grad_norm": 0.357421875, "learning_rate": 0.00012143181109043484, "loss": 0.0058, "step": 9243 }, { "epoch": 4.314585764294049, "grad_norm": 0.345703125, "learning_rate": 0.00012141747833130517, "loss": 0.0092, "step": 9244 }, { "epoch": 4.315052508751458, "grad_norm": 0.53125, "learning_rate": 0.00012140314511103344, "loss": 0.009, "step": 9245 }, { "epoch": 4.315519253208868, "grad_norm": 0.53515625, "learning_rate": 0.00012138881142992824, "loss": 0.0105, "step": 9246 }, { "epoch": 4.315985997666278, "grad_norm": 0.55078125, "learning_rate": 0.00012137447728829823, "loss": 0.0128, "step": 9247 }, { "epoch": 4.316452742123687, "grad_norm": 0.59765625, "learning_rate": 0.000121360142686452, "loss": 0.0087, "step": 9248 }, { "epoch": 4.316919486581097, "grad_norm": 0.60546875, "learning_rate": 0.00012134580762469818, "loss": 0.0092, "step": 9249 }, { "epoch": 4.3173862310385065, "grad_norm": 0.447265625, "learning_rate": 0.00012133147210334544, "loss": 0.0086, "step": 9250 }, { "epoch": 4.317852975495916, "grad_norm": 0.35546875, "learning_rate": 0.00012131713612270243, "loss": 0.0075, "step": 9251 }, { "epoch": 4.318319719953325, "grad_norm": 0.53515625, "learning_rate": 0.00012130279968307785, "loss": 0.0112, "step": 9252 }, { "epoch": 4.318786464410735, "grad_norm": 0.671875, "learning_rate": 0.00012128846278478033, "loss": 0.0176, "step": 9253 }, { "epoch": 4.319253208868145, "grad_norm": 1.0234375, "learning_rate": 0.00012127412542811861, "loss": 0.0233, "step": 9254 }, { "epoch": 4.319719953325555, "grad_norm": 0.5859375, "learning_rate": 0.00012125978761340135, "loss": 0.0091, "step": 9255 }, { "epoch": 4.3201866977829635, "grad_norm": 0.3984375, "learning_rate": 0.00012124544934093727, "loss": 0.0049, "step": 9256 }, { "epoch": 4.320653442240373, "grad_norm": 0.45703125, "learning_rate": 0.00012123111061103512, "loss": 0.0083, "step": 9257 }, { "epoch": 4.321120186697783, "grad_norm": 0.484375, "learning_rate": 0.00012121677142400359, "loss": 0.017, "step": 9258 }, { "epoch": 4.321586931155193, "grad_norm": 0.5234375, "learning_rate": 0.00012120243178015142, "loss": 0.01, "step": 9259 }, { "epoch": 4.322053675612602, "grad_norm": 0.498046875, "learning_rate": 0.00012118809167978737, "loss": 0.0111, "step": 9260 }, { "epoch": 4.322520420070012, "grad_norm": 0.408203125, "learning_rate": 0.00012117375112322021, "loss": 0.0051, "step": 9261 }, { "epoch": 4.322987164527421, "grad_norm": 0.486328125, "learning_rate": 0.0001211594101107587, "loss": 0.0118, "step": 9262 }, { "epoch": 4.323453908984831, "grad_norm": 0.61328125, "learning_rate": 0.00012114506864271158, "loss": 0.0124, "step": 9263 }, { "epoch": 4.32392065344224, "grad_norm": 0.5078125, "learning_rate": 0.0001211307267193877, "loss": 0.0085, "step": 9264 }, { "epoch": 4.32438739789965, "grad_norm": 0.5703125, "learning_rate": 0.00012111638434109584, "loss": 0.0117, "step": 9265 }, { "epoch": 4.32485414235706, "grad_norm": 0.408203125, "learning_rate": 0.00012110204150814477, "loss": 0.0105, "step": 9266 }, { "epoch": 4.3253208868144695, "grad_norm": 0.52734375, "learning_rate": 0.00012108769822084334, "loss": 0.0106, "step": 9267 }, { "epoch": 4.325787631271878, "grad_norm": 0.74609375, "learning_rate": 0.00012107335447950037, "loss": 0.0154, "step": 9268 }, { "epoch": 4.326254375729288, "grad_norm": 0.41796875, "learning_rate": 0.00012105901028442473, "loss": 0.0056, "step": 9269 }, { "epoch": 4.326721120186698, "grad_norm": 0.53515625, "learning_rate": 0.0001210446656359252, "loss": 0.0092, "step": 9270 }, { "epoch": 4.327187864644108, "grad_norm": 0.5, "learning_rate": 0.00012103032053431067, "loss": 0.0112, "step": 9271 }, { "epoch": 4.327654609101517, "grad_norm": 0.78515625, "learning_rate": 0.00012101597497988999, "loss": 0.0123, "step": 9272 }, { "epoch": 4.3281213535589265, "grad_norm": 0.58984375, "learning_rate": 0.00012100162897297207, "loss": 0.015, "step": 9273 }, { "epoch": 4.328588098016336, "grad_norm": 0.44140625, "learning_rate": 0.0001209872825138658, "loss": 0.0097, "step": 9274 }, { "epoch": 4.329054842473745, "grad_norm": 0.578125, "learning_rate": 0.00012097293560288003, "loss": 0.0127, "step": 9275 }, { "epoch": 4.329521586931155, "grad_norm": 0.60546875, "learning_rate": 0.00012095858824032367, "loss": 0.0152, "step": 9276 }, { "epoch": 4.329988331388565, "grad_norm": 0.62890625, "learning_rate": 0.00012094424042650568, "loss": 0.0155, "step": 9277 }, { "epoch": 4.3304550758459746, "grad_norm": 0.25, "learning_rate": 0.00012092989216173493, "loss": 0.0036, "step": 9278 }, { "epoch": 4.330921820303384, "grad_norm": 0.37109375, "learning_rate": 0.00012091554344632036, "loss": 0.0047, "step": 9279 }, { "epoch": 4.331388564760793, "grad_norm": 0.439453125, "learning_rate": 0.00012090119428057099, "loss": 0.0065, "step": 9280 }, { "epoch": 4.331855309218203, "grad_norm": 0.5546875, "learning_rate": 0.00012088684466479569, "loss": 0.009, "step": 9281 }, { "epoch": 4.332322053675613, "grad_norm": 0.375, "learning_rate": 0.00012087249459930343, "loss": 0.0085, "step": 9282 }, { "epoch": 4.332788798133022, "grad_norm": 0.453125, "learning_rate": 0.00012085814408440324, "loss": 0.008, "step": 9283 }, { "epoch": 4.3332555425904316, "grad_norm": 0.58203125, "learning_rate": 0.00012084379312040406, "loss": 0.0127, "step": 9284 }, { "epoch": 4.333722287047841, "grad_norm": 0.400390625, "learning_rate": 0.00012082944170761488, "loss": 0.0155, "step": 9285 }, { "epoch": 4.334189031505251, "grad_norm": 0.482421875, "learning_rate": 0.00012081508984634468, "loss": 0.0133, "step": 9286 }, { "epoch": 4.33465577596266, "grad_norm": 0.46875, "learning_rate": 0.00012080073753690253, "loss": 0.0082, "step": 9287 }, { "epoch": 4.33512252042007, "grad_norm": 0.5234375, "learning_rate": 0.00012078638477959743, "loss": 0.009, "step": 9288 }, { "epoch": 4.33558926487748, "grad_norm": 0.51953125, "learning_rate": 0.00012077203157473838, "loss": 0.0066, "step": 9289 }, { "epoch": 4.336056009334889, "grad_norm": 0.41015625, "learning_rate": 0.00012075767792263446, "loss": 0.0052, "step": 9290 }, { "epoch": 4.336522753792298, "grad_norm": 0.61328125, "learning_rate": 0.0001207433238235947, "loss": 0.0125, "step": 9291 }, { "epoch": 4.336989498249708, "grad_norm": 0.6640625, "learning_rate": 0.00012072896927792818, "loss": 0.0163, "step": 9292 }, { "epoch": 4.337456242707118, "grad_norm": 0.357421875, "learning_rate": 0.0001207146142859439, "loss": 0.0065, "step": 9293 }, { "epoch": 4.337922987164528, "grad_norm": 0.498046875, "learning_rate": 0.00012070025884795107, "loss": 0.0086, "step": 9294 }, { "epoch": 4.338389731621937, "grad_norm": 0.5078125, "learning_rate": 0.00012068590296425864, "loss": 0.0098, "step": 9295 }, { "epoch": 4.338856476079346, "grad_norm": 0.625, "learning_rate": 0.00012067154663517577, "loss": 0.008, "step": 9296 }, { "epoch": 4.339323220536756, "grad_norm": 0.287109375, "learning_rate": 0.0001206571898610116, "loss": 0.0038, "step": 9297 }, { "epoch": 4.339789964994166, "grad_norm": 0.435546875, "learning_rate": 0.00012064283264207516, "loss": 0.0073, "step": 9298 }, { "epoch": 4.340256709451575, "grad_norm": 0.53515625, "learning_rate": 0.00012062847497867566, "loss": 0.0118, "step": 9299 }, { "epoch": 4.340723453908985, "grad_norm": 0.384765625, "learning_rate": 0.00012061411687112223, "loss": 0.0074, "step": 9300 }, { "epoch": 4.3411901983663945, "grad_norm": 0.3671875, "learning_rate": 0.00012059975831972397, "loss": 0.007, "step": 9301 }, { "epoch": 4.341656942823804, "grad_norm": 0.51953125, "learning_rate": 0.00012058539932479003, "loss": 0.0089, "step": 9302 }, { "epoch": 4.342123687281213, "grad_norm": 0.375, "learning_rate": 0.00012057103988662964, "loss": 0.0056, "step": 9303 }, { "epoch": 4.342590431738623, "grad_norm": 0.55859375, "learning_rate": 0.00012055668000555194, "loss": 0.0102, "step": 9304 }, { "epoch": 4.343057176196033, "grad_norm": 0.64453125, "learning_rate": 0.00012054231968186606, "loss": 0.0052, "step": 9305 }, { "epoch": 4.343523920653443, "grad_norm": 0.6328125, "learning_rate": 0.00012052795891588129, "loss": 0.0102, "step": 9306 }, { "epoch": 4.3439906651108515, "grad_norm": 0.5859375, "learning_rate": 0.0001205135977079068, "loss": 0.0128, "step": 9307 }, { "epoch": 4.344457409568261, "grad_norm": 0.515625, "learning_rate": 0.00012049923605825178, "loss": 0.0083, "step": 9308 }, { "epoch": 4.344924154025671, "grad_norm": 0.4296875, "learning_rate": 0.00012048487396722543, "loss": 0.0063, "step": 9309 }, { "epoch": 4.345390898483081, "grad_norm": 0.421875, "learning_rate": 0.00012047051143513706, "loss": 0.0125, "step": 9310 }, { "epoch": 4.34585764294049, "grad_norm": 0.3515625, "learning_rate": 0.00012045614846229584, "loss": 0.005, "step": 9311 }, { "epoch": 4.3463243873979, "grad_norm": 0.5078125, "learning_rate": 0.00012044178504901105, "loss": 0.0072, "step": 9312 }, { "epoch": 4.346791131855309, "grad_norm": 0.73046875, "learning_rate": 0.00012042742119559194, "loss": 0.0129, "step": 9313 }, { "epoch": 4.347257876312719, "grad_norm": 0.5859375, "learning_rate": 0.00012041305690234781, "loss": 0.0143, "step": 9314 }, { "epoch": 4.347724620770128, "grad_norm": 0.498046875, "learning_rate": 0.00012039869216958788, "loss": 0.0089, "step": 9315 }, { "epoch": 4.348191365227538, "grad_norm": 0.56640625, "learning_rate": 0.00012038432699762152, "loss": 0.0103, "step": 9316 }, { "epoch": 4.348658109684948, "grad_norm": 0.443359375, "learning_rate": 0.00012036996138675796, "loss": 0.0061, "step": 9317 }, { "epoch": 4.349124854142357, "grad_norm": 0.66015625, "learning_rate": 0.00012035559533730653, "loss": 0.0091, "step": 9318 }, { "epoch": 4.349591598599766, "grad_norm": 0.345703125, "learning_rate": 0.00012034122884957657, "loss": 0.0046, "step": 9319 }, { "epoch": 4.350058343057176, "grad_norm": 0.423828125, "learning_rate": 0.00012032686192387736, "loss": 0.0068, "step": 9320 }, { "epoch": 4.350525087514586, "grad_norm": 0.5546875, "learning_rate": 0.00012031249456051825, "loss": 0.0096, "step": 9321 }, { "epoch": 4.350991831971996, "grad_norm": 0.5390625, "learning_rate": 0.0001202981267598086, "loss": 0.0072, "step": 9322 }, { "epoch": 4.351458576429405, "grad_norm": 0.25, "learning_rate": 0.0001202837585220578, "loss": 0.0043, "step": 9323 }, { "epoch": 4.3519253208868145, "grad_norm": 0.333984375, "learning_rate": 0.00012026938984757514, "loss": 0.0038, "step": 9324 }, { "epoch": 4.352392065344224, "grad_norm": 0.40625, "learning_rate": 0.00012025502073667001, "loss": 0.0069, "step": 9325 }, { "epoch": 4.352858809801633, "grad_norm": 0.65625, "learning_rate": 0.00012024065118965184, "loss": 0.0125, "step": 9326 }, { "epoch": 4.353325554259043, "grad_norm": 0.75, "learning_rate": 0.00012022628120683, "loss": 0.0127, "step": 9327 }, { "epoch": 4.353792298716453, "grad_norm": 0.5234375, "learning_rate": 0.00012021191078851384, "loss": 0.0074, "step": 9328 }, { "epoch": 4.354259043173863, "grad_norm": 0.302734375, "learning_rate": 0.00012019753993501284, "loss": 0.0038, "step": 9329 }, { "epoch": 4.3547257876312715, "grad_norm": 0.7734375, "learning_rate": 0.00012018316864663641, "loss": 0.0181, "step": 9330 }, { "epoch": 4.355192532088681, "grad_norm": 0.4375, "learning_rate": 0.00012016879692369394, "loss": 0.0078, "step": 9331 }, { "epoch": 4.355659276546091, "grad_norm": 0.4453125, "learning_rate": 0.0001201544247664949, "loss": 0.0061, "step": 9332 }, { "epoch": 4.356126021003501, "grad_norm": 0.330078125, "learning_rate": 0.00012014005217534874, "loss": 0.005, "step": 9333 }, { "epoch": 4.35659276546091, "grad_norm": 0.412109375, "learning_rate": 0.00012012567915056492, "loss": 0.0086, "step": 9334 }, { "epoch": 4.35705950991832, "grad_norm": 0.515625, "learning_rate": 0.00012011130569245288, "loss": 0.0073, "step": 9335 }, { "epoch": 4.357526254375729, "grad_norm": 0.54296875, "learning_rate": 0.00012009693180132213, "loss": 0.0067, "step": 9336 }, { "epoch": 4.357992998833139, "grad_norm": 0.51171875, "learning_rate": 0.00012008255747748214, "loss": 0.0099, "step": 9337 }, { "epoch": 4.358459743290548, "grad_norm": 0.349609375, "learning_rate": 0.00012006818272124238, "loss": 0.0051, "step": 9338 }, { "epoch": 4.358926487747958, "grad_norm": 0.58984375, "learning_rate": 0.0001200538075329124, "loss": 0.012, "step": 9339 }, { "epoch": 4.359393232205368, "grad_norm": 0.29296875, "learning_rate": 0.00012003943191280171, "loss": 0.0036, "step": 9340 }, { "epoch": 4.3598599766627775, "grad_norm": 0.6953125, "learning_rate": 0.00012002505586121977, "loss": 0.0159, "step": 9341 }, { "epoch": 4.360326721120186, "grad_norm": 0.5234375, "learning_rate": 0.00012001067937847623, "loss": 0.0078, "step": 9342 }, { "epoch": 4.360793465577596, "grad_norm": 0.421875, "learning_rate": 0.00011999630246488051, "loss": 0.0142, "step": 9343 }, { "epoch": 4.361260210035006, "grad_norm": 0.416015625, "learning_rate": 0.00011998192512074222, "loss": 0.0043, "step": 9344 }, { "epoch": 4.361726954492416, "grad_norm": 0.435546875, "learning_rate": 0.00011996754734637092, "loss": 0.0074, "step": 9345 }, { "epoch": 4.362193698949825, "grad_norm": 0.337890625, "learning_rate": 0.00011995316914207617, "loss": 0.0068, "step": 9346 }, { "epoch": 4.3626604434072345, "grad_norm": 0.56640625, "learning_rate": 0.00011993879050816751, "loss": 0.0203, "step": 9347 }, { "epoch": 4.363127187864644, "grad_norm": 0.400390625, "learning_rate": 0.0001199244114449546, "loss": 0.0065, "step": 9348 }, { "epoch": 4.363593932322054, "grad_norm": 0.5625, "learning_rate": 0.00011991003195274704, "loss": 0.0097, "step": 9349 }, { "epoch": 4.364060676779463, "grad_norm": 0.56640625, "learning_rate": 0.00011989565203185437, "loss": 0.0105, "step": 9350 }, { "epoch": 4.364527421236873, "grad_norm": 0.60546875, "learning_rate": 0.00011988127168258623, "loss": 0.0089, "step": 9351 }, { "epoch": 4.364994165694283, "grad_norm": 0.56640625, "learning_rate": 0.00011986689090525225, "loss": 0.0076, "step": 9352 }, { "epoch": 4.365460910151692, "grad_norm": 0.29296875, "learning_rate": 0.00011985250970016209, "loss": 0.0049, "step": 9353 }, { "epoch": 4.365927654609101, "grad_norm": 0.490234375, "learning_rate": 0.00011983812806762534, "loss": 0.0102, "step": 9354 }, { "epoch": 4.366394399066511, "grad_norm": 0.40234375, "learning_rate": 0.0001198237460079517, "loss": 0.0118, "step": 9355 }, { "epoch": 4.366861143523921, "grad_norm": 0.4609375, "learning_rate": 0.0001198093635214508, "loss": 0.0076, "step": 9356 }, { "epoch": 4.367327887981331, "grad_norm": 0.63671875, "learning_rate": 0.00011979498060843234, "loss": 0.0137, "step": 9357 }, { "epoch": 4.3677946324387396, "grad_norm": 0.40234375, "learning_rate": 0.00011978059726920596, "loss": 0.0075, "step": 9358 }, { "epoch": 4.368261376896149, "grad_norm": 0.30078125, "learning_rate": 0.00011976621350408142, "loss": 0.0048, "step": 9359 }, { "epoch": 4.368728121353559, "grad_norm": 0.478515625, "learning_rate": 0.00011975182931336834, "loss": 0.0135, "step": 9360 }, { "epoch": 4.369194865810968, "grad_norm": 0.447265625, "learning_rate": 0.00011973744469737644, "loss": 0.0079, "step": 9361 }, { "epoch": 4.369661610268378, "grad_norm": 0.57421875, "learning_rate": 0.00011972305965641549, "loss": 0.011, "step": 9362 }, { "epoch": 4.370128354725788, "grad_norm": 0.408203125, "learning_rate": 0.00011970867419079519, "loss": 0.0076, "step": 9363 }, { "epoch": 4.370595099183197, "grad_norm": 0.462890625, "learning_rate": 0.00011969428830082523, "loss": 0.0085, "step": 9364 }, { "epoch": 4.371061843640607, "grad_norm": 0.61328125, "learning_rate": 0.00011967990198681544, "loss": 0.013, "step": 9365 }, { "epoch": 4.371528588098016, "grad_norm": 0.42578125, "learning_rate": 0.0001196655152490755, "loss": 0.007, "step": 9366 }, { "epoch": 4.371995332555426, "grad_norm": 0.4453125, "learning_rate": 0.00011965112808791519, "loss": 0.0064, "step": 9367 }, { "epoch": 4.372462077012836, "grad_norm": 0.40234375, "learning_rate": 0.00011963674050364429, "loss": 0.0081, "step": 9368 }, { "epoch": 4.372928821470245, "grad_norm": 0.478515625, "learning_rate": 0.00011962235249657258, "loss": 0.0061, "step": 9369 }, { "epoch": 4.373395565927654, "grad_norm": 0.408203125, "learning_rate": 0.00011960796406700985, "loss": 0.0074, "step": 9370 }, { "epoch": 4.373862310385064, "grad_norm": 0.443359375, "learning_rate": 0.00011959357521526591, "loss": 0.0067, "step": 9371 }, { "epoch": 4.374329054842474, "grad_norm": 0.392578125, "learning_rate": 0.00011957918594165055, "loss": 0.0074, "step": 9372 }, { "epoch": 4.374795799299883, "grad_norm": 0.421875, "learning_rate": 0.00011956479624647359, "loss": 0.007, "step": 9373 }, { "epoch": 4.375262543757293, "grad_norm": 0.3984375, "learning_rate": 0.00011955040613004484, "loss": 0.0135, "step": 9374 }, { "epoch": 4.3757292882147025, "grad_norm": 0.52734375, "learning_rate": 0.00011953601559267418, "loss": 0.0096, "step": 9375 }, { "epoch": 4.376196032672112, "grad_norm": 0.400390625, "learning_rate": 0.00011952162463467144, "loss": 0.0056, "step": 9376 }, { "epoch": 4.376662777129521, "grad_norm": 0.474609375, "learning_rate": 0.00011950723325634643, "loss": 0.0071, "step": 9377 }, { "epoch": 4.377129521586931, "grad_norm": 0.6875, "learning_rate": 0.00011949284145800905, "loss": 0.01, "step": 9378 }, { "epoch": 4.377596266044341, "grad_norm": 0.65625, "learning_rate": 0.00011947844923996918, "loss": 0.0176, "step": 9379 }, { "epoch": 4.378063010501751, "grad_norm": 0.609375, "learning_rate": 0.00011946405660253668, "loss": 0.0184, "step": 9380 }, { "epoch": 4.3785297549591595, "grad_norm": 0.404296875, "learning_rate": 0.00011944966354602145, "loss": 0.0091, "step": 9381 }, { "epoch": 4.378996499416569, "grad_norm": 0.328125, "learning_rate": 0.00011943527007073338, "loss": 0.0038, "step": 9382 }, { "epoch": 4.379463243873979, "grad_norm": 0.25, "learning_rate": 0.00011942087617698239, "loss": 0.0036, "step": 9383 }, { "epoch": 4.379929988331389, "grad_norm": 0.5234375, "learning_rate": 0.00011940648186507837, "loss": 0.0129, "step": 9384 }, { "epoch": 4.380396732788798, "grad_norm": 0.5234375, "learning_rate": 0.00011939208713533127, "loss": 0.0115, "step": 9385 }, { "epoch": 4.380863477246208, "grad_norm": 0.515625, "learning_rate": 0.00011937769198805104, "loss": 0.0108, "step": 9386 }, { "epoch": 4.381330221703617, "grad_norm": 0.404296875, "learning_rate": 0.00011936329642354755, "loss": 0.0062, "step": 9387 }, { "epoch": 4.381796966161027, "grad_norm": 0.5625, "learning_rate": 0.00011934890044213085, "loss": 0.0197, "step": 9388 }, { "epoch": 4.382263710618436, "grad_norm": 0.4375, "learning_rate": 0.00011933450404411089, "loss": 0.0074, "step": 9389 }, { "epoch": 4.382730455075846, "grad_norm": 0.58203125, "learning_rate": 0.00011932010722979754, "loss": 0.01, "step": 9390 }, { "epoch": 4.383197199533256, "grad_norm": 0.2109375, "learning_rate": 0.00011930570999950085, "loss": 0.0024, "step": 9391 }, { "epoch": 4.3836639439906655, "grad_norm": 0.44140625, "learning_rate": 0.00011929131235353084, "loss": 0.0103, "step": 9392 }, { "epoch": 4.384130688448074, "grad_norm": 0.41796875, "learning_rate": 0.00011927691429219743, "loss": 0.0067, "step": 9393 }, { "epoch": 4.384597432905484, "grad_norm": 0.333984375, "learning_rate": 0.00011926251581581069, "loss": 0.0046, "step": 9394 }, { "epoch": 4.385064177362894, "grad_norm": 0.53125, "learning_rate": 0.00011924811692468061, "loss": 0.0081, "step": 9395 }, { "epoch": 4.385530921820304, "grad_norm": 0.294921875, "learning_rate": 0.00011923371761911721, "loss": 0.0033, "step": 9396 }, { "epoch": 4.385997666277713, "grad_norm": 0.50390625, "learning_rate": 0.00011921931789943053, "loss": 0.0098, "step": 9397 }, { "epoch": 4.3864644107351225, "grad_norm": 0.392578125, "learning_rate": 0.00011920491776593064, "loss": 0.0049, "step": 9398 }, { "epoch": 4.386931155192532, "grad_norm": 0.466796875, "learning_rate": 0.00011919051721892753, "loss": 0.0099, "step": 9399 }, { "epoch": 4.387397899649942, "grad_norm": 0.53125, "learning_rate": 0.00011917611625873128, "loss": 0.0095, "step": 9400 }, { "epoch": 4.387864644107351, "grad_norm": 0.44921875, "learning_rate": 0.000119161714885652, "loss": 0.0079, "step": 9401 }, { "epoch": 4.388331388564761, "grad_norm": 0.337890625, "learning_rate": 0.00011914731309999973, "loss": 0.0083, "step": 9402 }, { "epoch": 4.388798133022171, "grad_norm": 0.40234375, "learning_rate": 0.00011913291090208457, "loss": 0.0055, "step": 9403 }, { "epoch": 4.3892648774795795, "grad_norm": 0.5078125, "learning_rate": 0.0001191185082922166, "loss": 0.0074, "step": 9404 }, { "epoch": 4.389731621936989, "grad_norm": 0.50390625, "learning_rate": 0.00011910410527070595, "loss": 0.0127, "step": 9405 }, { "epoch": 4.390198366394399, "grad_norm": 0.32421875, "learning_rate": 0.00011908970183786267, "loss": 0.0124, "step": 9406 }, { "epoch": 4.390665110851809, "grad_norm": 0.5546875, "learning_rate": 0.00011907529799399697, "loss": 0.0081, "step": 9407 }, { "epoch": 4.391131855309218, "grad_norm": 0.388671875, "learning_rate": 0.00011906089373941895, "loss": 0.0126, "step": 9408 }, { "epoch": 4.391598599766628, "grad_norm": 0.69140625, "learning_rate": 0.00011904648907443871, "loss": 0.0201, "step": 9409 }, { "epoch": 4.392065344224037, "grad_norm": 0.48828125, "learning_rate": 0.00011903208399936644, "loss": 0.0062, "step": 9410 }, { "epoch": 4.392532088681447, "grad_norm": 0.67578125, "learning_rate": 0.00011901767851451229, "loss": 0.0129, "step": 9411 }, { "epoch": 4.392998833138856, "grad_norm": 0.6875, "learning_rate": 0.00011900327262018642, "loss": 0.0143, "step": 9412 }, { "epoch": 4.393465577596266, "grad_norm": 0.482421875, "learning_rate": 0.00011898886631669901, "loss": 0.0087, "step": 9413 }, { "epoch": 4.393932322053676, "grad_norm": 0.37109375, "learning_rate": 0.00011897445960436024, "loss": 0.0069, "step": 9414 }, { "epoch": 4.3943990665110855, "grad_norm": 0.412109375, "learning_rate": 0.0001189600524834803, "loss": 0.009, "step": 9415 }, { "epoch": 4.394865810968494, "grad_norm": 0.4921875, "learning_rate": 0.00011894564495436938, "loss": 0.0075, "step": 9416 }, { "epoch": 4.395332555425904, "grad_norm": 0.220703125, "learning_rate": 0.00011893123701733773, "loss": 0.0036, "step": 9417 }, { "epoch": 4.395799299883314, "grad_norm": 0.419921875, "learning_rate": 0.00011891682867269551, "loss": 0.0083, "step": 9418 }, { "epoch": 4.396266044340724, "grad_norm": 0.271484375, "learning_rate": 0.00011890241992075299, "loss": 0.0044, "step": 9419 }, { "epoch": 4.396732788798133, "grad_norm": 0.49609375, "learning_rate": 0.00011888801076182041, "loss": 0.0131, "step": 9420 }, { "epoch": 4.3971995332555425, "grad_norm": 0.53125, "learning_rate": 0.00011887360119620798, "loss": 0.007, "step": 9421 }, { "epoch": 4.397666277712952, "grad_norm": 0.330078125, "learning_rate": 0.00011885919122422602, "loss": 0.0054, "step": 9422 }, { "epoch": 4.398133022170362, "grad_norm": 0.41015625, "learning_rate": 0.00011884478084618468, "loss": 0.0077, "step": 9423 }, { "epoch": 4.398599766627771, "grad_norm": 0.48046875, "learning_rate": 0.00011883037006239434, "loss": 0.0096, "step": 9424 }, { "epoch": 4.399066511085181, "grad_norm": 0.36328125, "learning_rate": 0.00011881595887316526, "loss": 0.0061, "step": 9425 }, { "epoch": 4.399533255542591, "grad_norm": 0.453125, "learning_rate": 0.00011880154727880766, "loss": 0.0107, "step": 9426 }, { "epoch": 4.4, "grad_norm": 0.310546875, "learning_rate": 0.00011878713527963192, "loss": 0.0037, "step": 9427 }, { "epoch": 4.400466744457409, "grad_norm": 0.59375, "learning_rate": 0.00011877272287594832, "loss": 0.008, "step": 9428 }, { "epoch": 4.400933488914819, "grad_norm": 0.3125, "learning_rate": 0.00011875831006806712, "loss": 0.0063, "step": 9429 }, { "epoch": 4.401400233372229, "grad_norm": 0.53125, "learning_rate": 0.00011874389685629874, "loss": 0.0137, "step": 9430 }, { "epoch": 4.401866977829639, "grad_norm": 0.54296875, "learning_rate": 0.00011872948324095346, "loss": 0.0128, "step": 9431 }, { "epoch": 4.402333722287048, "grad_norm": 0.6328125, "learning_rate": 0.00011871506922234161, "loss": 0.0079, "step": 9432 }, { "epoch": 4.402800466744457, "grad_norm": 0.54296875, "learning_rate": 0.00011870065480077355, "loss": 0.0105, "step": 9433 }, { "epoch": 4.403267211201867, "grad_norm": 0.62109375, "learning_rate": 0.00011868623997655966, "loss": 0.0113, "step": 9434 }, { "epoch": 4.403733955659277, "grad_norm": 0.306640625, "learning_rate": 0.00011867182475001026, "loss": 0.0045, "step": 9435 }, { "epoch": 4.404200700116686, "grad_norm": 0.4296875, "learning_rate": 0.00011865740912143577, "loss": 0.0067, "step": 9436 }, { "epoch": 4.404667444574096, "grad_norm": 0.482421875, "learning_rate": 0.00011864299309114661, "loss": 0.0054, "step": 9437 }, { "epoch": 4.405134189031505, "grad_norm": 0.48046875, "learning_rate": 0.00011862857665945309, "loss": 0.009, "step": 9438 }, { "epoch": 4.405600933488914, "grad_norm": 0.2275390625, "learning_rate": 0.00011861415982666561, "loss": 0.0023, "step": 9439 }, { "epoch": 4.406067677946324, "grad_norm": 0.478515625, "learning_rate": 0.00011859974259309465, "loss": 0.0089, "step": 9440 }, { "epoch": 4.406534422403734, "grad_norm": 0.4296875, "learning_rate": 0.0001185853249590506, "loss": 0.006, "step": 9441 }, { "epoch": 4.407001166861144, "grad_norm": 0.36328125, "learning_rate": 0.00011857090692484384, "loss": 0.0171, "step": 9442 }, { "epoch": 4.4074679113185535, "grad_norm": 0.79296875, "learning_rate": 0.00011855648849078491, "loss": 0.0127, "step": 9443 }, { "epoch": 4.407934655775962, "grad_norm": 0.45703125, "learning_rate": 0.00011854206965718417, "loss": 0.007, "step": 9444 }, { "epoch": 4.408401400233372, "grad_norm": 0.498046875, "learning_rate": 0.00011852765042435207, "loss": 0.01, "step": 9445 }, { "epoch": 4.408868144690782, "grad_norm": 0.625, "learning_rate": 0.00011851323079259912, "loss": 0.0118, "step": 9446 }, { "epoch": 4.409334889148191, "grad_norm": 0.478515625, "learning_rate": 0.00011849881076223579, "loss": 0.0073, "step": 9447 }, { "epoch": 4.409801633605601, "grad_norm": 0.5703125, "learning_rate": 0.00011848439033357252, "loss": 0.0065, "step": 9448 }, { "epoch": 4.4102683780630105, "grad_norm": 0.333984375, "learning_rate": 0.00011846996950691982, "loss": 0.0035, "step": 9449 }, { "epoch": 4.41073512252042, "grad_norm": 0.443359375, "learning_rate": 0.00011845554828258818, "loss": 0.0049, "step": 9450 }, { "epoch": 4.411201866977829, "grad_norm": 0.349609375, "learning_rate": 0.00011844112666088812, "loss": 0.0047, "step": 9451 }, { "epoch": 4.411668611435239, "grad_norm": 0.404296875, "learning_rate": 0.00011842670464213012, "loss": 0.0066, "step": 9452 }, { "epoch": 4.412135355892649, "grad_norm": 0.5859375, "learning_rate": 0.00011841228222662475, "loss": 0.0066, "step": 9453 }, { "epoch": 4.412602100350059, "grad_norm": 0.75390625, "learning_rate": 0.00011839785941468251, "loss": 0.0133, "step": 9454 }, { "epoch": 4.4130688448074675, "grad_norm": 0.87890625, "learning_rate": 0.00011838343620661392, "loss": 0.0101, "step": 9455 }, { "epoch": 4.413535589264877, "grad_norm": 0.5546875, "learning_rate": 0.00011836901260272957, "loss": 0.0108, "step": 9456 }, { "epoch": 4.414002333722287, "grad_norm": 0.421875, "learning_rate": 0.00011835458860334002, "loss": 0.0068, "step": 9457 }, { "epoch": 4.414469078179697, "grad_norm": 0.3828125, "learning_rate": 0.00011834016420875577, "loss": 0.0091, "step": 9458 }, { "epoch": 4.414935822637106, "grad_norm": 0.421875, "learning_rate": 0.00011832573941928747, "loss": 0.0088, "step": 9459 }, { "epoch": 4.415402567094516, "grad_norm": 0.26953125, "learning_rate": 0.00011831131423524564, "loss": 0.0052, "step": 9460 }, { "epoch": 4.415869311551925, "grad_norm": 0.515625, "learning_rate": 0.00011829688865694093, "loss": 0.0119, "step": 9461 }, { "epoch": 4.416336056009335, "grad_norm": 0.38671875, "learning_rate": 0.00011828246268468388, "loss": 0.0052, "step": 9462 }, { "epoch": 4.416802800466744, "grad_norm": 0.6328125, "learning_rate": 0.00011826803631878514, "loss": 0.0107, "step": 9463 }, { "epoch": 4.417269544924154, "grad_norm": 0.3828125, "learning_rate": 0.00011825360955955532, "loss": 0.0076, "step": 9464 }, { "epoch": 4.417736289381564, "grad_norm": 0.322265625, "learning_rate": 0.00011823918240730498, "loss": 0.0044, "step": 9465 }, { "epoch": 4.4182030338389735, "grad_norm": 0.44921875, "learning_rate": 0.00011822475486234486, "loss": 0.0039, "step": 9466 }, { "epoch": 4.418669778296382, "grad_norm": 0.59375, "learning_rate": 0.00011821032692498553, "loss": 0.0145, "step": 9467 }, { "epoch": 4.419136522753792, "grad_norm": 0.38671875, "learning_rate": 0.00011819589859553764, "loss": 0.0092, "step": 9468 }, { "epoch": 4.419603267211202, "grad_norm": 0.2392578125, "learning_rate": 0.00011818146987431189, "loss": 0.005, "step": 9469 }, { "epoch": 4.420070011668612, "grad_norm": 0.48828125, "learning_rate": 0.0001181670407616189, "loss": 0.0107, "step": 9470 }, { "epoch": 4.420536756126021, "grad_norm": 0.5078125, "learning_rate": 0.00011815261125776938, "loss": 0.0049, "step": 9471 }, { "epoch": 4.4210035005834305, "grad_norm": 0.412109375, "learning_rate": 0.00011813818136307397, "loss": 0.0146, "step": 9472 }, { "epoch": 4.42147024504084, "grad_norm": 0.466796875, "learning_rate": 0.00011812375107784344, "loss": 0.0094, "step": 9473 }, { "epoch": 4.42193698949825, "grad_norm": 0.58984375, "learning_rate": 0.00011810932040238842, "loss": 0.0108, "step": 9474 }, { "epoch": 4.422403733955659, "grad_norm": 0.2392578125, "learning_rate": 0.00011809488933701962, "loss": 0.0034, "step": 9475 }, { "epoch": 4.422870478413069, "grad_norm": 0.455078125, "learning_rate": 0.0001180804578820478, "loss": 0.0055, "step": 9476 }, { "epoch": 4.423337222870479, "grad_norm": 0.5625, "learning_rate": 0.00011806602603778365, "loss": 0.0109, "step": 9477 }, { "epoch": 4.423803967327888, "grad_norm": 0.455078125, "learning_rate": 0.0001180515938045379, "loss": 0.013, "step": 9478 }, { "epoch": 4.424270711785297, "grad_norm": 0.412109375, "learning_rate": 0.00011803716118262134, "loss": 0.0111, "step": 9479 }, { "epoch": 4.424737456242707, "grad_norm": 0.3671875, "learning_rate": 0.00011802272817234468, "loss": 0.0078, "step": 9480 }, { "epoch": 4.425204200700117, "grad_norm": 0.443359375, "learning_rate": 0.00011800829477401867, "loss": 0.0091, "step": 9481 }, { "epoch": 4.425670945157526, "grad_norm": 0.796875, "learning_rate": 0.00011799386098795409, "loss": 0.0133, "step": 9482 }, { "epoch": 4.426137689614936, "grad_norm": 0.6796875, "learning_rate": 0.00011797942681446172, "loss": 0.0097, "step": 9483 }, { "epoch": 4.426604434072345, "grad_norm": 0.5234375, "learning_rate": 0.00011796499225385236, "loss": 0.0096, "step": 9484 }, { "epoch": 4.427071178529755, "grad_norm": 0.451171875, "learning_rate": 0.00011795055730643678, "loss": 0.0059, "step": 9485 }, { "epoch": 4.427537922987165, "grad_norm": 0.484375, "learning_rate": 0.00011793612197252579, "loss": 0.0105, "step": 9486 }, { "epoch": 4.428004667444574, "grad_norm": 0.384765625, "learning_rate": 0.00011792168625243019, "loss": 0.0039, "step": 9487 }, { "epoch": 4.428471411901984, "grad_norm": 0.74609375, "learning_rate": 0.00011790725014646078, "loss": 0.0149, "step": 9488 }, { "epoch": 4.4289381563593935, "grad_norm": 0.4609375, "learning_rate": 0.00011789281365492842, "loss": 0.0082, "step": 9489 }, { "epoch": 4.429404900816802, "grad_norm": 0.48046875, "learning_rate": 0.00011787837677814393, "loss": 0.0064, "step": 9490 }, { "epoch": 4.429871645274212, "grad_norm": 0.404296875, "learning_rate": 0.00011786393951641815, "loss": 0.0147, "step": 9491 }, { "epoch": 4.430338389731622, "grad_norm": 0.2431640625, "learning_rate": 0.00011784950187006192, "loss": 0.0042, "step": 9492 }, { "epoch": 4.430805134189032, "grad_norm": 0.326171875, "learning_rate": 0.00011783506383938611, "loss": 0.0052, "step": 9493 }, { "epoch": 4.431271878646441, "grad_norm": 0.64453125, "learning_rate": 0.0001178206254247016, "loss": 0.0088, "step": 9494 }, { "epoch": 4.4317386231038505, "grad_norm": 0.3671875, "learning_rate": 0.00011780618662631926, "loss": 0.0071, "step": 9495 }, { "epoch": 4.43220536756126, "grad_norm": 0.53125, "learning_rate": 0.00011779174744454998, "loss": 0.0085, "step": 9496 }, { "epoch": 4.43267211201867, "grad_norm": 0.53515625, "learning_rate": 0.00011777730787970461, "loss": 0.0095, "step": 9497 }, { "epoch": 4.433138856476079, "grad_norm": 0.50390625, "learning_rate": 0.00011776286793209406, "loss": 0.0154, "step": 9498 }, { "epoch": 4.433605600933489, "grad_norm": 0.52734375, "learning_rate": 0.00011774842760202926, "loss": 0.013, "step": 9499 }, { "epoch": 4.434072345390899, "grad_norm": 0.5703125, "learning_rate": 0.00011773398688982116, "loss": 0.013, "step": 9500 }, { "epoch": 4.434539089848308, "grad_norm": 0.64453125, "learning_rate": 0.00011771954579578062, "loss": 0.0151, "step": 9501 }, { "epoch": 4.435005834305717, "grad_norm": 0.7578125, "learning_rate": 0.0001177051043202186, "loss": 0.0091, "step": 9502 }, { "epoch": 4.435472578763127, "grad_norm": 0.42578125, "learning_rate": 0.00011769066246344604, "loss": 0.011, "step": 9503 }, { "epoch": 4.435939323220537, "grad_norm": 0.375, "learning_rate": 0.00011767622022577389, "loss": 0.0118, "step": 9504 }, { "epoch": 4.436406067677947, "grad_norm": 0.419921875, "learning_rate": 0.00011766177760751311, "loss": 0.0067, "step": 9505 }, { "epoch": 4.436872812135356, "grad_norm": 0.486328125, "learning_rate": 0.00011764733460897466, "loss": 0.0103, "step": 9506 }, { "epoch": 4.437339556592765, "grad_norm": 0.384765625, "learning_rate": 0.00011763289123046953, "loss": 0.0097, "step": 9507 }, { "epoch": 4.437806301050175, "grad_norm": 0.388671875, "learning_rate": 0.00011761844747230866, "loss": 0.0064, "step": 9508 }, { "epoch": 4.438273045507585, "grad_norm": 0.33203125, "learning_rate": 0.0001176040033348031, "loss": 0.0078, "step": 9509 }, { "epoch": 4.438739789964994, "grad_norm": 0.50390625, "learning_rate": 0.00011758955881826379, "loss": 0.0105, "step": 9510 }, { "epoch": 4.439206534422404, "grad_norm": 0.51953125, "learning_rate": 0.00011757511392300178, "loss": 0.0082, "step": 9511 }, { "epoch": 4.4396732788798134, "grad_norm": 0.59765625, "learning_rate": 0.00011756066864932806, "loss": 0.0153, "step": 9512 }, { "epoch": 4.440140023337223, "grad_norm": 0.8671875, "learning_rate": 0.00011754622299755367, "loss": 0.012, "step": 9513 }, { "epoch": 4.440606767794632, "grad_norm": 0.2255859375, "learning_rate": 0.0001175317769679896, "loss": 0.0041, "step": 9514 }, { "epoch": 4.441073512252042, "grad_norm": 0.51171875, "learning_rate": 0.00011751733056094693, "loss": 0.0089, "step": 9515 }, { "epoch": 4.441540256709452, "grad_norm": 0.388671875, "learning_rate": 0.00011750288377673673, "loss": 0.007, "step": 9516 }, { "epoch": 4.4420070011668615, "grad_norm": 0.49609375, "learning_rate": 0.00011748843661566997, "loss": 0.0125, "step": 9517 }, { "epoch": 4.44247374562427, "grad_norm": 0.494140625, "learning_rate": 0.0001174739890780578, "loss": 0.0111, "step": 9518 }, { "epoch": 4.44294049008168, "grad_norm": 0.384765625, "learning_rate": 0.00011745954116421125, "loss": 0.0063, "step": 9519 }, { "epoch": 4.44340723453909, "grad_norm": 1.140625, "learning_rate": 0.00011744509287444141, "loss": 0.0316, "step": 9520 }, { "epoch": 4.4438739789965, "grad_norm": 0.443359375, "learning_rate": 0.00011743064420905935, "loss": 0.0063, "step": 9521 }, { "epoch": 4.444340723453909, "grad_norm": 0.4296875, "learning_rate": 0.00011741619516837621, "loss": 0.0084, "step": 9522 }, { "epoch": 4.4448074679113185, "grad_norm": 0.58984375, "learning_rate": 0.00011740174575270306, "loss": 0.0113, "step": 9523 }, { "epoch": 4.445274212368728, "grad_norm": 0.400390625, "learning_rate": 0.00011738729596235099, "loss": 0.0061, "step": 9524 }, { "epoch": 4.445740956826137, "grad_norm": 0.49609375, "learning_rate": 0.00011737284579763117, "loss": 0.0096, "step": 9525 }, { "epoch": 4.446207701283547, "grad_norm": 0.4375, "learning_rate": 0.00011735839525885471, "loss": 0.009, "step": 9526 }, { "epoch": 4.446674445740957, "grad_norm": 0.54296875, "learning_rate": 0.00011734394434633272, "loss": 0.0065, "step": 9527 }, { "epoch": 4.447141190198367, "grad_norm": 0.38671875, "learning_rate": 0.00011732949306037636, "loss": 0.0158, "step": 9528 }, { "epoch": 4.447607934655776, "grad_norm": 0.46875, "learning_rate": 0.00011731504140129682, "loss": 0.0077, "step": 9529 }, { "epoch": 4.448074679113185, "grad_norm": 0.68359375, "learning_rate": 0.0001173005893694052, "loss": 0.0098, "step": 9530 }, { "epoch": 4.448541423570595, "grad_norm": 0.33984375, "learning_rate": 0.00011728613696501267, "loss": 0.0061, "step": 9531 }, { "epoch": 4.449008168028005, "grad_norm": 0.484375, "learning_rate": 0.00011727168418843049, "loss": 0.0095, "step": 9532 }, { "epoch": 4.449474912485414, "grad_norm": 0.33203125, "learning_rate": 0.00011725723103996975, "loss": 0.0069, "step": 9533 }, { "epoch": 4.449941656942824, "grad_norm": 0.61328125, "learning_rate": 0.0001172427775199417, "loss": 0.0105, "step": 9534 }, { "epoch": 4.450408401400233, "grad_norm": 0.35546875, "learning_rate": 0.00011722832362865749, "loss": 0.0051, "step": 9535 }, { "epoch": 4.450875145857643, "grad_norm": 0.71875, "learning_rate": 0.00011721386936642839, "loss": 0.0112, "step": 9536 }, { "epoch": 4.451341890315052, "grad_norm": 0.400390625, "learning_rate": 0.00011719941473356557, "loss": 0.0085, "step": 9537 }, { "epoch": 4.451808634772462, "grad_norm": 0.498046875, "learning_rate": 0.00011718495973038026, "loss": 0.0118, "step": 9538 }, { "epoch": 4.452275379229872, "grad_norm": 0.482421875, "learning_rate": 0.00011717050435718371, "loss": 0.0074, "step": 9539 }, { "epoch": 4.4527421236872815, "grad_norm": 0.58203125, "learning_rate": 0.00011715604861428713, "loss": 0.0074, "step": 9540 }, { "epoch": 4.45320886814469, "grad_norm": 0.484375, "learning_rate": 0.0001171415925020018, "loss": 0.0142, "step": 9541 }, { "epoch": 4.4536756126021, "grad_norm": 0.40234375, "learning_rate": 0.00011712713602063898, "loss": 0.007, "step": 9542 }, { "epoch": 4.45414235705951, "grad_norm": 0.384765625, "learning_rate": 0.0001171126791705099, "loss": 0.0067, "step": 9543 }, { "epoch": 4.45460910151692, "grad_norm": 0.5546875, "learning_rate": 0.00011709822195192586, "loss": 0.0114, "step": 9544 }, { "epoch": 4.455075845974329, "grad_norm": 0.5859375, "learning_rate": 0.00011708376436519812, "loss": 0.0087, "step": 9545 }, { "epoch": 4.4555425904317385, "grad_norm": 0.494140625, "learning_rate": 0.000117069306410638, "loss": 0.0072, "step": 9546 }, { "epoch": 4.456009334889148, "grad_norm": 0.58984375, "learning_rate": 0.00011705484808855676, "loss": 0.0109, "step": 9547 }, { "epoch": 4.456476079346558, "grad_norm": 0.3359375, "learning_rate": 0.00011704038939926571, "loss": 0.0074, "step": 9548 }, { "epoch": 4.456942823803967, "grad_norm": 0.6171875, "learning_rate": 0.00011702593034307619, "loss": 0.0186, "step": 9549 }, { "epoch": 4.457409568261377, "grad_norm": 0.376953125, "learning_rate": 0.00011701147092029947, "loss": 0.0054, "step": 9550 }, { "epoch": 4.457876312718787, "grad_norm": 0.5234375, "learning_rate": 0.00011699701113124693, "loss": 0.0104, "step": 9551 }, { "epoch": 4.458343057176196, "grad_norm": 0.5703125, "learning_rate": 0.00011698255097622988, "loss": 0.0099, "step": 9552 }, { "epoch": 4.458809801633605, "grad_norm": 0.52734375, "learning_rate": 0.00011696809045555965, "loss": 0.0113, "step": 9553 }, { "epoch": 4.459276546091015, "grad_norm": 0.58984375, "learning_rate": 0.00011695362956954761, "loss": 0.0108, "step": 9554 }, { "epoch": 4.459743290548425, "grad_norm": 0.859375, "learning_rate": 0.00011693916831850513, "loss": 0.0131, "step": 9555 }, { "epoch": 4.460210035005835, "grad_norm": 0.337890625, "learning_rate": 0.00011692470670274356, "loss": 0.008, "step": 9556 }, { "epoch": 4.460676779463244, "grad_norm": 0.404296875, "learning_rate": 0.00011691024472257428, "loss": 0.0081, "step": 9557 }, { "epoch": 4.461143523920653, "grad_norm": 0.54296875, "learning_rate": 0.00011689578237830868, "loss": 0.0141, "step": 9558 }, { "epoch": 4.461610268378063, "grad_norm": 0.3984375, "learning_rate": 0.00011688131967025809, "loss": 0.0074, "step": 9559 }, { "epoch": 4.462077012835473, "grad_norm": 0.271484375, "learning_rate": 0.000116866856598734, "loss": 0.0035, "step": 9560 }, { "epoch": 4.462543757292882, "grad_norm": 0.39453125, "learning_rate": 0.00011685239316404774, "loss": 0.0084, "step": 9561 }, { "epoch": 4.463010501750292, "grad_norm": 0.310546875, "learning_rate": 0.00011683792936651078, "loss": 0.0045, "step": 9562 }, { "epoch": 4.4634772462077015, "grad_norm": 0.392578125, "learning_rate": 0.00011682346520643451, "loss": 0.0077, "step": 9563 }, { "epoch": 4.463943990665111, "grad_norm": 0.5625, "learning_rate": 0.00011680900068413039, "loss": 0.0073, "step": 9564 }, { "epoch": 4.46441073512252, "grad_norm": 0.443359375, "learning_rate": 0.00011679453579990982, "loss": 0.009, "step": 9565 }, { "epoch": 4.46487747957993, "grad_norm": 0.255859375, "learning_rate": 0.00011678007055408424, "loss": 0.0029, "step": 9566 }, { "epoch": 4.46534422403734, "grad_norm": 0.416015625, "learning_rate": 0.00011676560494696513, "loss": 0.0078, "step": 9567 }, { "epoch": 4.465810968494749, "grad_norm": 0.5078125, "learning_rate": 0.00011675113897886399, "loss": 0.0095, "step": 9568 }, { "epoch": 4.4662777129521585, "grad_norm": 0.515625, "learning_rate": 0.00011673667265009218, "loss": 0.0108, "step": 9569 }, { "epoch": 4.466744457409568, "grad_norm": 0.369140625, "learning_rate": 0.00011672220596096126, "loss": 0.0062, "step": 9570 }, { "epoch": 4.467211201866978, "grad_norm": 0.69921875, "learning_rate": 0.0001167077389117827, "loss": 0.0182, "step": 9571 }, { "epoch": 4.467677946324388, "grad_norm": 1.3828125, "learning_rate": 0.000116693271502868, "loss": 0.0293, "step": 9572 }, { "epoch": 4.468144690781797, "grad_norm": 0.7734375, "learning_rate": 0.0001166788037345286, "loss": 0.0156, "step": 9573 }, { "epoch": 4.468611435239207, "grad_norm": 0.37109375, "learning_rate": 0.00011666433560707609, "loss": 0.0038, "step": 9574 }, { "epoch": 4.469078179696616, "grad_norm": 0.5234375, "learning_rate": 0.00011664986712082194, "loss": 0.0077, "step": 9575 }, { "epoch": 4.469544924154025, "grad_norm": 0.423828125, "learning_rate": 0.00011663539827607763, "loss": 0.0067, "step": 9576 }, { "epoch": 4.470011668611435, "grad_norm": 0.3671875, "learning_rate": 0.00011662092907315479, "loss": 0.0097, "step": 9577 }, { "epoch": 4.470478413068845, "grad_norm": 0.5546875, "learning_rate": 0.00011660645951236491, "loss": 0.0094, "step": 9578 }, { "epoch": 4.470945157526255, "grad_norm": 0.61328125, "learning_rate": 0.00011659198959401949, "loss": 0.0141, "step": 9579 }, { "epoch": 4.471411901983664, "grad_norm": 0.7265625, "learning_rate": 0.00011657751931843017, "loss": 0.0096, "step": 9580 }, { "epoch": 4.471878646441073, "grad_norm": 0.78515625, "learning_rate": 0.00011656304868590844, "loss": 0.014, "step": 9581 }, { "epoch": 4.472345390898483, "grad_norm": 0.58203125, "learning_rate": 0.00011654857769676588, "loss": 0.0123, "step": 9582 }, { "epoch": 4.472812135355893, "grad_norm": 0.7109375, "learning_rate": 0.00011653410635131413, "loss": 0.0109, "step": 9583 }, { "epoch": 4.473278879813302, "grad_norm": 0.447265625, "learning_rate": 0.00011651963464986468, "loss": 0.0062, "step": 9584 }, { "epoch": 4.473745624270712, "grad_norm": 0.5625, "learning_rate": 0.00011650516259272917, "loss": 0.0172, "step": 9585 }, { "epoch": 4.4742123687281214, "grad_norm": 0.6796875, "learning_rate": 0.00011649069018021921, "loss": 0.021, "step": 9586 }, { "epoch": 4.474679113185531, "grad_norm": 0.453125, "learning_rate": 0.0001164762174126464, "loss": 0.007, "step": 9587 }, { "epoch": 4.47514585764294, "grad_norm": 0.65234375, "learning_rate": 0.00011646174429032235, "loss": 0.0077, "step": 9588 }, { "epoch": 4.47561260210035, "grad_norm": 0.490234375, "learning_rate": 0.00011644727081355866, "loss": 0.0061, "step": 9589 }, { "epoch": 4.47607934655776, "grad_norm": 0.67578125, "learning_rate": 0.00011643279698266698, "loss": 0.0141, "step": 9590 }, { "epoch": 4.4765460910151695, "grad_norm": 0.5234375, "learning_rate": 0.00011641832279795897, "loss": 0.015, "step": 9591 }, { "epoch": 4.4770128354725784, "grad_norm": 0.419921875, "learning_rate": 0.00011640384825974623, "loss": 0.0085, "step": 9592 }, { "epoch": 4.477479579929988, "grad_norm": 0.40625, "learning_rate": 0.00011638937336834046, "loss": 0.0085, "step": 9593 }, { "epoch": 4.477946324387398, "grad_norm": 0.40234375, "learning_rate": 0.00011637489812405328, "loss": 0.0057, "step": 9594 }, { "epoch": 4.478413068844808, "grad_norm": 0.48046875, "learning_rate": 0.00011636042252719641, "loss": 0.0063, "step": 9595 }, { "epoch": 4.478879813302217, "grad_norm": 0.369140625, "learning_rate": 0.00011634594657808145, "loss": 0.0055, "step": 9596 }, { "epoch": 4.4793465577596265, "grad_norm": 0.5234375, "learning_rate": 0.00011633147027702013, "loss": 0.0073, "step": 9597 }, { "epoch": 4.479813302217036, "grad_norm": 0.357421875, "learning_rate": 0.00011631699362432415, "loss": 0.0042, "step": 9598 }, { "epoch": 4.480280046674446, "grad_norm": 0.609375, "learning_rate": 0.00011630251662030518, "loss": 0.01, "step": 9599 }, { "epoch": 4.480746791131855, "grad_norm": 0.54296875, "learning_rate": 0.00011628803926527495, "loss": 0.0064, "step": 9600 }, { "epoch": 4.481213535589265, "grad_norm": 0.5546875, "learning_rate": 0.00011627356155954516, "loss": 0.0093, "step": 9601 }, { "epoch": 4.481680280046675, "grad_norm": 0.62890625, "learning_rate": 0.0001162590835034275, "loss": 0.0109, "step": 9602 }, { "epoch": 4.482147024504084, "grad_norm": 0.6015625, "learning_rate": 0.00011624460509723379, "loss": 0.0074, "step": 9603 }, { "epoch": 4.482613768961493, "grad_norm": 0.515625, "learning_rate": 0.0001162301263412757, "loss": 0.0084, "step": 9604 }, { "epoch": 4.483080513418903, "grad_norm": 0.68359375, "learning_rate": 0.00011621564723586496, "loss": 0.0106, "step": 9605 }, { "epoch": 4.483547257876313, "grad_norm": 0.435546875, "learning_rate": 0.00011620116778131333, "loss": 0.0065, "step": 9606 }, { "epoch": 4.484014002333723, "grad_norm": 0.470703125, "learning_rate": 0.00011618668797793261, "loss": 0.0115, "step": 9607 }, { "epoch": 4.484480746791132, "grad_norm": 0.404296875, "learning_rate": 0.00011617220782603452, "loss": 0.0057, "step": 9608 }, { "epoch": 4.484947491248541, "grad_norm": 0.69140625, "learning_rate": 0.00011615772732593087, "loss": 0.011, "step": 9609 }, { "epoch": 4.485414235705951, "grad_norm": 0.4140625, "learning_rate": 0.00011614324647793342, "loss": 0.0094, "step": 9610 }, { "epoch": 4.48588098016336, "grad_norm": 0.42578125, "learning_rate": 0.00011612876528235396, "loss": 0.0077, "step": 9611 }, { "epoch": 4.48634772462077, "grad_norm": 0.609375, "learning_rate": 0.00011611428373950426, "loss": 0.0137, "step": 9612 }, { "epoch": 4.48681446907818, "grad_norm": 0.453125, "learning_rate": 0.0001160998018496962, "loss": 0.0117, "step": 9613 }, { "epoch": 4.4872812135355895, "grad_norm": 0.59375, "learning_rate": 0.00011608531961324152, "loss": 0.0095, "step": 9614 }, { "epoch": 4.487747957992999, "grad_norm": 0.42578125, "learning_rate": 0.00011607083703045205, "loss": 0.0052, "step": 9615 }, { "epoch": 4.488214702450408, "grad_norm": 0.84375, "learning_rate": 0.00011605635410163964, "loss": 0.012, "step": 9616 }, { "epoch": 4.488681446907818, "grad_norm": 0.484375, "learning_rate": 0.0001160418708271161, "loss": 0.0053, "step": 9617 }, { "epoch": 4.489148191365228, "grad_norm": 0.640625, "learning_rate": 0.00011602738720719329, "loss": 0.0161, "step": 9618 }, { "epoch": 4.489614935822637, "grad_norm": 0.365234375, "learning_rate": 0.00011601290324218306, "loss": 0.008, "step": 9619 }, { "epoch": 4.4900816802800465, "grad_norm": 0.333984375, "learning_rate": 0.00011599841893239725, "loss": 0.0093, "step": 9620 }, { "epoch": 4.490548424737456, "grad_norm": 0.73046875, "learning_rate": 0.00011598393427814773, "loss": 0.0101, "step": 9621 }, { "epoch": 4.491015169194866, "grad_norm": 0.5859375, "learning_rate": 0.00011596944927974635, "loss": 0.0199, "step": 9622 }, { "epoch": 4.491481913652275, "grad_norm": 0.5703125, "learning_rate": 0.00011595496393750505, "loss": 0.0092, "step": 9623 }, { "epoch": 4.491948658109685, "grad_norm": 0.35546875, "learning_rate": 0.00011594047825173566, "loss": 0.0061, "step": 9624 }, { "epoch": 4.492415402567095, "grad_norm": 0.5, "learning_rate": 0.00011592599222275007, "loss": 0.0098, "step": 9625 }, { "epoch": 4.492882147024504, "grad_norm": 0.609375, "learning_rate": 0.00011591150585086022, "loss": 0.0154, "step": 9626 }, { "epoch": 4.493348891481913, "grad_norm": 0.66796875, "learning_rate": 0.00011589701913637798, "loss": 0.0116, "step": 9627 }, { "epoch": 4.493815635939323, "grad_norm": 0.443359375, "learning_rate": 0.00011588253207961528, "loss": 0.0076, "step": 9628 }, { "epoch": 4.494282380396733, "grad_norm": 0.435546875, "learning_rate": 0.00011586804468088406, "loss": 0.0119, "step": 9629 }, { "epoch": 4.494749124854143, "grad_norm": 0.703125, "learning_rate": 0.00011585355694049622, "loss": 0.0117, "step": 9630 }, { "epoch": 4.495215869311552, "grad_norm": 0.494140625, "learning_rate": 0.00011583906885876368, "loss": 0.0076, "step": 9631 }, { "epoch": 4.495682613768961, "grad_norm": 0.33203125, "learning_rate": 0.00011582458043599844, "loss": 0.0042, "step": 9632 }, { "epoch": 4.496149358226371, "grad_norm": 0.703125, "learning_rate": 0.00011581009167251244, "loss": 0.011, "step": 9633 }, { "epoch": 4.496616102683781, "grad_norm": 0.51171875, "learning_rate": 0.00011579560256861759, "loss": 0.0111, "step": 9634 }, { "epoch": 4.49708284714119, "grad_norm": 0.43359375, "learning_rate": 0.00011578111312462591, "loss": 0.0144, "step": 9635 }, { "epoch": 4.4975495915986, "grad_norm": 0.48046875, "learning_rate": 0.00011576662334084937, "loss": 0.0121, "step": 9636 }, { "epoch": 4.4980163360560095, "grad_norm": 0.6875, "learning_rate": 0.00011575213321759993, "loss": 0.0112, "step": 9637 }, { "epoch": 4.498483080513419, "grad_norm": 0.478515625, "learning_rate": 0.00011573764275518957, "loss": 0.0068, "step": 9638 }, { "epoch": 4.498949824970828, "grad_norm": 0.39453125, "learning_rate": 0.0001157231519539303, "loss": 0.007, "step": 9639 }, { "epoch": 4.499416569428238, "grad_norm": 1.15625, "learning_rate": 0.00011570866081413414, "loss": 0.0102, "step": 9640 }, { "epoch": 4.499883313885648, "grad_norm": 0.5390625, "learning_rate": 0.00011569416933611305, "loss": 0.011, "step": 9641 }, { "epoch": 4.500350058343058, "grad_norm": 0.4921875, "learning_rate": 0.00011567967752017912, "loss": 0.0085, "step": 9642 }, { "epoch": 4.5008168028004665, "grad_norm": 0.419921875, "learning_rate": 0.00011566518536664433, "loss": 0.0071, "step": 9643 }, { "epoch": 4.501283547257876, "grad_norm": 0.451171875, "learning_rate": 0.00011565069287582069, "loss": 0.007, "step": 9644 }, { "epoch": 4.501750291715286, "grad_norm": 0.5703125, "learning_rate": 0.00011563620004802029, "loss": 0.0118, "step": 9645 }, { "epoch": 4.502217036172695, "grad_norm": 0.69921875, "learning_rate": 0.00011562170688355516, "loss": 0.0155, "step": 9646 }, { "epoch": 4.502683780630105, "grad_norm": 0.53515625, "learning_rate": 0.00011560721338273735, "loss": 0.009, "step": 9647 }, { "epoch": 4.503150525087515, "grad_norm": 0.57421875, "learning_rate": 0.0001155927195458789, "loss": 0.0112, "step": 9648 }, { "epoch": 4.503150525087515, "eval_loss": 1.984728455543518, "eval_runtime": 58.7541, "eval_samples_per_second": 30.704, "eval_steps_per_second": 3.847, "step": 9648 }, { "epoch": 4.503617269544924, "grad_norm": 0.43359375, "learning_rate": 0.0001155782253732919, "loss": 0.0062, "step": 9649 }, { "epoch": 4.504084014002334, "grad_norm": 0.5625, "learning_rate": 0.00011556373086528845, "loss": 0.0122, "step": 9650 }, { "epoch": 4.504550758459743, "grad_norm": 0.48046875, "learning_rate": 0.00011554923602218055, "loss": 0.0105, "step": 9651 }, { "epoch": 4.505017502917153, "grad_norm": 0.6328125, "learning_rate": 0.00011553474084428042, "loss": 0.0081, "step": 9652 }, { "epoch": 4.505484247374563, "grad_norm": 0.578125, "learning_rate": 0.00011552024533190005, "loss": 0.0088, "step": 9653 }, { "epoch": 4.505950991831972, "grad_norm": 0.4921875, "learning_rate": 0.00011550574948535156, "loss": 0.0068, "step": 9654 }, { "epoch": 4.506417736289381, "grad_norm": 0.310546875, "learning_rate": 0.0001154912533049471, "loss": 0.0085, "step": 9655 }, { "epoch": 4.506884480746791, "grad_norm": 0.88671875, "learning_rate": 0.00011547675679099877, "loss": 0.0149, "step": 9656 }, { "epoch": 4.507351225204201, "grad_norm": 0.427734375, "learning_rate": 0.00011546225994381867, "loss": 0.005, "step": 9657 }, { "epoch": 4.507817969661611, "grad_norm": 0.376953125, "learning_rate": 0.00011544776276371898, "loss": 0.0057, "step": 9658 }, { "epoch": 4.50828471411902, "grad_norm": 0.84765625, "learning_rate": 0.00011543326525101182, "loss": 0.0252, "step": 9659 }, { "epoch": 4.5087514585764294, "grad_norm": 0.68359375, "learning_rate": 0.00011541876740600932, "loss": 0.0134, "step": 9660 }, { "epoch": 4.509218203033839, "grad_norm": 0.4609375, "learning_rate": 0.00011540426922902366, "loss": 0.0151, "step": 9661 }, { "epoch": 4.509684947491248, "grad_norm": 0.4375, "learning_rate": 0.00011538977072036699, "loss": 0.0108, "step": 9662 }, { "epoch": 4.510151691948658, "grad_norm": 0.41796875, "learning_rate": 0.00011537527188035148, "loss": 0.0055, "step": 9663 }, { "epoch": 4.510618436406068, "grad_norm": 0.498046875, "learning_rate": 0.00011536077270928932, "loss": 0.0093, "step": 9664 }, { "epoch": 4.5110851808634775, "grad_norm": 0.734375, "learning_rate": 0.00011534627320749264, "loss": 0.0206, "step": 9665 }, { "epoch": 4.5115519253208864, "grad_norm": 0.52734375, "learning_rate": 0.00011533177337527372, "loss": 0.0096, "step": 9666 }, { "epoch": 4.512018669778296, "grad_norm": 0.46484375, "learning_rate": 0.00011531727321294468, "loss": 0.0048, "step": 9667 }, { "epoch": 4.512485414235706, "grad_norm": 0.625, "learning_rate": 0.00011530277272081775, "loss": 0.0117, "step": 9668 }, { "epoch": 4.512952158693116, "grad_norm": 0.55859375, "learning_rate": 0.00011528827189920518, "loss": 0.0141, "step": 9669 }, { "epoch": 4.513418903150525, "grad_norm": 0.328125, "learning_rate": 0.00011527377074841914, "loss": 0.0055, "step": 9670 }, { "epoch": 4.5138856476079345, "grad_norm": 0.498046875, "learning_rate": 0.00011525926926877185, "loss": 0.0078, "step": 9671 }, { "epoch": 4.514352392065344, "grad_norm": 0.796875, "learning_rate": 0.00011524476746057556, "loss": 0.012, "step": 9672 }, { "epoch": 4.514819136522754, "grad_norm": 0.58203125, "learning_rate": 0.00011523026532414252, "loss": 0.0128, "step": 9673 }, { "epoch": 4.515285880980163, "grad_norm": 0.74609375, "learning_rate": 0.00011521576285978497, "loss": 0.0184, "step": 9674 }, { "epoch": 4.515752625437573, "grad_norm": 0.486328125, "learning_rate": 0.00011520126006781518, "loss": 0.0065, "step": 9675 }, { "epoch": 4.516219369894983, "grad_norm": 0.66015625, "learning_rate": 0.0001151867569485454, "loss": 0.0213, "step": 9676 }, { "epoch": 4.516686114352392, "grad_norm": 0.77734375, "learning_rate": 0.00011517225350228785, "loss": 0.0096, "step": 9677 }, { "epoch": 4.517152858809801, "grad_norm": 0.486328125, "learning_rate": 0.00011515774972935487, "loss": 0.0103, "step": 9678 }, { "epoch": 4.517619603267211, "grad_norm": 0.384765625, "learning_rate": 0.00011514324563005874, "loss": 0.0134, "step": 9679 }, { "epoch": 4.518086347724621, "grad_norm": 0.341796875, "learning_rate": 0.00011512874120471169, "loss": 0.0058, "step": 9680 }, { "epoch": 4.518553092182031, "grad_norm": 0.63671875, "learning_rate": 0.00011511423645362608, "loss": 0.0111, "step": 9681 }, { "epoch": 4.51901983663944, "grad_norm": 0.51171875, "learning_rate": 0.0001150997313771142, "loss": 0.0093, "step": 9682 }, { "epoch": 4.519486581096849, "grad_norm": 0.427734375, "learning_rate": 0.00011508522597548832, "loss": 0.0069, "step": 9683 }, { "epoch": 4.519953325554259, "grad_norm": 0.58203125, "learning_rate": 0.00011507072024906083, "loss": 0.0118, "step": 9684 }, { "epoch": 4.520420070011669, "grad_norm": 0.44140625, "learning_rate": 0.00011505621419814398, "loss": 0.0078, "step": 9685 }, { "epoch": 4.520886814469078, "grad_norm": 0.64453125, "learning_rate": 0.00011504170782305014, "loss": 0.0104, "step": 9686 }, { "epoch": 4.521353558926488, "grad_norm": 0.546875, "learning_rate": 0.00011502720112409165, "loss": 0.011, "step": 9687 }, { "epoch": 4.5218203033838975, "grad_norm": 0.60546875, "learning_rate": 0.00011501269410158082, "loss": 0.0185, "step": 9688 }, { "epoch": 4.522287047841306, "grad_norm": 0.484375, "learning_rate": 0.00011499818675583005, "loss": 0.0096, "step": 9689 }, { "epoch": 4.522753792298716, "grad_norm": 0.458984375, "learning_rate": 0.00011498367908715166, "loss": 0.0096, "step": 9690 }, { "epoch": 4.523220536756126, "grad_norm": 0.435546875, "learning_rate": 0.00011496917109585808, "loss": 0.0074, "step": 9691 }, { "epoch": 4.523687281213536, "grad_norm": 0.5, "learning_rate": 0.0001149546627822616, "loss": 0.0078, "step": 9692 }, { "epoch": 4.524154025670946, "grad_norm": 0.470703125, "learning_rate": 0.00011494015414667463, "loss": 0.0083, "step": 9693 }, { "epoch": 4.5246207701283545, "grad_norm": 1.015625, "learning_rate": 0.00011492564518940958, "loss": 0.0155, "step": 9694 }, { "epoch": 4.525087514585764, "grad_norm": 0.546875, "learning_rate": 0.00011491113591077885, "loss": 0.0081, "step": 9695 }, { "epoch": 4.525554259043174, "grad_norm": 0.3515625, "learning_rate": 0.00011489662631109479, "loss": 0.006, "step": 9696 }, { "epoch": 4.526021003500583, "grad_norm": 0.44921875, "learning_rate": 0.00011488211639066984, "loss": 0.0062, "step": 9697 }, { "epoch": 4.526487747957993, "grad_norm": 0.6796875, "learning_rate": 0.00011486760614981643, "loss": 0.0213, "step": 9698 }, { "epoch": 4.526954492415403, "grad_norm": 0.431640625, "learning_rate": 0.00011485309558884696, "loss": 0.0065, "step": 9699 }, { "epoch": 4.527421236872812, "grad_norm": 0.482421875, "learning_rate": 0.00011483858470807384, "loss": 0.0086, "step": 9700 }, { "epoch": 4.527887981330222, "grad_norm": 0.3828125, "learning_rate": 0.00011482407350780958, "loss": 0.0108, "step": 9701 }, { "epoch": 4.528354725787631, "grad_norm": 0.3984375, "learning_rate": 0.00011480956198836652, "loss": 0.0066, "step": 9702 }, { "epoch": 4.528821470245041, "grad_norm": 0.59765625, "learning_rate": 0.00011479505015005716, "loss": 0.0167, "step": 9703 }, { "epoch": 4.529288214702451, "grad_norm": 0.55078125, "learning_rate": 0.00011478053799319398, "loss": 0.0132, "step": 9704 }, { "epoch": 4.52975495915986, "grad_norm": 0.58984375, "learning_rate": 0.0001147660255180894, "loss": 0.0096, "step": 9705 }, { "epoch": 4.530221703617269, "grad_norm": 0.451171875, "learning_rate": 0.0001147515127250559, "loss": 0.0079, "step": 9706 }, { "epoch": 4.530688448074679, "grad_norm": 0.63671875, "learning_rate": 0.00011473699961440599, "loss": 0.0124, "step": 9707 }, { "epoch": 4.531155192532089, "grad_norm": 0.458984375, "learning_rate": 0.00011472248618645212, "loss": 0.0113, "step": 9708 }, { "epoch": 4.531621936989498, "grad_norm": 0.6015625, "learning_rate": 0.00011470797244150675, "loss": 0.0084, "step": 9709 }, { "epoch": 4.532088681446908, "grad_norm": 0.416015625, "learning_rate": 0.00011469345837988245, "loss": 0.0079, "step": 9710 }, { "epoch": 4.5325554259043175, "grad_norm": 0.55859375, "learning_rate": 0.0001146789440018917, "loss": 0.0096, "step": 9711 }, { "epoch": 4.533022170361727, "grad_norm": 0.4609375, "learning_rate": 0.00011466442930784697, "loss": 0.0113, "step": 9712 }, { "epoch": 4.533488914819136, "grad_norm": 0.69921875, "learning_rate": 0.0001146499142980608, "loss": 0.0141, "step": 9713 }, { "epoch": 4.533955659276546, "grad_norm": 0.68359375, "learning_rate": 0.00011463539897284574, "loss": 0.0129, "step": 9714 }, { "epoch": 4.534422403733956, "grad_norm": 0.439453125, "learning_rate": 0.00011462088333251432, "loss": 0.0105, "step": 9715 }, { "epoch": 4.534889148191366, "grad_norm": 0.408203125, "learning_rate": 0.00011460636737737903, "loss": 0.0078, "step": 9716 }, { "epoch": 4.5353558926487745, "grad_norm": 0.3984375, "learning_rate": 0.00011459185110775246, "loss": 0.0059, "step": 9717 }, { "epoch": 4.535822637106184, "grad_norm": 0.388671875, "learning_rate": 0.00011457733452394714, "loss": 0.01, "step": 9718 }, { "epoch": 4.536289381563594, "grad_norm": 0.70703125, "learning_rate": 0.00011456281762627565, "loss": 0.0105, "step": 9719 }, { "epoch": 4.536756126021004, "grad_norm": 0.62109375, "learning_rate": 0.00011454830041505049, "loss": 0.0101, "step": 9720 }, { "epoch": 4.537222870478413, "grad_norm": 0.5546875, "learning_rate": 0.00011453378289058433, "loss": 0.0077, "step": 9721 }, { "epoch": 4.537689614935823, "grad_norm": 0.34765625, "learning_rate": 0.00011451926505318969, "loss": 0.007, "step": 9722 }, { "epoch": 4.538156359393232, "grad_norm": 0.7265625, "learning_rate": 0.00011450474690317914, "loss": 0.0172, "step": 9723 }, { "epoch": 4.538623103850642, "grad_norm": 0.515625, "learning_rate": 0.00011449022844086534, "loss": 0.0118, "step": 9724 }, { "epoch": 4.539089848308051, "grad_norm": 0.5625, "learning_rate": 0.00011447570966656083, "loss": 0.0135, "step": 9725 }, { "epoch": 4.539556592765461, "grad_norm": 0.43359375, "learning_rate": 0.0001144611905805782, "loss": 0.0103, "step": 9726 }, { "epoch": 4.540023337222871, "grad_norm": 0.54296875, "learning_rate": 0.00011444667118323011, "loss": 0.0116, "step": 9727 }, { "epoch": 4.5404900816802805, "grad_norm": 0.5625, "learning_rate": 0.00011443215147482917, "loss": 0.0088, "step": 9728 }, { "epoch": 4.540956826137689, "grad_norm": 0.49609375, "learning_rate": 0.00011441763145568796, "loss": 0.0082, "step": 9729 }, { "epoch": 4.541423570595099, "grad_norm": 0.4609375, "learning_rate": 0.0001144031111261192, "loss": 0.0065, "step": 9730 }, { "epoch": 4.541890315052509, "grad_norm": 0.49609375, "learning_rate": 0.00011438859048643544, "loss": 0.0094, "step": 9731 }, { "epoch": 4.542357059509918, "grad_norm": 0.5078125, "learning_rate": 0.00011437406953694936, "loss": 0.0072, "step": 9732 }, { "epoch": 4.542823803967328, "grad_norm": 0.453125, "learning_rate": 0.00011435954827797363, "loss": 0.006, "step": 9733 }, { "epoch": 4.5432905484247375, "grad_norm": 0.48828125, "learning_rate": 0.0001143450267098209, "loss": 0.0119, "step": 9734 }, { "epoch": 4.543757292882147, "grad_norm": 0.48046875, "learning_rate": 0.00011433050483280383, "loss": 0.0111, "step": 9735 }, { "epoch": 4.544224037339557, "grad_norm": 0.396484375, "learning_rate": 0.00011431598264723506, "loss": 0.0048, "step": 9736 }, { "epoch": 4.544690781796966, "grad_norm": 0.53515625, "learning_rate": 0.00011430146015342733, "loss": 0.0089, "step": 9737 }, { "epoch": 4.545157526254376, "grad_norm": 0.490234375, "learning_rate": 0.00011428693735169329, "loss": 0.0101, "step": 9738 }, { "epoch": 4.5456242707117855, "grad_norm": 0.57421875, "learning_rate": 0.0001142724142423456, "loss": 0.0182, "step": 9739 }, { "epoch": 4.5460910151691944, "grad_norm": 0.4921875, "learning_rate": 0.00011425789082569705, "loss": 0.0081, "step": 9740 }, { "epoch": 4.546557759626604, "grad_norm": 0.4921875, "learning_rate": 0.00011424336710206027, "loss": 0.0105, "step": 9741 }, { "epoch": 4.547024504084014, "grad_norm": 0.61328125, "learning_rate": 0.00011422884307174797, "loss": 0.0121, "step": 9742 }, { "epoch": 4.547491248541424, "grad_norm": 0.67578125, "learning_rate": 0.00011421431873507294, "loss": 0.0124, "step": 9743 }, { "epoch": 4.547957992998834, "grad_norm": 0.392578125, "learning_rate": 0.00011419979409234782, "loss": 0.006, "step": 9744 }, { "epoch": 4.5484247374562425, "grad_norm": 0.515625, "learning_rate": 0.0001141852691438854, "loss": 0.0084, "step": 9745 }, { "epoch": 4.548891481913652, "grad_norm": 0.609375, "learning_rate": 0.00011417074388999837, "loss": 0.013, "step": 9746 }, { "epoch": 4.549358226371062, "grad_norm": 0.65234375, "learning_rate": 0.00011415621833099952, "loss": 0.014, "step": 9747 }, { "epoch": 4.549824970828471, "grad_norm": 0.66796875, "learning_rate": 0.00011414169246720158, "loss": 0.0112, "step": 9748 }, { "epoch": 4.550291715285881, "grad_norm": 0.462890625, "learning_rate": 0.00011412716629891733, "loss": 0.0086, "step": 9749 }, { "epoch": 4.550758459743291, "grad_norm": 0.39453125, "learning_rate": 0.0001141126398264595, "loss": 0.009, "step": 9750 }, { "epoch": 4.5512252042007, "grad_norm": 0.453125, "learning_rate": 0.00011409811305014088, "loss": 0.0052, "step": 9751 }, { "epoch": 4.551691948658109, "grad_norm": 0.515625, "learning_rate": 0.00011408358597027421, "loss": 0.0086, "step": 9752 }, { "epoch": 4.552158693115519, "grad_norm": 0.609375, "learning_rate": 0.00011406905858717234, "loss": 0.0105, "step": 9753 }, { "epoch": 4.552625437572929, "grad_norm": 0.6328125, "learning_rate": 0.00011405453090114804, "loss": 0.0126, "step": 9754 }, { "epoch": 4.553092182030339, "grad_norm": 0.58984375, "learning_rate": 0.00011404000291251406, "loss": 0.0117, "step": 9755 }, { "epoch": 4.553558926487748, "grad_norm": 0.439453125, "learning_rate": 0.00011402547462158327, "loss": 0.0067, "step": 9756 }, { "epoch": 4.554025670945157, "grad_norm": 0.625, "learning_rate": 0.00011401094602866844, "loss": 0.0101, "step": 9757 }, { "epoch": 4.554492415402567, "grad_norm": 0.57421875, "learning_rate": 0.00011399641713408237, "loss": 0.009, "step": 9758 }, { "epoch": 4.554959159859977, "grad_norm": 0.421875, "learning_rate": 0.00011398188793813795, "loss": 0.0043, "step": 9759 }, { "epoch": 4.555425904317386, "grad_norm": 0.7109375, "learning_rate": 0.00011396735844114797, "loss": 0.0193, "step": 9760 }, { "epoch": 4.555892648774796, "grad_norm": 0.462890625, "learning_rate": 0.00011395282864342525, "loss": 0.0125, "step": 9761 }, { "epoch": 4.5563593932322055, "grad_norm": 0.53125, "learning_rate": 0.00011393829854528262, "loss": 0.0086, "step": 9762 }, { "epoch": 4.556826137689615, "grad_norm": 0.62890625, "learning_rate": 0.000113923768147033, "loss": 0.0139, "step": 9763 }, { "epoch": 4.557292882147024, "grad_norm": 0.703125, "learning_rate": 0.00011390923744898918, "loss": 0.0116, "step": 9764 }, { "epoch": 4.557759626604434, "grad_norm": 0.58984375, "learning_rate": 0.00011389470645146405, "loss": 0.0114, "step": 9765 }, { "epoch": 4.558226371061844, "grad_norm": 0.515625, "learning_rate": 0.00011388017515477048, "loss": 0.009, "step": 9766 }, { "epoch": 4.558693115519253, "grad_norm": 0.70703125, "learning_rate": 0.00011386564355922133, "loss": 0.0189, "step": 9767 }, { "epoch": 4.5591598599766625, "grad_norm": 0.5625, "learning_rate": 0.00011385111166512951, "loss": 0.0082, "step": 9768 }, { "epoch": 4.559626604434072, "grad_norm": 0.515625, "learning_rate": 0.00011383657947280786, "loss": 0.0073, "step": 9769 }, { "epoch": 4.560093348891482, "grad_norm": 0.361328125, "learning_rate": 0.00011382204698256933, "loss": 0.0078, "step": 9770 }, { "epoch": 4.560560093348892, "grad_norm": 0.56640625, "learning_rate": 0.00011380751419472677, "loss": 0.0106, "step": 9771 }, { "epoch": 4.561026837806301, "grad_norm": 0.39453125, "learning_rate": 0.00011379298110959313, "loss": 0.0104, "step": 9772 }, { "epoch": 4.561493582263711, "grad_norm": 0.76171875, "learning_rate": 0.00011377844772748129, "loss": 0.0144, "step": 9773 }, { "epoch": 4.56196032672112, "grad_norm": 0.48828125, "learning_rate": 0.00011376391404870421, "loss": 0.0095, "step": 9774 }, { "epoch": 4.562427071178529, "grad_norm": 0.65234375, "learning_rate": 0.00011374938007357474, "loss": 0.0214, "step": 9775 }, { "epoch": 4.562893815635939, "grad_norm": 0.349609375, "learning_rate": 0.0001137348458024059, "loss": 0.0053, "step": 9776 }, { "epoch": 4.563360560093349, "grad_norm": 0.388671875, "learning_rate": 0.00011372031123551056, "loss": 0.0052, "step": 9777 }, { "epoch": 4.563827304550759, "grad_norm": 0.51171875, "learning_rate": 0.0001137057763732017, "loss": 0.0075, "step": 9778 }, { "epoch": 4.5642940490081685, "grad_norm": 0.482421875, "learning_rate": 0.00011369124121579229, "loss": 0.0087, "step": 9779 }, { "epoch": 4.564760793465577, "grad_norm": 0.36328125, "learning_rate": 0.00011367670576359526, "loss": 0.0078, "step": 9780 }, { "epoch": 4.565227537922987, "grad_norm": 0.498046875, "learning_rate": 0.00011366217001692356, "loss": 0.0089, "step": 9781 }, { "epoch": 4.565694282380397, "grad_norm": 0.306640625, "learning_rate": 0.00011364763397609022, "loss": 0.0035, "step": 9782 }, { "epoch": 4.566161026837806, "grad_norm": 0.42578125, "learning_rate": 0.00011363309764140814, "loss": 0.008, "step": 9783 }, { "epoch": 4.566627771295216, "grad_norm": 0.5234375, "learning_rate": 0.00011361856101319037, "loss": 0.012, "step": 9784 }, { "epoch": 4.5670945157526255, "grad_norm": 0.54296875, "learning_rate": 0.00011360402409174984, "loss": 0.0074, "step": 9785 }, { "epoch": 4.567561260210035, "grad_norm": 0.28125, "learning_rate": 0.0001135894868773996, "loss": 0.0067, "step": 9786 }, { "epoch": 4.568028004667444, "grad_norm": 0.6015625, "learning_rate": 0.00011357494937045263, "loss": 0.0083, "step": 9787 }, { "epoch": 4.568494749124854, "grad_norm": 0.431640625, "learning_rate": 0.00011356041157122193, "loss": 0.0057, "step": 9788 }, { "epoch": 4.568961493582264, "grad_norm": 0.625, "learning_rate": 0.00011354587348002053, "loss": 0.0094, "step": 9789 }, { "epoch": 4.569428238039674, "grad_norm": 0.30859375, "learning_rate": 0.00011353133509716145, "loss": 0.0045, "step": 9790 }, { "epoch": 4.5698949824970825, "grad_norm": 0.52734375, "learning_rate": 0.0001135167964229577, "loss": 0.0089, "step": 9791 }, { "epoch": 4.570361726954492, "grad_norm": 0.625, "learning_rate": 0.00011350225745772232, "loss": 0.0102, "step": 9792 }, { "epoch": 4.570828471411902, "grad_norm": 0.396484375, "learning_rate": 0.00011348771820176838, "loss": 0.0063, "step": 9793 }, { "epoch": 4.571295215869312, "grad_norm": 0.6796875, "learning_rate": 0.0001134731786554089, "loss": 0.0146, "step": 9794 }, { "epoch": 4.571761960326721, "grad_norm": 0.6171875, "learning_rate": 0.00011345863881895689, "loss": 0.0152, "step": 9795 }, { "epoch": 4.572228704784131, "grad_norm": 0.34375, "learning_rate": 0.00011344409869272552, "loss": 0.0088, "step": 9796 }, { "epoch": 4.57269544924154, "grad_norm": 0.337890625, "learning_rate": 0.00011342955827702774, "loss": 0.0073, "step": 9797 }, { "epoch": 4.57316219369895, "grad_norm": 0.341796875, "learning_rate": 0.0001134150175721767, "loss": 0.0087, "step": 9798 }, { "epoch": 4.573628938156359, "grad_norm": 0.455078125, "learning_rate": 0.00011340047657848542, "loss": 0.0086, "step": 9799 }, { "epoch": 4.574095682613769, "grad_norm": 0.421875, "learning_rate": 0.00011338593529626704, "loss": 0.0099, "step": 9800 }, { "epoch": 4.574562427071179, "grad_norm": 0.498046875, "learning_rate": 0.00011337139372583458, "loss": 0.0083, "step": 9801 }, { "epoch": 4.5750291715285885, "grad_norm": 0.5, "learning_rate": 0.00011335685186750123, "loss": 0.0101, "step": 9802 }, { "epoch": 4.575495915985997, "grad_norm": 0.47265625, "learning_rate": 0.00011334230972158, "loss": 0.008, "step": 9803 }, { "epoch": 4.575962660443407, "grad_norm": 0.482421875, "learning_rate": 0.00011332776728838403, "loss": 0.0088, "step": 9804 }, { "epoch": 4.576429404900817, "grad_norm": 0.57421875, "learning_rate": 0.00011331322456822649, "loss": 0.0086, "step": 9805 }, { "epoch": 4.576896149358227, "grad_norm": 0.455078125, "learning_rate": 0.0001132986815614204, "loss": 0.0114, "step": 9806 }, { "epoch": 4.577362893815636, "grad_norm": 0.51171875, "learning_rate": 0.00011328413826827897, "loss": 0.0132, "step": 9807 }, { "epoch": 4.5778296382730455, "grad_norm": 0.734375, "learning_rate": 0.00011326959468911531, "loss": 0.0202, "step": 9808 }, { "epoch": 4.578296382730455, "grad_norm": 0.4296875, "learning_rate": 0.00011325505082424255, "loss": 0.0111, "step": 9809 }, { "epoch": 4.578763127187864, "grad_norm": 0.6328125, "learning_rate": 0.00011324050667397383, "loss": 0.013, "step": 9810 }, { "epoch": 4.579229871645274, "grad_norm": 0.609375, "learning_rate": 0.00011322596223862229, "loss": 0.009, "step": 9811 }, { "epoch": 4.579696616102684, "grad_norm": 0.55859375, "learning_rate": 0.00011321141751850115, "loss": 0.0094, "step": 9812 }, { "epoch": 4.5801633605600935, "grad_norm": 0.423828125, "learning_rate": 0.00011319687251392353, "loss": 0.0067, "step": 9813 }, { "epoch": 4.580630105017503, "grad_norm": 0.6484375, "learning_rate": 0.00011318232722520257, "loss": 0.0082, "step": 9814 }, { "epoch": 4.581096849474912, "grad_norm": 0.40625, "learning_rate": 0.0001131677816526515, "loss": 0.0113, "step": 9815 }, { "epoch": 4.581563593932322, "grad_norm": 0.4765625, "learning_rate": 0.00011315323579658348, "loss": 0.0105, "step": 9816 }, { "epoch": 4.582030338389732, "grad_norm": 0.6015625, "learning_rate": 0.00011313868965731169, "loss": 0.0079, "step": 9817 }, { "epoch": 4.582497082847141, "grad_norm": 0.390625, "learning_rate": 0.00011312414323514935, "loss": 0.0053, "step": 9818 }, { "epoch": 4.5829638273045505, "grad_norm": 0.451171875, "learning_rate": 0.00011310959653040967, "loss": 0.0083, "step": 9819 }, { "epoch": 4.58343057176196, "grad_norm": 0.419921875, "learning_rate": 0.00011309504954340577, "loss": 0.0114, "step": 9820 }, { "epoch": 4.58389731621937, "grad_norm": 0.69921875, "learning_rate": 0.00011308050227445095, "loss": 0.0145, "step": 9821 }, { "epoch": 4.58436406067678, "grad_norm": 0.6484375, "learning_rate": 0.00011306595472385842, "loss": 0.0123, "step": 9822 }, { "epoch": 4.584830805134189, "grad_norm": 0.578125, "learning_rate": 0.00011305140689194136, "loss": 0.0105, "step": 9823 }, { "epoch": 4.585297549591599, "grad_norm": 0.482421875, "learning_rate": 0.00011303685877901304, "loss": 0.0082, "step": 9824 }, { "epoch": 4.585764294049008, "grad_norm": 0.75390625, "learning_rate": 0.0001130223103853867, "loss": 0.0164, "step": 9825 }, { "epoch": 4.586231038506417, "grad_norm": 0.396484375, "learning_rate": 0.00011300776171137557, "loss": 0.0057, "step": 9826 }, { "epoch": 4.586697782963827, "grad_norm": 0.609375, "learning_rate": 0.00011299321275729287, "loss": 0.009, "step": 9827 }, { "epoch": 4.587164527421237, "grad_norm": 0.369140625, "learning_rate": 0.0001129786635234519, "loss": 0.006, "step": 9828 }, { "epoch": 4.587631271878647, "grad_norm": 0.431640625, "learning_rate": 0.00011296411401016589, "loss": 0.0046, "step": 9829 }, { "epoch": 4.588098016336056, "grad_norm": 0.35546875, "learning_rate": 0.00011294956421774812, "loss": 0.0041, "step": 9830 }, { "epoch": 4.588564760793465, "grad_norm": 0.4453125, "learning_rate": 0.00011293501414651189, "loss": 0.0057, "step": 9831 }, { "epoch": 4.589031505250875, "grad_norm": 0.53125, "learning_rate": 0.00011292046379677044, "loss": 0.0088, "step": 9832 }, { "epoch": 4.589498249708285, "grad_norm": 0.494140625, "learning_rate": 0.00011290591316883707, "loss": 0.008, "step": 9833 }, { "epoch": 4.589964994165694, "grad_norm": 0.61328125, "learning_rate": 0.00011289136226302505, "loss": 0.0091, "step": 9834 }, { "epoch": 4.590431738623104, "grad_norm": 0.66015625, "learning_rate": 0.00011287681107964773, "loss": 0.0132, "step": 9835 }, { "epoch": 4.5908984830805135, "grad_norm": 0.72265625, "learning_rate": 0.0001128622596190184, "loss": 0.0232, "step": 9836 }, { "epoch": 4.591365227537923, "grad_norm": 0.43359375, "learning_rate": 0.00011284770788145029, "loss": 0.0046, "step": 9837 }, { "epoch": 4.591831971995332, "grad_norm": 0.392578125, "learning_rate": 0.0001128331558672568, "loss": 0.0072, "step": 9838 }, { "epoch": 4.592298716452742, "grad_norm": 0.578125, "learning_rate": 0.00011281860357675124, "loss": 0.0122, "step": 9839 }, { "epoch": 4.592765460910152, "grad_norm": 0.48828125, "learning_rate": 0.00011280405101024689, "loss": 0.0102, "step": 9840 }, { "epoch": 4.593232205367562, "grad_norm": 0.40234375, "learning_rate": 0.00011278949816805715, "loss": 0.0053, "step": 9841 }, { "epoch": 4.5936989498249705, "grad_norm": 0.416015625, "learning_rate": 0.0001127749450504953, "loss": 0.0053, "step": 9842 }, { "epoch": 4.59416569428238, "grad_norm": 0.515625, "learning_rate": 0.00011276039165787474, "loss": 0.0072, "step": 9843 }, { "epoch": 4.59463243873979, "grad_norm": 0.68359375, "learning_rate": 0.00011274583799050878, "loss": 0.0095, "step": 9844 }, { "epoch": 4.5950991831972, "grad_norm": 0.4765625, "learning_rate": 0.00011273128404871076, "loss": 0.0099, "step": 9845 }, { "epoch": 4.595565927654609, "grad_norm": 0.380859375, "learning_rate": 0.00011271672983279406, "loss": 0.0058, "step": 9846 }, { "epoch": 4.596032672112019, "grad_norm": 0.345703125, "learning_rate": 0.0001127021753430721, "loss": 0.0049, "step": 9847 }, { "epoch": 4.596499416569428, "grad_norm": 0.5703125, "learning_rate": 0.0001126876205798582, "loss": 0.0089, "step": 9848 }, { "epoch": 4.596966161026838, "grad_norm": 0.400390625, "learning_rate": 0.00011267306554346575, "loss": 0.0059, "step": 9849 }, { "epoch": 4.597432905484247, "grad_norm": 0.412109375, "learning_rate": 0.00011265851023420812, "loss": 0.0081, "step": 9850 }, { "epoch": 4.597899649941657, "grad_norm": 0.36328125, "learning_rate": 0.00011264395465239874, "loss": 0.0059, "step": 9851 }, { "epoch": 4.598366394399067, "grad_norm": 0.478515625, "learning_rate": 0.00011262939879835097, "loss": 0.009, "step": 9852 }, { "epoch": 4.598833138856476, "grad_norm": 0.4453125, "learning_rate": 0.00011261484267237822, "loss": 0.0071, "step": 9853 }, { "epoch": 4.599299883313885, "grad_norm": 0.37109375, "learning_rate": 0.00011260028627479395, "loss": 0.0042, "step": 9854 }, { "epoch": 4.599766627771295, "grad_norm": 0.5546875, "learning_rate": 0.00011258572960591154, "loss": 0.0096, "step": 9855 }, { "epoch": 4.600233372228705, "grad_norm": 0.50390625, "learning_rate": 0.00011257117266604436, "loss": 0.0133, "step": 9856 }, { "epoch": 4.600700116686115, "grad_norm": 0.6171875, "learning_rate": 0.0001125566154555059, "loss": 0.0076, "step": 9857 }, { "epoch": 4.601166861143524, "grad_norm": 0.380859375, "learning_rate": 0.00011254205797460962, "loss": 0.0056, "step": 9858 }, { "epoch": 4.6016336056009335, "grad_norm": 0.52734375, "learning_rate": 0.00011252750022366887, "loss": 0.0086, "step": 9859 }, { "epoch": 4.602100350058343, "grad_norm": 0.51171875, "learning_rate": 0.00011251294220299715, "loss": 0.0071, "step": 9860 }, { "epoch": 4.602567094515752, "grad_norm": 0.365234375, "learning_rate": 0.0001124983839129079, "loss": 0.007, "step": 9861 }, { "epoch": 4.603033838973162, "grad_norm": 0.5703125, "learning_rate": 0.0001124838253537146, "loss": 0.006, "step": 9862 }, { "epoch": 4.603500583430572, "grad_norm": 0.47265625, "learning_rate": 0.00011246926652573064, "loss": 0.0095, "step": 9863 }, { "epoch": 4.603967327887982, "grad_norm": 0.470703125, "learning_rate": 0.00011245470742926957, "loss": 0.0073, "step": 9864 }, { "epoch": 4.604434072345391, "grad_norm": 0.65234375, "learning_rate": 0.00011244014806464484, "loss": 0.0151, "step": 9865 }, { "epoch": 4.6049008168028, "grad_norm": 0.39453125, "learning_rate": 0.0001124255884321699, "loss": 0.0046, "step": 9866 }, { "epoch": 4.60536756126021, "grad_norm": 0.462890625, "learning_rate": 0.0001124110285321583, "loss": 0.0103, "step": 9867 }, { "epoch": 4.60583430571762, "grad_norm": 0.5390625, "learning_rate": 0.00011239646836492346, "loss": 0.0056, "step": 9868 }, { "epoch": 4.606301050175029, "grad_norm": 0.48828125, "learning_rate": 0.00011238190793077885, "loss": 0.0181, "step": 9869 }, { "epoch": 4.606767794632439, "grad_norm": 0.44140625, "learning_rate": 0.00011236734723003807, "loss": 0.0075, "step": 9870 }, { "epoch": 4.607234539089848, "grad_norm": 0.5390625, "learning_rate": 0.0001123527862630146, "loss": 0.0066, "step": 9871 }, { "epoch": 4.607701283547258, "grad_norm": 0.66015625, "learning_rate": 0.00011233822503002189, "loss": 0.0093, "step": 9872 }, { "epoch": 4.608168028004667, "grad_norm": 0.52734375, "learning_rate": 0.00011232366353137354, "loss": 0.0106, "step": 9873 }, { "epoch": 4.608634772462077, "grad_norm": 0.494140625, "learning_rate": 0.00011230910176738303, "loss": 0.0083, "step": 9874 }, { "epoch": 4.609101516919487, "grad_norm": 0.75390625, "learning_rate": 0.0001122945397383639, "loss": 0.0201, "step": 9875 }, { "epoch": 4.6095682613768965, "grad_norm": 0.474609375, "learning_rate": 0.00011227997744462968, "loss": 0.0185, "step": 9876 }, { "epoch": 4.610035005834305, "grad_norm": 0.33203125, "learning_rate": 0.00011226541488649393, "loss": 0.0051, "step": 9877 }, { "epoch": 4.610501750291715, "grad_norm": 0.419921875, "learning_rate": 0.00011225085206427019, "loss": 0.0116, "step": 9878 }, { "epoch": 4.610968494749125, "grad_norm": 0.57421875, "learning_rate": 0.000112236288978272, "loss": 0.0084, "step": 9879 }, { "epoch": 4.611435239206535, "grad_norm": 0.40625, "learning_rate": 0.00011222172562881297, "loss": 0.0063, "step": 9880 }, { "epoch": 4.611901983663944, "grad_norm": 0.6328125, "learning_rate": 0.00011220716201620661, "loss": 0.0083, "step": 9881 }, { "epoch": 4.6123687281213535, "grad_norm": 0.578125, "learning_rate": 0.00011219259814076646, "loss": 0.0113, "step": 9882 }, { "epoch": 4.612835472578763, "grad_norm": 0.55859375, "learning_rate": 0.0001121780340028062, "loss": 0.018, "step": 9883 }, { "epoch": 4.613302217036173, "grad_norm": 0.45703125, "learning_rate": 0.00011216346960263934, "loss": 0.0072, "step": 9884 }, { "epoch": 4.613768961493582, "grad_norm": 0.734375, "learning_rate": 0.00011214890494057949, "loss": 0.0101, "step": 9885 }, { "epoch": 4.614235705950992, "grad_norm": 0.404296875, "learning_rate": 0.00011213434001694022, "loss": 0.0048, "step": 9886 }, { "epoch": 4.6147024504084015, "grad_norm": 0.427734375, "learning_rate": 0.00011211977483203515, "loss": 0.0071, "step": 9887 }, { "epoch": 4.615169194865811, "grad_norm": 0.43359375, "learning_rate": 0.0001121052093861779, "loss": 0.0079, "step": 9888 }, { "epoch": 4.61563593932322, "grad_norm": 0.330078125, "learning_rate": 0.00011209064367968203, "loss": 0.0087, "step": 9889 }, { "epoch": 4.61610268378063, "grad_norm": 0.5234375, "learning_rate": 0.00011207607771286121, "loss": 0.0128, "step": 9890 }, { "epoch": 4.61656942823804, "grad_norm": 0.5078125, "learning_rate": 0.00011206151148602906, "loss": 0.0079, "step": 9891 }, { "epoch": 4.61703617269545, "grad_norm": 0.486328125, "learning_rate": 0.00011204694499949915, "loss": 0.0078, "step": 9892 }, { "epoch": 4.6175029171528585, "grad_norm": 0.74609375, "learning_rate": 0.00011203237825358515, "loss": 0.0091, "step": 9893 }, { "epoch": 4.617969661610268, "grad_norm": 0.50390625, "learning_rate": 0.00011201781124860071, "loss": 0.007, "step": 9894 }, { "epoch": 4.618436406067678, "grad_norm": 0.58984375, "learning_rate": 0.00011200324398485943, "loss": 0.0069, "step": 9895 }, { "epoch": 4.618903150525087, "grad_norm": 0.396484375, "learning_rate": 0.00011198867646267502, "loss": 0.0045, "step": 9896 }, { "epoch": 4.619369894982497, "grad_norm": 0.45703125, "learning_rate": 0.0001119741086823611, "loss": 0.0051, "step": 9897 }, { "epoch": 4.619836639439907, "grad_norm": 0.275390625, "learning_rate": 0.00011195954064423133, "loss": 0.005, "step": 9898 }, { "epoch": 4.620303383897316, "grad_norm": 0.71875, "learning_rate": 0.00011194497234859938, "loss": 0.0101, "step": 9899 }, { "epoch": 4.620770128354726, "grad_norm": 0.451171875, "learning_rate": 0.00011193040379577893, "loss": 0.015, "step": 9900 }, { "epoch": 4.621236872812135, "grad_norm": 0.59765625, "learning_rate": 0.00011191583498608364, "loss": 0.0104, "step": 9901 }, { "epoch": 4.621703617269545, "grad_norm": 0.59375, "learning_rate": 0.00011190126591982719, "loss": 0.0078, "step": 9902 }, { "epoch": 4.622170361726955, "grad_norm": 0.474609375, "learning_rate": 0.00011188669659732329, "loss": 0.0174, "step": 9903 }, { "epoch": 4.622637106184364, "grad_norm": 0.765625, "learning_rate": 0.00011187212701888563, "loss": 0.0137, "step": 9904 }, { "epoch": 4.623103850641773, "grad_norm": 0.45703125, "learning_rate": 0.00011185755718482788, "loss": 0.0084, "step": 9905 }, { "epoch": 4.623570595099183, "grad_norm": 0.5078125, "learning_rate": 0.00011184298709546379, "loss": 0.0109, "step": 9906 }, { "epoch": 4.624037339556593, "grad_norm": 0.423828125, "learning_rate": 0.00011182841675110707, "loss": 0.0076, "step": 9907 }, { "epoch": 4.624504084014003, "grad_norm": 0.5625, "learning_rate": 0.00011181384615207138, "loss": 0.0092, "step": 9908 }, { "epoch": 4.624970828471412, "grad_norm": 0.58203125, "learning_rate": 0.00011179927529867048, "loss": 0.0136, "step": 9909 }, { "epoch": 4.6254375729288215, "grad_norm": 0.58984375, "learning_rate": 0.0001117847041912181, "loss": 0.0069, "step": 9910 }, { "epoch": 4.625904317386231, "grad_norm": 0.5703125, "learning_rate": 0.00011177013283002795, "loss": 0.0089, "step": 9911 }, { "epoch": 4.62637106184364, "grad_norm": 0.4453125, "learning_rate": 0.00011175556121541377, "loss": 0.0067, "step": 9912 }, { "epoch": 4.62683780630105, "grad_norm": 0.62109375, "learning_rate": 0.00011174098934768936, "loss": 0.0141, "step": 9913 }, { "epoch": 4.62730455075846, "grad_norm": 0.53125, "learning_rate": 0.00011172641722716839, "loss": 0.008, "step": 9914 }, { "epoch": 4.62777129521587, "grad_norm": 0.5390625, "learning_rate": 0.00011171184485416466, "loss": 0.0082, "step": 9915 }, { "epoch": 4.6282380396732785, "grad_norm": 0.59375, "learning_rate": 0.00011169727222899191, "loss": 0.0144, "step": 9916 }, { "epoch": 4.628704784130688, "grad_norm": 0.49609375, "learning_rate": 0.00011168269935196392, "loss": 0.0053, "step": 9917 }, { "epoch": 4.629171528588098, "grad_norm": 0.64453125, "learning_rate": 0.00011166812622339442, "loss": 0.0154, "step": 9918 }, { "epoch": 4.629638273045508, "grad_norm": 0.53515625, "learning_rate": 0.00011165355284359725, "loss": 0.013, "step": 9919 }, { "epoch": 4.630105017502917, "grad_norm": 0.48828125, "learning_rate": 0.00011163897921288615, "loss": 0.0089, "step": 9920 }, { "epoch": 4.630571761960327, "grad_norm": 0.57421875, "learning_rate": 0.00011162440533157488, "loss": 0.0114, "step": 9921 }, { "epoch": 4.631038506417736, "grad_norm": 0.462890625, "learning_rate": 0.0001116098311999773, "loss": 0.0066, "step": 9922 }, { "epoch": 4.631505250875146, "grad_norm": 0.35546875, "learning_rate": 0.00011159525681840716, "loss": 0.0048, "step": 9923 }, { "epoch": 4.631971995332555, "grad_norm": 0.5546875, "learning_rate": 0.0001115806821871783, "loss": 0.01, "step": 9924 }, { "epoch": 4.632438739789965, "grad_norm": 0.41015625, "learning_rate": 0.00011156610730660446, "loss": 0.0092, "step": 9925 }, { "epoch": 4.632905484247375, "grad_norm": 0.392578125, "learning_rate": 0.00011155153217699955, "loss": 0.0078, "step": 9926 }, { "epoch": 4.6333722287047845, "grad_norm": 0.5546875, "learning_rate": 0.0001115369567986773, "loss": 0.0115, "step": 9927 }, { "epoch": 4.633838973162193, "grad_norm": 0.369140625, "learning_rate": 0.00011152238117195155, "loss": 0.013, "step": 9928 }, { "epoch": 4.634305717619603, "grad_norm": 0.83203125, "learning_rate": 0.00011150780529713619, "loss": 0.0202, "step": 9929 }, { "epoch": 4.634772462077013, "grad_norm": 0.60546875, "learning_rate": 0.00011149322917454501, "loss": 0.0139, "step": 9930 }, { "epoch": 4.635239206534423, "grad_norm": 0.55078125, "learning_rate": 0.00011147865280449181, "loss": 0.0122, "step": 9931 }, { "epoch": 4.635705950991832, "grad_norm": 0.54296875, "learning_rate": 0.00011146407618729053, "loss": 0.0098, "step": 9932 }, { "epoch": 4.6361726954492415, "grad_norm": 0.515625, "learning_rate": 0.00011144949932325497, "loss": 0.0102, "step": 9933 }, { "epoch": 4.636639439906651, "grad_norm": 0.640625, "learning_rate": 0.00011143492221269895, "loss": 0.009, "step": 9934 }, { "epoch": 4.637106184364061, "grad_norm": 0.49609375, "learning_rate": 0.00011142034485593637, "loss": 0.0117, "step": 9935 }, { "epoch": 4.63757292882147, "grad_norm": 0.640625, "learning_rate": 0.00011140576725328114, "loss": 0.0136, "step": 9936 }, { "epoch": 4.63803967327888, "grad_norm": 0.44140625, "learning_rate": 0.00011139118940504704, "loss": 0.0064, "step": 9937 }, { "epoch": 4.63850641773629, "grad_norm": 0.34765625, "learning_rate": 0.00011137661131154801, "loss": 0.0042, "step": 9938 }, { "epoch": 4.6389731621936985, "grad_norm": 0.58203125, "learning_rate": 0.00011136203297309795, "loss": 0.013, "step": 9939 }, { "epoch": 4.639439906651108, "grad_norm": 0.2890625, "learning_rate": 0.00011134745439001069, "loss": 0.0044, "step": 9940 }, { "epoch": 4.639906651108518, "grad_norm": 0.341796875, "learning_rate": 0.00011133287556260011, "loss": 0.0044, "step": 9941 }, { "epoch": 4.640373395565928, "grad_norm": 0.302734375, "learning_rate": 0.00011131829649118018, "loss": 0.0055, "step": 9942 }, { "epoch": 4.640840140023338, "grad_norm": 0.58203125, "learning_rate": 0.00011130371717606475, "loss": 0.0165, "step": 9943 }, { "epoch": 4.641306884480747, "grad_norm": 0.7890625, "learning_rate": 0.00011128913761756776, "loss": 0.018, "step": 9944 }, { "epoch": 4.641773628938156, "grad_norm": 0.78125, "learning_rate": 0.00011127455781600311, "loss": 0.0154, "step": 9945 }, { "epoch": 4.642240373395566, "grad_norm": 0.384765625, "learning_rate": 0.00011125997777168473, "loss": 0.0051, "step": 9946 }, { "epoch": 4.642707117852975, "grad_norm": 0.65625, "learning_rate": 0.00011124539748492651, "loss": 0.013, "step": 9947 }, { "epoch": 4.643173862310385, "grad_norm": 0.64453125, "learning_rate": 0.00011123081695604242, "loss": 0.0206, "step": 9948 }, { "epoch": 4.643640606767795, "grad_norm": 0.482421875, "learning_rate": 0.00011121623618534637, "loss": 0.01, "step": 9949 }, { "epoch": 4.6441073512252045, "grad_norm": 0.53515625, "learning_rate": 0.00011120165517315235, "loss": 0.0083, "step": 9950 }, { "epoch": 4.644574095682614, "grad_norm": 0.474609375, "learning_rate": 0.00011118707391977421, "loss": 0.0097, "step": 9951 }, { "epoch": 4.645040840140023, "grad_norm": 0.421875, "learning_rate": 0.00011117249242552599, "loss": 0.0075, "step": 9952 }, { "epoch": 4.645507584597433, "grad_norm": 0.404296875, "learning_rate": 0.00011115791069072161, "loss": 0.0065, "step": 9953 }, { "epoch": 4.645974329054843, "grad_norm": 0.75390625, "learning_rate": 0.00011114332871567502, "loss": 0.0228, "step": 9954 }, { "epoch": 4.646441073512252, "grad_norm": 0.41015625, "learning_rate": 0.00011112874650070021, "loss": 0.0059, "step": 9955 }, { "epoch": 4.6469078179696615, "grad_norm": 0.6171875, "learning_rate": 0.00011111416404611115, "loss": 0.0061, "step": 9956 }, { "epoch": 4.647374562427071, "grad_norm": 0.4375, "learning_rate": 0.0001110995813522218, "loss": 0.0067, "step": 9957 }, { "epoch": 4.647841306884481, "grad_norm": 0.59375, "learning_rate": 0.00011108499841934612, "loss": 0.0112, "step": 9958 }, { "epoch": 4.64830805134189, "grad_norm": 0.46875, "learning_rate": 0.00011107041524779815, "loss": 0.0058, "step": 9959 }, { "epoch": 4.6487747957993, "grad_norm": 0.8125, "learning_rate": 0.00011105583183789185, "loss": 0.0104, "step": 9960 }, { "epoch": 4.6492415402567095, "grad_norm": 0.4296875, "learning_rate": 0.00011104124818994121, "loss": 0.0092, "step": 9961 }, { "epoch": 4.649708284714119, "grad_norm": 0.40625, "learning_rate": 0.0001110266643042603, "loss": 0.0096, "step": 9962 }, { "epoch": 4.650175029171528, "grad_norm": 0.482421875, "learning_rate": 0.00011101208018116305, "loss": 0.0149, "step": 9963 }, { "epoch": 4.650641773628938, "grad_norm": 0.4921875, "learning_rate": 0.00011099749582096346, "loss": 0.0096, "step": 9964 }, { "epoch": 4.651108518086348, "grad_norm": 0.458984375, "learning_rate": 0.00011098291122397562, "loss": 0.0085, "step": 9965 }, { "epoch": 4.651575262543758, "grad_norm": 0.326171875, "learning_rate": 0.0001109683263905135, "loss": 0.0118, "step": 9966 }, { "epoch": 4.6520420070011665, "grad_norm": 0.78125, "learning_rate": 0.00011095374132089114, "loss": 0.0177, "step": 9967 }, { "epoch": 4.652508751458576, "grad_norm": 0.6953125, "learning_rate": 0.00011093915601542258, "loss": 0.016, "step": 9968 }, { "epoch": 4.652975495915986, "grad_norm": 0.60546875, "learning_rate": 0.00011092457047442186, "loss": 0.0109, "step": 9969 }, { "epoch": 4.653442240373396, "grad_norm": 0.53515625, "learning_rate": 0.000110909984698203, "loss": 0.0098, "step": 9970 }, { "epoch": 4.653908984830805, "grad_norm": 0.609375, "learning_rate": 0.00011089539868708008, "loss": 0.0076, "step": 9971 }, { "epoch": 4.654375729288215, "grad_norm": 0.421875, "learning_rate": 0.00011088081244136715, "loss": 0.0054, "step": 9972 }, { "epoch": 4.654842473745624, "grad_norm": 0.55859375, "learning_rate": 0.00011086622596137824, "loss": 0.0112, "step": 9973 }, { "epoch": 4.655309218203034, "grad_norm": 0.66015625, "learning_rate": 0.00011085163924742741, "loss": 0.0146, "step": 9974 }, { "epoch": 4.655775962660443, "grad_norm": 0.6015625, "learning_rate": 0.00011083705229982877, "loss": 0.015, "step": 9975 }, { "epoch": 4.656242707117853, "grad_norm": 0.66796875, "learning_rate": 0.00011082246511889638, "loss": 0.0106, "step": 9976 }, { "epoch": 4.656709451575263, "grad_norm": 0.453125, "learning_rate": 0.00011080787770494427, "loss": 0.0069, "step": 9977 }, { "epoch": 4.6571761960326725, "grad_norm": 0.58984375, "learning_rate": 0.00011079329005828657, "loss": 0.0084, "step": 9978 }, { "epoch": 4.657642940490081, "grad_norm": 0.40625, "learning_rate": 0.0001107787021792374, "loss": 0.0096, "step": 9979 }, { "epoch": 4.658109684947491, "grad_norm": 0.466796875, "learning_rate": 0.00011076411406811078, "loss": 0.0082, "step": 9980 }, { "epoch": 4.658576429404901, "grad_norm": 0.76953125, "learning_rate": 0.00011074952572522084, "loss": 0.0128, "step": 9981 }, { "epoch": 4.65904317386231, "grad_norm": 0.50390625, "learning_rate": 0.0001107349371508817, "loss": 0.007, "step": 9982 }, { "epoch": 4.65950991831972, "grad_norm": 0.46875, "learning_rate": 0.00011072034834540745, "loss": 0.0132, "step": 9983 }, { "epoch": 4.6599766627771295, "grad_norm": 0.4140625, "learning_rate": 0.00011070575930911217, "loss": 0.0078, "step": 9984 }, { "epoch": 4.660443407234539, "grad_norm": 0.5234375, "learning_rate": 0.00011069117004231007, "loss": 0.012, "step": 9985 }, { "epoch": 4.660910151691949, "grad_norm": 0.34375, "learning_rate": 0.0001106765805453152, "loss": 0.0081, "step": 9986 }, { "epoch": 4.661376896149358, "grad_norm": 0.6640625, "learning_rate": 0.00011066199081844171, "loss": 0.0113, "step": 9987 }, { "epoch": 4.661843640606768, "grad_norm": 0.35546875, "learning_rate": 0.00011064740086200372, "loss": 0.0058, "step": 9988 }, { "epoch": 4.662310385064178, "grad_norm": 0.8125, "learning_rate": 0.00011063281067631536, "loss": 0.0184, "step": 9989 }, { "epoch": 4.6627771295215865, "grad_norm": 0.78125, "learning_rate": 0.0001106182202616908, "loss": 0.0112, "step": 9990 }, { "epoch": 4.663243873978996, "grad_norm": 0.53125, "learning_rate": 0.00011060362961844417, "loss": 0.0116, "step": 9991 }, { "epoch": 4.663710618436406, "grad_norm": 0.55078125, "learning_rate": 0.00011058903874688964, "loss": 0.008, "step": 9992 }, { "epoch": 4.664177362893816, "grad_norm": 0.32421875, "learning_rate": 0.00011057444764734133, "loss": 0.005, "step": 9993 }, { "epoch": 4.664644107351226, "grad_norm": 0.515625, "learning_rate": 0.00011055985632011347, "loss": 0.0082, "step": 9994 }, { "epoch": 4.665110851808635, "grad_norm": 0.5703125, "learning_rate": 0.00011054526476552016, "loss": 0.0089, "step": 9995 }, { "epoch": 4.665577596266044, "grad_norm": 0.77734375, "learning_rate": 0.0001105306729838756, "loss": 0.0111, "step": 9996 }, { "epoch": 4.666044340723454, "grad_norm": 0.326171875, "learning_rate": 0.00011051608097549398, "loss": 0.0045, "step": 9997 }, { "epoch": 4.666511085180863, "grad_norm": 0.62890625, "learning_rate": 0.00011050148874068948, "loss": 0.0127, "step": 9998 }, { "epoch": 4.666977829638273, "grad_norm": 0.494140625, "learning_rate": 0.00011048689627977627, "loss": 0.0074, "step": 9999 }, { "epoch": 4.667444574095683, "grad_norm": 0.390625, "learning_rate": 0.0001104723035930685, "loss": 0.0055, "step": 10000 }, { "epoch": 4.6679113185530925, "grad_norm": 0.53515625, "learning_rate": 0.00011045771068088047, "loss": 0.0119, "step": 10001 }, { "epoch": 4.668378063010501, "grad_norm": 0.484375, "learning_rate": 0.00011044311754352632, "loss": 0.0083, "step": 10002 }, { "epoch": 4.668844807467911, "grad_norm": 0.66015625, "learning_rate": 0.00011042852418132024, "loss": 0.0099, "step": 10003 }, { "epoch": 4.669311551925321, "grad_norm": 0.640625, "learning_rate": 0.00011041393059457647, "loss": 0.0132, "step": 10004 }, { "epoch": 4.669778296382731, "grad_norm": 0.53515625, "learning_rate": 0.00011039933678360923, "loss": 0.0084, "step": 10005 }, { "epoch": 4.67024504084014, "grad_norm": 0.66796875, "learning_rate": 0.00011038474274873274, "loss": 0.0136, "step": 10006 }, { "epoch": 4.6707117852975495, "grad_norm": 0.7109375, "learning_rate": 0.00011037014849026117, "loss": 0.0146, "step": 10007 }, { "epoch": 4.671178529754959, "grad_norm": 0.279296875, "learning_rate": 0.00011035555400850884, "loss": 0.0067, "step": 10008 }, { "epoch": 4.671645274212369, "grad_norm": 0.62109375, "learning_rate": 0.00011034095930378993, "loss": 0.0177, "step": 10009 }, { "epoch": 4.672112018669778, "grad_norm": 0.67578125, "learning_rate": 0.00011032636437641867, "loss": 0.0124, "step": 10010 }, { "epoch": 4.672578763127188, "grad_norm": 0.361328125, "learning_rate": 0.00011031176922670938, "loss": 0.0062, "step": 10011 }, { "epoch": 4.673045507584598, "grad_norm": 0.50390625, "learning_rate": 0.00011029717385497624, "loss": 0.007, "step": 10012 }, { "epoch": 4.673512252042007, "grad_norm": 0.5859375, "learning_rate": 0.0001102825782615335, "loss": 0.0113, "step": 10013 }, { "epoch": 4.673978996499416, "grad_norm": 0.44140625, "learning_rate": 0.00011026798244669546, "loss": 0.0092, "step": 10014 }, { "epoch": 4.674445740956826, "grad_norm": 0.478515625, "learning_rate": 0.00011025338641077636, "loss": 0.0114, "step": 10015 }, { "epoch": 4.674912485414236, "grad_norm": 0.57421875, "learning_rate": 0.00011023879015409047, "loss": 0.0133, "step": 10016 }, { "epoch": 4.675379229871645, "grad_norm": 0.546875, "learning_rate": 0.00011022419367695207, "loss": 0.0146, "step": 10017 }, { "epoch": 4.675845974329055, "grad_norm": 0.5234375, "learning_rate": 0.00011020959697967544, "loss": 0.0074, "step": 10018 }, { "epoch": 4.676312718786464, "grad_norm": 0.66796875, "learning_rate": 0.00011019500006257483, "loss": 0.0112, "step": 10019 }, { "epoch": 4.676779463243874, "grad_norm": 0.4296875, "learning_rate": 0.00011018040292596459, "loss": 0.0048, "step": 10020 }, { "epoch": 4.677246207701284, "grad_norm": 0.451171875, "learning_rate": 0.00011016580557015899, "loss": 0.0086, "step": 10021 }, { "epoch": 4.677712952158693, "grad_norm": 0.62890625, "learning_rate": 0.0001101512079954723, "loss": 0.0173, "step": 10022 }, { "epoch": 4.678179696616103, "grad_norm": 0.55859375, "learning_rate": 0.00011013661020221881, "loss": 0.0125, "step": 10023 }, { "epoch": 4.6786464410735125, "grad_norm": 0.60546875, "learning_rate": 0.0001101220121907129, "loss": 0.0154, "step": 10024 }, { "epoch": 4.679113185530921, "grad_norm": 0.46484375, "learning_rate": 0.00011010741396126882, "loss": 0.0129, "step": 10025 }, { "epoch": 4.679579929988331, "grad_norm": 0.78515625, "learning_rate": 0.0001100928155142009, "loss": 0.0122, "step": 10026 }, { "epoch": 4.680046674445741, "grad_norm": 0.52734375, "learning_rate": 0.00011007821684982347, "loss": 0.0145, "step": 10027 }, { "epoch": 4.680513418903151, "grad_norm": 0.47265625, "learning_rate": 0.00011006361796845083, "loss": 0.0082, "step": 10028 }, { "epoch": 4.6809801633605606, "grad_norm": 0.314453125, "learning_rate": 0.00011004901887039734, "loss": 0.0098, "step": 10029 }, { "epoch": 4.6814469078179695, "grad_norm": 0.60546875, "learning_rate": 0.00011003441955597733, "loss": 0.0178, "step": 10030 }, { "epoch": 4.681913652275379, "grad_norm": 0.4375, "learning_rate": 0.00011001982002550515, "loss": 0.006, "step": 10031 }, { "epoch": 4.682380396732789, "grad_norm": 0.6328125, "learning_rate": 0.00011000522027929512, "loss": 0.0103, "step": 10032 }, { "epoch": 4.682847141190198, "grad_norm": 0.255859375, "learning_rate": 0.00010999062031766156, "loss": 0.0032, "step": 10033 }, { "epoch": 4.683313885647608, "grad_norm": 0.390625, "learning_rate": 0.00010997602014091893, "loss": 0.0076, "step": 10034 }, { "epoch": 4.6837806301050176, "grad_norm": 0.6171875, "learning_rate": 0.00010996141974938145, "loss": 0.0099, "step": 10035 }, { "epoch": 4.684247374562427, "grad_norm": 0.447265625, "learning_rate": 0.00010994681914336358, "loss": 0.011, "step": 10036 }, { "epoch": 4.684714119019836, "grad_norm": 0.51171875, "learning_rate": 0.00010993221832317966, "loss": 0.0131, "step": 10037 }, { "epoch": 4.685180863477246, "grad_norm": 0.671875, "learning_rate": 0.00010991761728914405, "loss": 0.012, "step": 10038 }, { "epoch": 4.685647607934656, "grad_norm": 0.458984375, "learning_rate": 0.00010990301604157114, "loss": 0.0117, "step": 10039 }, { "epoch": 4.686114352392066, "grad_norm": 0.54296875, "learning_rate": 0.0001098884145807753, "loss": 0.0128, "step": 10040 }, { "epoch": 4.6865810968494745, "grad_norm": 0.76953125, "learning_rate": 0.00010987381290707093, "loss": 0.0217, "step": 10041 }, { "epoch": 4.687047841306884, "grad_norm": 0.46484375, "learning_rate": 0.0001098592110207724, "loss": 0.0057, "step": 10042 }, { "epoch": 4.687514585764294, "grad_norm": 0.6015625, "learning_rate": 0.00010984460892219415, "loss": 0.0137, "step": 10043 }, { "epoch": 4.687981330221704, "grad_norm": 0.7421875, "learning_rate": 0.00010983000661165053, "loss": 0.0133, "step": 10044 }, { "epoch": 4.688448074679113, "grad_norm": 0.57421875, "learning_rate": 0.00010981540408945593, "loss": 0.0113, "step": 10045 }, { "epoch": 4.688914819136523, "grad_norm": 0.3984375, "learning_rate": 0.00010980080135592483, "loss": 0.004, "step": 10046 }, { "epoch": 4.689381563593932, "grad_norm": 0.43359375, "learning_rate": 0.00010978619841137158, "loss": 0.0157, "step": 10047 }, { "epoch": 4.689848308051342, "grad_norm": 0.60546875, "learning_rate": 0.00010977159525611065, "loss": 0.0101, "step": 10048 }, { "epoch": 4.690315052508751, "grad_norm": 0.5078125, "learning_rate": 0.00010975699189045638, "loss": 0.008, "step": 10049 }, { "epoch": 4.690781796966161, "grad_norm": 0.515625, "learning_rate": 0.00010974238831472329, "loss": 0.0159, "step": 10050 }, { "epoch": 4.691248541423571, "grad_norm": 0.34765625, "learning_rate": 0.00010972778452922576, "loss": 0.0109, "step": 10051 }, { "epoch": 4.6917152858809805, "grad_norm": 0.369140625, "learning_rate": 0.00010971318053427824, "loss": 0.0055, "step": 10052 }, { "epoch": 4.692182030338389, "grad_norm": 0.58984375, "learning_rate": 0.00010969857633019516, "loss": 0.0129, "step": 10053 }, { "epoch": 4.692648774795799, "grad_norm": 0.447265625, "learning_rate": 0.00010968397191729098, "loss": 0.0078, "step": 10054 }, { "epoch": 4.693115519253209, "grad_norm": 0.69921875, "learning_rate": 0.00010966936729588011, "loss": 0.0129, "step": 10055 }, { "epoch": 4.693582263710619, "grad_norm": 0.58203125, "learning_rate": 0.00010965476246627708, "loss": 0.0131, "step": 10056 }, { "epoch": 4.694049008168028, "grad_norm": 0.53125, "learning_rate": 0.00010964015742879628, "loss": 0.0097, "step": 10057 }, { "epoch": 4.6945157526254375, "grad_norm": 0.8359375, "learning_rate": 0.0001096255521837522, "loss": 0.0236, "step": 10058 }, { "epoch": 4.694982497082847, "grad_norm": 0.412109375, "learning_rate": 0.0001096109467314593, "loss": 0.0069, "step": 10059 }, { "epoch": 4.695449241540256, "grad_norm": 0.412109375, "learning_rate": 0.00010959634107223206, "loss": 0.008, "step": 10060 }, { "epoch": 4.695915985997666, "grad_norm": 0.5234375, "learning_rate": 0.00010958173520638493, "loss": 0.0131, "step": 10061 }, { "epoch": 4.696382730455076, "grad_norm": 0.546875, "learning_rate": 0.00010956712913423242, "loss": 0.0077, "step": 10062 }, { "epoch": 4.696849474912486, "grad_norm": 0.490234375, "learning_rate": 0.00010955252285608902, "loss": 0.0069, "step": 10063 }, { "epoch": 4.697316219369895, "grad_norm": 0.37890625, "learning_rate": 0.00010953791637226918, "loss": 0.007, "step": 10064 }, { "epoch": 4.697782963827304, "grad_norm": 0.4609375, "learning_rate": 0.00010952330968308742, "loss": 0.0084, "step": 10065 }, { "epoch": 4.698249708284714, "grad_norm": 0.5234375, "learning_rate": 0.00010950870278885823, "loss": 0.0074, "step": 10066 }, { "epoch": 4.698716452742124, "grad_norm": 0.431640625, "learning_rate": 0.00010949409568989614, "loss": 0.0084, "step": 10067 }, { "epoch": 4.699183197199533, "grad_norm": 0.55078125, "learning_rate": 0.00010947948838651561, "loss": 0.0127, "step": 10068 }, { "epoch": 4.699649941656943, "grad_norm": 0.50390625, "learning_rate": 0.0001094648808790312, "loss": 0.0068, "step": 10069 }, { "epoch": 4.700116686114352, "grad_norm": 0.66015625, "learning_rate": 0.0001094502731677574, "loss": 0.0178, "step": 10070 }, { "epoch": 4.700583430571762, "grad_norm": 0.5, "learning_rate": 0.00010943566525300875, "loss": 0.0104, "step": 10071 }, { "epoch": 4.701050175029172, "grad_norm": 0.6640625, "learning_rate": 0.00010942105713509972, "loss": 0.0102, "step": 10072 }, { "epoch": 4.701516919486581, "grad_norm": 0.447265625, "learning_rate": 0.0001094064488143449, "loss": 0.0064, "step": 10073 }, { "epoch": 4.701983663943991, "grad_norm": 0.466796875, "learning_rate": 0.0001093918402910588, "loss": 0.0062, "step": 10074 }, { "epoch": 4.7024504084014005, "grad_norm": 0.40625, "learning_rate": 0.00010937723156555595, "loss": 0.0051, "step": 10075 }, { "epoch": 4.702917152858809, "grad_norm": 0.7109375, "learning_rate": 0.00010936262263815089, "loss": 0.0109, "step": 10076 }, { "epoch": 4.703383897316219, "grad_norm": 0.8125, "learning_rate": 0.0001093480135091582, "loss": 0.0102, "step": 10077 }, { "epoch": 4.703850641773629, "grad_norm": 0.5390625, "learning_rate": 0.0001093334041788924, "loss": 0.0163, "step": 10078 }, { "epoch": 4.704317386231039, "grad_norm": 0.38671875, "learning_rate": 0.00010931879464766806, "loss": 0.0083, "step": 10079 }, { "epoch": 4.704784130688448, "grad_norm": 0.65625, "learning_rate": 0.00010930418491579972, "loss": 0.0121, "step": 10080 }, { "epoch": 4.7052508751458575, "grad_norm": 0.451171875, "learning_rate": 0.00010928957498360195, "loss": 0.0061, "step": 10081 }, { "epoch": 4.705717619603267, "grad_norm": 1.890625, "learning_rate": 0.00010927496485138935, "loss": 0.0091, "step": 10082 }, { "epoch": 4.706184364060677, "grad_norm": 0.56640625, "learning_rate": 0.00010926035451947645, "loss": 0.0119, "step": 10083 }, { "epoch": 4.706651108518086, "grad_norm": 0.578125, "learning_rate": 0.00010924574398817782, "loss": 0.0085, "step": 10084 }, { "epoch": 4.707117852975496, "grad_norm": 0.78515625, "learning_rate": 0.00010923113325780809, "loss": 0.0167, "step": 10085 }, { "epoch": 4.707584597432906, "grad_norm": 0.53125, "learning_rate": 0.00010921652232868182, "loss": 0.0155, "step": 10086 }, { "epoch": 4.708051341890315, "grad_norm": 0.54296875, "learning_rate": 0.0001092019112011136, "loss": 0.0083, "step": 10087 }, { "epoch": 4.708518086347724, "grad_norm": 0.71484375, "learning_rate": 0.000109187299875418, "loss": 0.018, "step": 10088 }, { "epoch": 4.708984830805134, "grad_norm": 0.39453125, "learning_rate": 0.00010917268835190966, "loss": 0.0075, "step": 10089 }, { "epoch": 4.709451575262544, "grad_norm": 0.478515625, "learning_rate": 0.00010915807663090316, "loss": 0.0094, "step": 10090 }, { "epoch": 4.709918319719954, "grad_norm": 0.48828125, "learning_rate": 0.00010914346471271309, "loss": 0.011, "step": 10091 }, { "epoch": 4.710385064177363, "grad_norm": 0.416015625, "learning_rate": 0.0001091288525976541, "loss": 0.0092, "step": 10092 }, { "epoch": 4.710851808634772, "grad_norm": 0.62890625, "learning_rate": 0.00010911424028604078, "loss": 0.0097, "step": 10093 }, { "epoch": 4.711318553092182, "grad_norm": 0.6328125, "learning_rate": 0.00010909962777818774, "loss": 0.0117, "step": 10094 }, { "epoch": 4.711785297549592, "grad_norm": 0.5703125, "learning_rate": 0.00010908501507440965, "loss": 0.011, "step": 10095 }, { "epoch": 4.712252042007001, "grad_norm": 0.578125, "learning_rate": 0.0001090704021750211, "loss": 0.0161, "step": 10096 }, { "epoch": 4.712718786464411, "grad_norm": 0.51171875, "learning_rate": 0.00010905578908033671, "loss": 0.018, "step": 10097 }, { "epoch": 4.7131855309218205, "grad_norm": 0.76171875, "learning_rate": 0.00010904117579067112, "loss": 0.0092, "step": 10098 }, { "epoch": 4.71365227537923, "grad_norm": 0.59765625, "learning_rate": 0.000109026562306339, "loss": 0.0115, "step": 10099 }, { "epoch": 4.714119019836639, "grad_norm": 0.4765625, "learning_rate": 0.000109011948627655, "loss": 0.0103, "step": 10100 }, { "epoch": 4.714585764294049, "grad_norm": 0.5625, "learning_rate": 0.00010899733475493372, "loss": 0.0093, "step": 10101 }, { "epoch": 4.715052508751459, "grad_norm": 0.447265625, "learning_rate": 0.00010898272068848985, "loss": 0.009, "step": 10102 }, { "epoch": 4.715519253208868, "grad_norm": 0.427734375, "learning_rate": 0.00010896810642863804, "loss": 0.0075, "step": 10103 }, { "epoch": 4.7159859976662775, "grad_norm": 0.7734375, "learning_rate": 0.00010895349197569293, "loss": 0.0118, "step": 10104 }, { "epoch": 4.716452742123687, "grad_norm": 0.490234375, "learning_rate": 0.00010893887732996922, "loss": 0.0069, "step": 10105 }, { "epoch": 4.716919486581097, "grad_norm": 0.99609375, "learning_rate": 0.00010892426249178158, "loss": 0.014, "step": 10106 }, { "epoch": 4.717386231038507, "grad_norm": 0.54296875, "learning_rate": 0.00010890964746144463, "loss": 0.0094, "step": 10107 }, { "epoch": 4.717852975495916, "grad_norm": 0.71875, "learning_rate": 0.00010889503223927311, "loss": 0.0169, "step": 10108 }, { "epoch": 4.7183197199533256, "grad_norm": 0.63671875, "learning_rate": 0.00010888041682558167, "loss": 0.0144, "step": 10109 }, { "epoch": 4.718786464410735, "grad_norm": 0.57421875, "learning_rate": 0.000108865801220685, "loss": 0.0104, "step": 10110 }, { "epoch": 4.719253208868144, "grad_norm": 0.490234375, "learning_rate": 0.00010885118542489779, "loss": 0.0104, "step": 10111 }, { "epoch": 4.719719953325554, "grad_norm": 0.458984375, "learning_rate": 0.00010883656943853475, "loss": 0.0086, "step": 10112 }, { "epoch": 4.720186697782964, "grad_norm": 0.70703125, "learning_rate": 0.00010882195326191056, "loss": 0.017, "step": 10113 }, { "epoch": 4.720653442240374, "grad_norm": 0.3671875, "learning_rate": 0.0001088073368953399, "loss": 0.005, "step": 10114 }, { "epoch": 4.721120186697783, "grad_norm": 0.69140625, "learning_rate": 0.00010879272033913754, "loss": 0.014, "step": 10115 }, { "epoch": 4.721586931155192, "grad_norm": 0.38671875, "learning_rate": 0.00010877810359361815, "loss": 0.0109, "step": 10116 }, { "epoch": 4.722053675612602, "grad_norm": 0.47265625, "learning_rate": 0.00010876348665909643, "loss": 0.0084, "step": 10117 }, { "epoch": 4.722520420070012, "grad_norm": 0.62890625, "learning_rate": 0.00010874886953588714, "loss": 0.0113, "step": 10118 }, { "epoch": 4.722987164527421, "grad_norm": 0.5234375, "learning_rate": 0.000108734252224305, "loss": 0.0071, "step": 10119 }, { "epoch": 4.723453908984831, "grad_norm": 0.6171875, "learning_rate": 0.00010871963472466466, "loss": 0.0165, "step": 10120 }, { "epoch": 4.72392065344224, "grad_norm": 0.494140625, "learning_rate": 0.00010870501703728098, "loss": 0.0067, "step": 10121 }, { "epoch": 4.72438739789965, "grad_norm": 0.62890625, "learning_rate": 0.00010869039916246859, "loss": 0.0154, "step": 10122 }, { "epoch": 4.724854142357059, "grad_norm": 0.373046875, "learning_rate": 0.00010867578110054226, "loss": 0.0059, "step": 10123 }, { "epoch": 4.725320886814469, "grad_norm": 0.58984375, "learning_rate": 0.00010866116285181673, "loss": 0.0114, "step": 10124 }, { "epoch": 4.725787631271879, "grad_norm": 0.6640625, "learning_rate": 0.00010864654441660676, "loss": 0.014, "step": 10125 }, { "epoch": 4.7262543757292885, "grad_norm": 0.453125, "learning_rate": 0.00010863192579522711, "loss": 0.0106, "step": 10126 }, { "epoch": 4.726721120186697, "grad_norm": 0.466796875, "learning_rate": 0.0001086173069879925, "loss": 0.0084, "step": 10127 }, { "epoch": 4.727187864644107, "grad_norm": 0.408203125, "learning_rate": 0.00010860268799521774, "loss": 0.0061, "step": 10128 }, { "epoch": 4.727654609101517, "grad_norm": 0.53515625, "learning_rate": 0.00010858806881721752, "loss": 0.0098, "step": 10129 }, { "epoch": 4.728121353558927, "grad_norm": 0.318359375, "learning_rate": 0.00010857344945430669, "loss": 0.0081, "step": 10130 }, { "epoch": 4.728588098016336, "grad_norm": 0.5859375, "learning_rate": 0.00010855882990679998, "loss": 0.0094, "step": 10131 }, { "epoch": 4.7290548424737455, "grad_norm": 0.41015625, "learning_rate": 0.00010854421017501216, "loss": 0.0044, "step": 10132 }, { "epoch": 4.729521586931155, "grad_norm": 0.57421875, "learning_rate": 0.00010852959025925798, "loss": 0.0106, "step": 10133 }, { "epoch": 4.729988331388565, "grad_norm": 0.55078125, "learning_rate": 0.00010851497015985229, "loss": 0.0075, "step": 10134 }, { "epoch": 4.730455075845974, "grad_norm": 0.703125, "learning_rate": 0.00010850034987710982, "loss": 0.0102, "step": 10135 }, { "epoch": 4.730921820303384, "grad_norm": 0.48046875, "learning_rate": 0.00010848572941134543, "loss": 0.0093, "step": 10136 }, { "epoch": 4.731388564760794, "grad_norm": 0.5, "learning_rate": 0.0001084711087628738, "loss": 0.0075, "step": 10137 }, { "epoch": 4.731855309218203, "grad_norm": 0.3828125, "learning_rate": 0.00010845648793200987, "loss": 0.0051, "step": 10138 }, { "epoch": 4.732322053675612, "grad_norm": 0.6328125, "learning_rate": 0.00010844186691906835, "loss": 0.0151, "step": 10139 }, { "epoch": 4.732788798133022, "grad_norm": 0.51171875, "learning_rate": 0.00010842724572436404, "loss": 0.0106, "step": 10140 }, { "epoch": 4.733255542590432, "grad_norm": 0.59375, "learning_rate": 0.00010841262434821182, "loss": 0.0111, "step": 10141 }, { "epoch": 4.733722287047842, "grad_norm": 0.62109375, "learning_rate": 0.00010839800279092644, "loss": 0.0097, "step": 10142 }, { "epoch": 4.734189031505251, "grad_norm": 0.5703125, "learning_rate": 0.00010838338105282273, "loss": 0.0133, "step": 10143 }, { "epoch": 4.73465577596266, "grad_norm": 0.65625, "learning_rate": 0.00010836875913421556, "loss": 0.0096, "step": 10144 }, { "epoch": 4.73512252042007, "grad_norm": 0.55859375, "learning_rate": 0.0001083541370354197, "loss": 0.0104, "step": 10145 }, { "epoch": 4.735589264877479, "grad_norm": 0.65234375, "learning_rate": 0.00010833951475675001, "loss": 0.0112, "step": 10146 }, { "epoch": 4.736056009334889, "grad_norm": 0.388671875, "learning_rate": 0.0001083248922985213, "loss": 0.0071, "step": 10147 }, { "epoch": 4.736522753792299, "grad_norm": 0.57421875, "learning_rate": 0.00010831026966104845, "loss": 0.0105, "step": 10148 }, { "epoch": 4.7369894982497085, "grad_norm": 0.61328125, "learning_rate": 0.00010829564684464627, "loss": 0.0071, "step": 10149 }, { "epoch": 4.737456242707118, "grad_norm": 0.4609375, "learning_rate": 0.00010828102384962955, "loss": 0.0149, "step": 10150 }, { "epoch": 4.737922987164527, "grad_norm": 0.60546875, "learning_rate": 0.00010826640067631326, "loss": 0.0095, "step": 10151 }, { "epoch": 4.738389731621937, "grad_norm": 0.6171875, "learning_rate": 0.00010825177732501217, "loss": 0.0177, "step": 10152 }, { "epoch": 4.738856476079347, "grad_norm": 0.66015625, "learning_rate": 0.00010823715379604116, "loss": 0.0153, "step": 10153 }, { "epoch": 4.739323220536756, "grad_norm": 0.439453125, "learning_rate": 0.00010822253008971512, "loss": 0.0062, "step": 10154 }, { "epoch": 4.7397899649941655, "grad_norm": 0.66796875, "learning_rate": 0.00010820790620634887, "loss": 0.0099, "step": 10155 }, { "epoch": 4.740256709451575, "grad_norm": 0.6171875, "learning_rate": 0.00010819328214625726, "loss": 0.0204, "step": 10156 }, { "epoch": 4.740723453908985, "grad_norm": 0.54296875, "learning_rate": 0.0001081786579097552, "loss": 0.0096, "step": 10157 }, { "epoch": 4.741190198366395, "grad_norm": 0.71484375, "learning_rate": 0.0001081640334971576, "loss": 0.0082, "step": 10158 }, { "epoch": 4.741656942823804, "grad_norm": 0.6015625, "learning_rate": 0.00010814940890877924, "loss": 0.0138, "step": 10159 }, { "epoch": 4.742123687281214, "grad_norm": 0.4765625, "learning_rate": 0.00010813478414493512, "loss": 0.0085, "step": 10160 }, { "epoch": 4.742590431738623, "grad_norm": 0.62890625, "learning_rate": 0.00010812015920594006, "loss": 0.0111, "step": 10161 }, { "epoch": 4.743057176196032, "grad_norm": 0.56640625, "learning_rate": 0.00010810553409210895, "loss": 0.0186, "step": 10162 }, { "epoch": 4.743523920653442, "grad_norm": 0.50390625, "learning_rate": 0.0001080909088037567, "loss": 0.0085, "step": 10163 }, { "epoch": 4.743990665110852, "grad_norm": 0.412109375, "learning_rate": 0.00010807628334119821, "loss": 0.0056, "step": 10164 }, { "epoch": 4.744457409568262, "grad_norm": 0.71484375, "learning_rate": 0.00010806165770474839, "loss": 0.019, "step": 10165 }, { "epoch": 4.744924154025671, "grad_norm": 0.5234375, "learning_rate": 0.0001080470318947221, "loss": 0.0083, "step": 10166 }, { "epoch": 4.74539089848308, "grad_norm": 0.48828125, "learning_rate": 0.0001080324059114343, "loss": 0.0088, "step": 10167 }, { "epoch": 4.74585764294049, "grad_norm": 1.1484375, "learning_rate": 0.00010801777975519991, "loss": 0.0155, "step": 10168 }, { "epoch": 4.7463243873979, "grad_norm": 0.6328125, "learning_rate": 0.0001080031534263338, "loss": 0.014, "step": 10169 }, { "epoch": 4.746791131855309, "grad_norm": 0.451171875, "learning_rate": 0.00010798852692515093, "loss": 0.0107, "step": 10170 }, { "epoch": 4.747257876312719, "grad_norm": 0.71875, "learning_rate": 0.00010797390025196623, "loss": 0.0115, "step": 10171 }, { "epoch": 4.7477246207701285, "grad_norm": 0.458984375, "learning_rate": 0.0001079592734070946, "loss": 0.0085, "step": 10172 }, { "epoch": 4.748191365227538, "grad_norm": 0.431640625, "learning_rate": 0.00010794464639085096, "loss": 0.0067, "step": 10173 }, { "epoch": 4.748658109684947, "grad_norm": 0.62890625, "learning_rate": 0.00010793001920355028, "loss": 0.0096, "step": 10174 }, { "epoch": 4.749124854142357, "grad_norm": 0.498046875, "learning_rate": 0.0001079153918455075, "loss": 0.0117, "step": 10175 }, { "epoch": 4.749591598599767, "grad_norm": 0.37109375, "learning_rate": 0.00010790076431703751, "loss": 0.0073, "step": 10176 }, { "epoch": 4.750058343057177, "grad_norm": 0.625, "learning_rate": 0.00010788613661845535, "loss": 0.01, "step": 10177 }, { "epoch": 4.7505250875145855, "grad_norm": 0.373046875, "learning_rate": 0.00010787150875007592, "loss": 0.0048, "step": 10178 }, { "epoch": 4.750991831971995, "grad_norm": 0.482421875, "learning_rate": 0.00010785688071221414, "loss": 0.0105, "step": 10179 }, { "epoch": 4.751458576429405, "grad_norm": 0.435546875, "learning_rate": 0.00010784225250518503, "loss": 0.0041, "step": 10180 }, { "epoch": 4.751925320886815, "grad_norm": 0.59765625, "learning_rate": 0.00010782762412930349, "loss": 0.0107, "step": 10181 }, { "epoch": 4.752392065344224, "grad_norm": 0.734375, "learning_rate": 0.00010781299558488452, "loss": 0.0112, "step": 10182 }, { "epoch": 4.7528588098016336, "grad_norm": 0.61328125, "learning_rate": 0.00010779836687224312, "loss": 0.0226, "step": 10183 }, { "epoch": 4.753325554259043, "grad_norm": 0.51953125, "learning_rate": 0.00010778373799169421, "loss": 0.0107, "step": 10184 }, { "epoch": 4.753325554259043, "eval_loss": 1.990020513534546, "eval_runtime": 55.9398, "eval_samples_per_second": 32.249, "eval_steps_per_second": 4.04, "step": 10184 }, { "epoch": 4.753792298716453, "grad_norm": 0.55078125, "learning_rate": 0.00010776910894355279, "loss": 0.0116, "step": 10185 }, { "epoch": 4.754259043173862, "grad_norm": 0.6015625, "learning_rate": 0.00010775447972813379, "loss": 0.0104, "step": 10186 }, { "epoch": 4.754725787631272, "grad_norm": 0.55078125, "learning_rate": 0.00010773985034575229, "loss": 0.0157, "step": 10187 }, { "epoch": 4.755192532088682, "grad_norm": 0.796875, "learning_rate": 0.00010772522079672322, "loss": 0.0149, "step": 10188 }, { "epoch": 4.7556592765460906, "grad_norm": 0.4921875, "learning_rate": 0.00010771059108136155, "loss": 0.007, "step": 10189 }, { "epoch": 4.7561260210035, "grad_norm": 0.4296875, "learning_rate": 0.00010769596119998231, "loss": 0.005, "step": 10190 }, { "epoch": 4.75659276546091, "grad_norm": 0.9765625, "learning_rate": 0.00010768133115290051, "loss": 0.0109, "step": 10191 }, { "epoch": 4.75705950991832, "grad_norm": 0.482421875, "learning_rate": 0.00010766670094043107, "loss": 0.0154, "step": 10192 }, { "epoch": 4.75752625437573, "grad_norm": 0.6328125, "learning_rate": 0.0001076520705628891, "loss": 0.0126, "step": 10193 }, { "epoch": 4.757992998833139, "grad_norm": 0.5546875, "learning_rate": 0.00010763744002058957, "loss": 0.0097, "step": 10194 }, { "epoch": 4.758459743290548, "grad_norm": 0.58984375, "learning_rate": 0.00010762280931384747, "loss": 0.0089, "step": 10195 }, { "epoch": 4.758926487747958, "grad_norm": 0.388671875, "learning_rate": 0.00010760817844297779, "loss": 0.0098, "step": 10196 }, { "epoch": 4.759393232205367, "grad_norm": 0.66015625, "learning_rate": 0.00010759354740829564, "loss": 0.0184, "step": 10197 }, { "epoch": 4.759859976662777, "grad_norm": 0.404296875, "learning_rate": 0.00010757891621011597, "loss": 0.0116, "step": 10198 }, { "epoch": 4.760326721120187, "grad_norm": 0.462890625, "learning_rate": 0.0001075642848487538, "loss": 0.0102, "step": 10199 }, { "epoch": 4.7607934655775965, "grad_norm": 0.6484375, "learning_rate": 0.00010754965332452422, "loss": 0.0086, "step": 10200 }, { "epoch": 4.761260210035006, "grad_norm": 0.64453125, "learning_rate": 0.00010753502163774221, "loss": 0.012, "step": 10201 }, { "epoch": 4.761726954492415, "grad_norm": 0.64453125, "learning_rate": 0.00010752038978872282, "loss": 0.0113, "step": 10202 }, { "epoch": 4.762193698949825, "grad_norm": 0.5, "learning_rate": 0.00010750575777778114, "loss": 0.008, "step": 10203 }, { "epoch": 4.762660443407235, "grad_norm": 0.59765625, "learning_rate": 0.00010749112560523211, "loss": 0.0076, "step": 10204 }, { "epoch": 4.763127187864644, "grad_norm": 0.416015625, "learning_rate": 0.00010747649327139084, "loss": 0.0092, "step": 10205 }, { "epoch": 4.7635939323220535, "grad_norm": 1.8125, "learning_rate": 0.00010746186077657241, "loss": 0.0072, "step": 10206 }, { "epoch": 4.764060676779463, "grad_norm": 0.58203125, "learning_rate": 0.00010744722812109183, "loss": 0.0096, "step": 10207 }, { "epoch": 4.764527421236873, "grad_norm": 0.69140625, "learning_rate": 0.00010743259530526413, "loss": 0.0151, "step": 10208 }, { "epoch": 4.764994165694282, "grad_norm": 0.6171875, "learning_rate": 0.00010741796232940441, "loss": 0.0187, "step": 10209 }, { "epoch": 4.765460910151692, "grad_norm": 0.71484375, "learning_rate": 0.00010740332919382776, "loss": 0.0171, "step": 10210 }, { "epoch": 4.765927654609102, "grad_norm": 0.5546875, "learning_rate": 0.00010738869589884922, "loss": 0.0081, "step": 10211 }, { "epoch": 4.766394399066511, "grad_norm": 1.0625, "learning_rate": 0.00010737406244478383, "loss": 0.0147, "step": 10212 }, { "epoch": 4.76686114352392, "grad_norm": 0.7734375, "learning_rate": 0.00010735942883194672, "loss": 0.0225, "step": 10213 }, { "epoch": 4.76732788798133, "grad_norm": 0.75, "learning_rate": 0.00010734479506065294, "loss": 0.0121, "step": 10214 }, { "epoch": 4.76779463243874, "grad_norm": 0.578125, "learning_rate": 0.00010733016113121754, "loss": 0.0177, "step": 10215 }, { "epoch": 4.76826137689615, "grad_norm": 0.7421875, "learning_rate": 0.00010731552704395566, "loss": 0.0206, "step": 10216 }, { "epoch": 4.768728121353559, "grad_norm": 0.482421875, "learning_rate": 0.00010730089279918238, "loss": 0.0074, "step": 10217 }, { "epoch": 4.769194865810968, "grad_norm": 0.62109375, "learning_rate": 0.00010728625839721275, "loss": 0.0112, "step": 10218 }, { "epoch": 4.769661610268378, "grad_norm": 0.703125, "learning_rate": 0.00010727162383836193, "loss": 0.017, "step": 10219 }, { "epoch": 4.770128354725788, "grad_norm": 0.5546875, "learning_rate": 0.00010725698912294497, "loss": 0.0114, "step": 10220 }, { "epoch": 4.770595099183197, "grad_norm": 0.51953125, "learning_rate": 0.00010724235425127698, "loss": 0.0134, "step": 10221 }, { "epoch": 4.771061843640607, "grad_norm": 0.5546875, "learning_rate": 0.00010722771922367303, "loss": 0.0113, "step": 10222 }, { "epoch": 4.7715285880980165, "grad_norm": 0.7265625, "learning_rate": 0.0001072130840404483, "loss": 0.0203, "step": 10223 }, { "epoch": 4.771995332555425, "grad_norm": 0.75390625, "learning_rate": 0.00010719844870191789, "loss": 0.0094, "step": 10224 }, { "epoch": 4.772462077012835, "grad_norm": 0.6953125, "learning_rate": 0.00010718381320839683, "loss": 0.0218, "step": 10225 }, { "epoch": 4.772928821470245, "grad_norm": 0.60546875, "learning_rate": 0.00010716917756020038, "loss": 0.0161, "step": 10226 }, { "epoch": 4.773395565927655, "grad_norm": 0.59765625, "learning_rate": 0.00010715454175764355, "loss": 0.0072, "step": 10227 }, { "epoch": 4.773862310385065, "grad_norm": 0.69921875, "learning_rate": 0.00010713990580104145, "loss": 0.0107, "step": 10228 }, { "epoch": 4.7743290548424735, "grad_norm": 0.44921875, "learning_rate": 0.0001071252696907093, "loss": 0.0076, "step": 10229 }, { "epoch": 4.774795799299883, "grad_norm": 0.71875, "learning_rate": 0.00010711063342696219, "loss": 0.0136, "step": 10230 }, { "epoch": 4.775262543757293, "grad_norm": 0.703125, "learning_rate": 0.00010709599701011523, "loss": 0.0111, "step": 10231 }, { "epoch": 4.775729288214702, "grad_norm": 0.55859375, "learning_rate": 0.0001070813604404836, "loss": 0.0118, "step": 10232 }, { "epoch": 4.776196032672112, "grad_norm": 0.63671875, "learning_rate": 0.00010706672371838242, "loss": 0.0121, "step": 10233 }, { "epoch": 4.776662777129522, "grad_norm": 0.50390625, "learning_rate": 0.00010705208684412684, "loss": 0.008, "step": 10234 }, { "epoch": 4.777129521586931, "grad_norm": 0.83203125, "learning_rate": 0.00010703744981803201, "loss": 0.0185, "step": 10235 }, { "epoch": 4.777596266044341, "grad_norm": 0.578125, "learning_rate": 0.00010702281264041307, "loss": 0.0107, "step": 10236 }, { "epoch": 4.77806301050175, "grad_norm": 0.77734375, "learning_rate": 0.00010700817531158518, "loss": 0.0105, "step": 10237 }, { "epoch": 4.77852975495916, "grad_norm": 0.53515625, "learning_rate": 0.00010699353783186347, "loss": 0.0119, "step": 10238 }, { "epoch": 4.77899649941657, "grad_norm": 0.380859375, "learning_rate": 0.00010697890020156319, "loss": 0.0067, "step": 10239 }, { "epoch": 4.779463243873979, "grad_norm": 0.70703125, "learning_rate": 0.00010696426242099941, "loss": 0.0111, "step": 10240 }, { "epoch": 4.779929988331388, "grad_norm": 0.71875, "learning_rate": 0.00010694962449048733, "loss": 0.0174, "step": 10241 }, { "epoch": 4.780396732788798, "grad_norm": 0.703125, "learning_rate": 0.00010693498641034214, "loss": 0.0109, "step": 10242 }, { "epoch": 4.780863477246208, "grad_norm": 0.62890625, "learning_rate": 0.00010692034818087901, "loss": 0.0106, "step": 10243 }, { "epoch": 4.781330221703618, "grad_norm": 0.373046875, "learning_rate": 0.0001069057098024131, "loss": 0.0062, "step": 10244 }, { "epoch": 4.781796966161027, "grad_norm": 0.50390625, "learning_rate": 0.00010689107127525954, "loss": 0.0094, "step": 10245 }, { "epoch": 4.7822637106184365, "grad_norm": 0.6328125, "learning_rate": 0.00010687643259973363, "loss": 0.0184, "step": 10246 }, { "epoch": 4.782730455075846, "grad_norm": 0.52734375, "learning_rate": 0.00010686179377615049, "loss": 0.0121, "step": 10247 }, { "epoch": 4.783197199533255, "grad_norm": 0.64453125, "learning_rate": 0.0001068471548048253, "loss": 0.0106, "step": 10248 }, { "epoch": 4.783663943990665, "grad_norm": 0.8046875, "learning_rate": 0.00010683251568607328, "loss": 0.02, "step": 10249 }, { "epoch": 4.784130688448075, "grad_norm": 0.7421875, "learning_rate": 0.00010681787642020964, "loss": 0.017, "step": 10250 }, { "epoch": 4.784597432905485, "grad_norm": 0.5625, "learning_rate": 0.0001068032370075495, "loss": 0.0127, "step": 10251 }, { "epoch": 4.7850641773628935, "grad_norm": 0.484375, "learning_rate": 0.00010678859744840816, "loss": 0.0065, "step": 10252 }, { "epoch": 4.785530921820303, "grad_norm": 0.40625, "learning_rate": 0.0001067739577431008, "loss": 0.0096, "step": 10253 }, { "epoch": 4.785997666277713, "grad_norm": 0.59765625, "learning_rate": 0.00010675931789194256, "loss": 0.0121, "step": 10254 }, { "epoch": 4.786464410735123, "grad_norm": 0.5859375, "learning_rate": 0.00010674467789524873, "loss": 0.0087, "step": 10255 }, { "epoch": 4.786931155192532, "grad_norm": 0.46484375, "learning_rate": 0.00010673003775333452, "loss": 0.0111, "step": 10256 }, { "epoch": 4.787397899649942, "grad_norm": 0.5546875, "learning_rate": 0.00010671539746651512, "loss": 0.008, "step": 10257 }, { "epoch": 4.787864644107351, "grad_norm": 0.63671875, "learning_rate": 0.00010670075703510574, "loss": 0.013, "step": 10258 }, { "epoch": 4.788331388564761, "grad_norm": 0.63671875, "learning_rate": 0.00010668611645942166, "loss": 0.0159, "step": 10259 }, { "epoch": 4.78879813302217, "grad_norm": 0.65234375, "learning_rate": 0.00010667147573977805, "loss": 0.0098, "step": 10260 }, { "epoch": 4.78926487747958, "grad_norm": 0.5546875, "learning_rate": 0.00010665683487649017, "loss": 0.0189, "step": 10261 }, { "epoch": 4.78973162193699, "grad_norm": 0.5, "learning_rate": 0.00010664219386987324, "loss": 0.0071, "step": 10262 }, { "epoch": 4.790198366394399, "grad_norm": 0.2890625, "learning_rate": 0.00010662755272024253, "loss": 0.0108, "step": 10263 }, { "epoch": 4.790665110851808, "grad_norm": 0.5546875, "learning_rate": 0.00010661291142791323, "loss": 0.0102, "step": 10264 }, { "epoch": 4.791131855309218, "grad_norm": 0.5078125, "learning_rate": 0.00010659826999320063, "loss": 0.0079, "step": 10265 }, { "epoch": 4.791598599766628, "grad_norm": 0.83203125, "learning_rate": 0.00010658362841641995, "loss": 0.0112, "step": 10266 }, { "epoch": 4.792065344224037, "grad_norm": 0.45703125, "learning_rate": 0.00010656898669788644, "loss": 0.0072, "step": 10267 }, { "epoch": 4.792532088681447, "grad_norm": 0.58203125, "learning_rate": 0.00010655434483791538, "loss": 0.0153, "step": 10268 }, { "epoch": 4.792998833138856, "grad_norm": 0.28125, "learning_rate": 0.00010653970283682198, "loss": 0.0058, "step": 10269 }, { "epoch": 4.793465577596266, "grad_norm": 0.474609375, "learning_rate": 0.00010652506069492154, "loss": 0.0085, "step": 10270 }, { "epoch": 4.793932322053676, "grad_norm": 0.5703125, "learning_rate": 0.00010651041841252928, "loss": 0.0084, "step": 10271 }, { "epoch": 4.794399066511085, "grad_norm": 0.43359375, "learning_rate": 0.0001064957759899605, "loss": 0.0081, "step": 10272 }, { "epoch": 4.794865810968495, "grad_norm": 0.5, "learning_rate": 0.00010648113342753047, "loss": 0.0094, "step": 10273 }, { "epoch": 4.7953325554259045, "grad_norm": 0.81640625, "learning_rate": 0.00010646649072555444, "loss": 0.0165, "step": 10274 }, { "epoch": 4.795799299883313, "grad_norm": 0.490234375, "learning_rate": 0.0001064518478843477, "loss": 0.0115, "step": 10275 }, { "epoch": 4.796266044340723, "grad_norm": 0.55859375, "learning_rate": 0.00010643720490422549, "loss": 0.0096, "step": 10276 }, { "epoch": 4.796732788798133, "grad_norm": 0.494140625, "learning_rate": 0.00010642256178550312, "loss": 0.0098, "step": 10277 }, { "epoch": 4.797199533255543, "grad_norm": 0.353515625, "learning_rate": 0.00010640791852849587, "loss": 0.0051, "step": 10278 }, { "epoch": 4.797666277712953, "grad_norm": 0.34375, "learning_rate": 0.00010639327513351905, "loss": 0.0051, "step": 10279 }, { "epoch": 4.7981330221703615, "grad_norm": 0.5859375, "learning_rate": 0.00010637863160088787, "loss": 0.0116, "step": 10280 }, { "epoch": 4.798599766627771, "grad_norm": 0.51953125, "learning_rate": 0.00010636398793091773, "loss": 0.0098, "step": 10281 }, { "epoch": 4.799066511085181, "grad_norm": 0.8125, "learning_rate": 0.00010634934412392384, "loss": 0.0155, "step": 10282 }, { "epoch": 4.79953325554259, "grad_norm": 0.447265625, "learning_rate": 0.00010633470018022152, "loss": 0.0082, "step": 10283 }, { "epoch": 4.8, "grad_norm": 0.48046875, "learning_rate": 0.0001063200561001261, "loss": 0.0076, "step": 10284 }, { "epoch": 4.80046674445741, "grad_norm": 0.58203125, "learning_rate": 0.00010630541188395284, "loss": 0.0143, "step": 10285 }, { "epoch": 4.800933488914819, "grad_norm": 0.48046875, "learning_rate": 0.0001062907675320171, "loss": 0.0099, "step": 10286 }, { "epoch": 4.801400233372228, "grad_norm": 0.458984375, "learning_rate": 0.0001062761230446341, "loss": 0.013, "step": 10287 }, { "epoch": 4.801866977829638, "grad_norm": 0.5234375, "learning_rate": 0.00010626147842211924, "loss": 0.0061, "step": 10288 }, { "epoch": 4.802333722287048, "grad_norm": 0.314453125, "learning_rate": 0.00010624683366478781, "loss": 0.0058, "step": 10289 }, { "epoch": 4.802800466744458, "grad_norm": 0.50390625, "learning_rate": 0.00010623218877295511, "loss": 0.0082, "step": 10290 }, { "epoch": 4.803267211201867, "grad_norm": 0.298828125, "learning_rate": 0.00010621754374693646, "loss": 0.004, "step": 10291 }, { "epoch": 4.803733955659276, "grad_norm": 0.494140625, "learning_rate": 0.00010620289858704722, "loss": 0.0109, "step": 10292 }, { "epoch": 4.804200700116686, "grad_norm": 0.451171875, "learning_rate": 0.00010618825329360267, "loss": 0.0087, "step": 10293 }, { "epoch": 4.804667444574096, "grad_norm": 0.80859375, "learning_rate": 0.00010617360786691817, "loss": 0.0183, "step": 10294 }, { "epoch": 4.805134189031505, "grad_norm": 0.474609375, "learning_rate": 0.00010615896230730904, "loss": 0.0105, "step": 10295 }, { "epoch": 4.805600933488915, "grad_norm": 0.373046875, "learning_rate": 0.00010614431661509064, "loss": 0.0056, "step": 10296 }, { "epoch": 4.8060676779463245, "grad_norm": 0.5546875, "learning_rate": 0.00010612967079057825, "loss": 0.0109, "step": 10297 }, { "epoch": 4.806534422403734, "grad_norm": 0.73046875, "learning_rate": 0.00010611502483408726, "loss": 0.0093, "step": 10298 }, { "epoch": 4.807001166861143, "grad_norm": 0.64453125, "learning_rate": 0.00010610037874593299, "loss": 0.012, "step": 10299 }, { "epoch": 4.807467911318553, "grad_norm": 0.5, "learning_rate": 0.00010608573252643081, "loss": 0.0073, "step": 10300 }, { "epoch": 4.807934655775963, "grad_norm": 0.427734375, "learning_rate": 0.00010607108617589607, "loss": 0.0047, "step": 10301 }, { "epoch": 4.808401400233373, "grad_norm": 0.57421875, "learning_rate": 0.0001060564396946441, "loss": 0.0096, "step": 10302 }, { "epoch": 4.8088681446907815, "grad_norm": 0.5390625, "learning_rate": 0.00010604179308299025, "loss": 0.0091, "step": 10303 }, { "epoch": 4.809334889148191, "grad_norm": 0.69140625, "learning_rate": 0.0001060271463412499, "loss": 0.0137, "step": 10304 }, { "epoch": 4.809801633605601, "grad_norm": 0.625, "learning_rate": 0.00010601249946973841, "loss": 0.0121, "step": 10305 }, { "epoch": 4.810268378063011, "grad_norm": 0.462890625, "learning_rate": 0.00010599785246877112, "loss": 0.0099, "step": 10306 }, { "epoch": 4.81073512252042, "grad_norm": 0.478515625, "learning_rate": 0.00010598320533866342, "loss": 0.0089, "step": 10307 }, { "epoch": 4.81120186697783, "grad_norm": 0.345703125, "learning_rate": 0.00010596855807973067, "loss": 0.0058, "step": 10308 }, { "epoch": 4.811668611435239, "grad_norm": 0.64453125, "learning_rate": 0.00010595391069228824, "loss": 0.0151, "step": 10309 }, { "epoch": 4.812135355892648, "grad_norm": 0.33203125, "learning_rate": 0.00010593926317665151, "loss": 0.0055, "step": 10310 }, { "epoch": 4.812602100350058, "grad_norm": 0.5859375, "learning_rate": 0.00010592461553313584, "loss": 0.0168, "step": 10311 }, { "epoch": 4.813068844807468, "grad_norm": 0.56640625, "learning_rate": 0.00010590996776205664, "loss": 0.0097, "step": 10312 }, { "epoch": 4.813535589264878, "grad_norm": 0.5859375, "learning_rate": 0.00010589531986372927, "loss": 0.0105, "step": 10313 }, { "epoch": 4.8140023337222875, "grad_norm": 0.3984375, "learning_rate": 0.00010588067183846913, "loss": 0.008, "step": 10314 }, { "epoch": 4.814469078179696, "grad_norm": 0.416015625, "learning_rate": 0.0001058660236865916, "loss": 0.0119, "step": 10315 }, { "epoch": 4.814935822637106, "grad_norm": 0.44140625, "learning_rate": 0.00010585137540841205, "loss": 0.0046, "step": 10316 }, { "epoch": 4.815402567094516, "grad_norm": 0.6015625, "learning_rate": 0.00010583672700424592, "loss": 0.0071, "step": 10317 }, { "epoch": 4.815869311551925, "grad_norm": 0.46875, "learning_rate": 0.00010582207847440858, "loss": 0.0079, "step": 10318 }, { "epoch": 4.816336056009335, "grad_norm": 0.79296875, "learning_rate": 0.00010580742981921543, "loss": 0.0132, "step": 10319 }, { "epoch": 4.8168028004667445, "grad_norm": 0.53515625, "learning_rate": 0.00010579278103898184, "loss": 0.0093, "step": 10320 }, { "epoch": 4.817269544924154, "grad_norm": 0.625, "learning_rate": 0.00010577813213402332, "loss": 0.0093, "step": 10321 }, { "epoch": 4.817736289381564, "grad_norm": 0.859375, "learning_rate": 0.00010576348310465514, "loss": 0.0174, "step": 10322 }, { "epoch": 4.818203033838973, "grad_norm": 0.380859375, "learning_rate": 0.00010574883395119279, "loss": 0.0066, "step": 10323 }, { "epoch": 4.818669778296383, "grad_norm": 0.439453125, "learning_rate": 0.00010573418467395168, "loss": 0.0075, "step": 10324 }, { "epoch": 4.819136522753793, "grad_norm": 0.60546875, "learning_rate": 0.0001057195352732472, "loss": 0.0174, "step": 10325 }, { "epoch": 4.8196032672112015, "grad_norm": 0.392578125, "learning_rate": 0.00010570488574939479, "loss": 0.0056, "step": 10326 }, { "epoch": 4.820070011668611, "grad_norm": 0.380859375, "learning_rate": 0.00010569023610270985, "loss": 0.0102, "step": 10327 }, { "epoch": 4.820536756126021, "grad_norm": 0.337890625, "learning_rate": 0.00010567558633350782, "loss": 0.0084, "step": 10328 }, { "epoch": 4.821003500583431, "grad_norm": 0.57421875, "learning_rate": 0.00010566093644210409, "loss": 0.0095, "step": 10329 }, { "epoch": 4.82147024504084, "grad_norm": 0.60546875, "learning_rate": 0.00010564628642881415, "loss": 0.0095, "step": 10330 }, { "epoch": 4.82193698949825, "grad_norm": 0.45703125, "learning_rate": 0.00010563163629395339, "loss": 0.0099, "step": 10331 }, { "epoch": 4.822403733955659, "grad_norm": 0.578125, "learning_rate": 0.00010561698603783725, "loss": 0.0126, "step": 10332 }, { "epoch": 4.822870478413069, "grad_norm": 0.56640625, "learning_rate": 0.00010560233566078118, "loss": 0.0078, "step": 10333 }, { "epoch": 4.823337222870478, "grad_norm": 0.79296875, "learning_rate": 0.00010558768516310058, "loss": 0.0167, "step": 10334 }, { "epoch": 4.823803967327888, "grad_norm": 0.34765625, "learning_rate": 0.00010557303454511097, "loss": 0.0055, "step": 10335 }, { "epoch": 4.824270711785298, "grad_norm": 0.32421875, "learning_rate": 0.00010555838380712768, "loss": 0.0034, "step": 10336 }, { "epoch": 4.8247374562427074, "grad_norm": 0.62109375, "learning_rate": 0.00010554373294946625, "loss": 0.0192, "step": 10337 }, { "epoch": 4.825204200700116, "grad_norm": 0.486328125, "learning_rate": 0.00010552908197244211, "loss": 0.0131, "step": 10338 }, { "epoch": 4.825670945157526, "grad_norm": 0.75390625, "learning_rate": 0.00010551443087637067, "loss": 0.0159, "step": 10339 }, { "epoch": 4.826137689614936, "grad_norm": 0.515625, "learning_rate": 0.00010549977966156743, "loss": 0.013, "step": 10340 }, { "epoch": 4.826604434072346, "grad_norm": 0.326171875, "learning_rate": 0.00010548512832834785, "loss": 0.0089, "step": 10341 }, { "epoch": 4.827071178529755, "grad_norm": 0.53125, "learning_rate": 0.00010547047687702733, "loss": 0.0086, "step": 10342 }, { "epoch": 4.827537922987164, "grad_norm": 0.6640625, "learning_rate": 0.00010545582530792142, "loss": 0.011, "step": 10343 }, { "epoch": 4.828004667444574, "grad_norm": 0.43359375, "learning_rate": 0.00010544117362134551, "loss": 0.007, "step": 10344 }, { "epoch": 4.828471411901984, "grad_norm": 0.60546875, "learning_rate": 0.00010542652181761512, "loss": 0.017, "step": 10345 }, { "epoch": 4.828938156359393, "grad_norm": 0.5, "learning_rate": 0.00010541186989704568, "loss": 0.0085, "step": 10346 }, { "epoch": 4.829404900816803, "grad_norm": 0.48046875, "learning_rate": 0.00010539721785995267, "loss": 0.0092, "step": 10347 }, { "epoch": 4.8298716452742125, "grad_norm": 0.37109375, "learning_rate": 0.00010538256570665157, "loss": 0.0057, "step": 10348 }, { "epoch": 4.830338389731622, "grad_norm": 0.474609375, "learning_rate": 0.00010536791343745787, "loss": 0.0136, "step": 10349 }, { "epoch": 4.830805134189031, "grad_norm": 0.5, "learning_rate": 0.00010535326105268701, "loss": 0.0124, "step": 10350 }, { "epoch": 4.831271878646441, "grad_norm": 0.53515625, "learning_rate": 0.00010533860855265454, "loss": 0.0203, "step": 10351 }, { "epoch": 4.831738623103851, "grad_norm": 0.59375, "learning_rate": 0.00010532395593767588, "loss": 0.0083, "step": 10352 }, { "epoch": 4.83220536756126, "grad_norm": 0.38671875, "learning_rate": 0.00010530930320806653, "loss": 0.0063, "step": 10353 }, { "epoch": 4.8326721120186695, "grad_norm": 0.50390625, "learning_rate": 0.00010529465036414203, "loss": 0.0088, "step": 10354 }, { "epoch": 4.833138856476079, "grad_norm": 0.609375, "learning_rate": 0.00010527999740621779, "loss": 0.0102, "step": 10355 }, { "epoch": 4.833605600933489, "grad_norm": 0.62890625, "learning_rate": 0.00010526534433460937, "loss": 0.0093, "step": 10356 }, { "epoch": 4.834072345390899, "grad_norm": 0.388671875, "learning_rate": 0.00010525069114963225, "loss": 0.006, "step": 10357 }, { "epoch": 4.834539089848308, "grad_norm": 0.486328125, "learning_rate": 0.0001052360378516019, "loss": 0.0092, "step": 10358 }, { "epoch": 4.835005834305718, "grad_norm": 0.578125, "learning_rate": 0.00010522138444083386, "loss": 0.0099, "step": 10359 }, { "epoch": 4.835472578763127, "grad_norm": 0.71484375, "learning_rate": 0.00010520673091764364, "loss": 0.0104, "step": 10360 }, { "epoch": 4.835939323220536, "grad_norm": 0.73828125, "learning_rate": 0.0001051920772823467, "loss": 0.0144, "step": 10361 }, { "epoch": 4.836406067677946, "grad_norm": 0.3984375, "learning_rate": 0.0001051774235352586, "loss": 0.0037, "step": 10362 }, { "epoch": 4.836872812135356, "grad_norm": 0.455078125, "learning_rate": 0.00010516276967669481, "loss": 0.0122, "step": 10363 }, { "epoch": 4.837339556592766, "grad_norm": 0.419921875, "learning_rate": 0.00010514811570697085, "loss": 0.0071, "step": 10364 }, { "epoch": 4.8378063010501755, "grad_norm": 0.34375, "learning_rate": 0.00010513346162640225, "loss": 0.0048, "step": 10365 }, { "epoch": 4.838273045507584, "grad_norm": 0.5859375, "learning_rate": 0.00010511880743530454, "loss": 0.0128, "step": 10366 }, { "epoch": 4.838739789964994, "grad_norm": 0.404296875, "learning_rate": 0.00010510415313399323, "loss": 0.0084, "step": 10367 }, { "epoch": 4.839206534422404, "grad_norm": 0.52734375, "learning_rate": 0.00010508949872278382, "loss": 0.0172, "step": 10368 }, { "epoch": 4.839673278879813, "grad_norm": 0.51953125, "learning_rate": 0.00010507484420199187, "loss": 0.0079, "step": 10369 }, { "epoch": 4.840140023337223, "grad_norm": 0.490234375, "learning_rate": 0.00010506018957193291, "loss": 0.0054, "step": 10370 }, { "epoch": 4.8406067677946325, "grad_norm": 0.40234375, "learning_rate": 0.00010504553483292241, "loss": 0.0053, "step": 10371 }, { "epoch": 4.841073512252042, "grad_norm": 0.333984375, "learning_rate": 0.00010503087998527596, "loss": 0.0094, "step": 10372 }, { "epoch": 4.841540256709451, "grad_norm": 0.6015625, "learning_rate": 0.00010501622502930912, "loss": 0.0126, "step": 10373 }, { "epoch": 4.842007001166861, "grad_norm": 0.61328125, "learning_rate": 0.00010500156996533737, "loss": 0.0073, "step": 10374 }, { "epoch": 4.842473745624271, "grad_norm": 0.470703125, "learning_rate": 0.00010498691479367624, "loss": 0.0119, "step": 10375 }, { "epoch": 4.842940490081681, "grad_norm": 0.6015625, "learning_rate": 0.00010497225951464131, "loss": 0.0159, "step": 10376 }, { "epoch": 4.8434072345390895, "grad_norm": 0.2421875, "learning_rate": 0.00010495760412854816, "loss": 0.0027, "step": 10377 }, { "epoch": 4.843873978996499, "grad_norm": 0.6015625, "learning_rate": 0.00010494294863571223, "loss": 0.0106, "step": 10378 }, { "epoch": 4.844340723453909, "grad_norm": 0.4375, "learning_rate": 0.00010492829303644917, "loss": 0.0098, "step": 10379 }, { "epoch": 4.844807467911319, "grad_norm": 0.4609375, "learning_rate": 0.00010491363733107449, "loss": 0.0076, "step": 10380 }, { "epoch": 4.845274212368728, "grad_norm": 0.59375, "learning_rate": 0.00010489898151990374, "loss": 0.0106, "step": 10381 }, { "epoch": 4.845740956826138, "grad_norm": 0.63671875, "learning_rate": 0.00010488432560325247, "loss": 0.0134, "step": 10382 }, { "epoch": 4.846207701283547, "grad_norm": 0.451171875, "learning_rate": 0.00010486966958143626, "loss": 0.0076, "step": 10383 }, { "epoch": 4.846674445740957, "grad_norm": 0.5625, "learning_rate": 0.00010485501345477069, "loss": 0.0078, "step": 10384 }, { "epoch": 4.847141190198366, "grad_norm": 0.69140625, "learning_rate": 0.00010484035722357126, "loss": 0.0086, "step": 10385 }, { "epoch": 4.847607934655776, "grad_norm": 0.6484375, "learning_rate": 0.00010482570088815357, "loss": 0.0076, "step": 10386 }, { "epoch": 4.848074679113186, "grad_norm": 0.671875, "learning_rate": 0.0001048110444488332, "loss": 0.0071, "step": 10387 }, { "epoch": 4.8485414235705955, "grad_norm": 0.7578125, "learning_rate": 0.0001047963879059257, "loss": 0.01, "step": 10388 }, { "epoch": 4.849008168028004, "grad_norm": 0.65234375, "learning_rate": 0.00010478173125974664, "loss": 0.0082, "step": 10389 }, { "epoch": 4.849474912485414, "grad_norm": 0.70703125, "learning_rate": 0.00010476707451061161, "loss": 0.0156, "step": 10390 }, { "epoch": 4.849941656942824, "grad_norm": 0.345703125, "learning_rate": 0.00010475241765883616, "loss": 0.0096, "step": 10391 }, { "epoch": 4.850408401400234, "grad_norm": 0.283203125, "learning_rate": 0.00010473776070473591, "loss": 0.0038, "step": 10392 }, { "epoch": 4.850875145857643, "grad_norm": 0.734375, "learning_rate": 0.00010472310364862643, "loss": 0.022, "step": 10393 }, { "epoch": 4.8513418903150525, "grad_norm": 0.6953125, "learning_rate": 0.00010470844649082324, "loss": 0.01, "step": 10394 }, { "epoch": 4.851808634772462, "grad_norm": 0.57421875, "learning_rate": 0.00010469378923164199, "loss": 0.0121, "step": 10395 }, { "epoch": 4.852275379229871, "grad_norm": 0.3125, "learning_rate": 0.00010467913187139826, "loss": 0.0045, "step": 10396 }, { "epoch": 4.852742123687281, "grad_norm": 0.306640625, "learning_rate": 0.0001046644744104076, "loss": 0.0059, "step": 10397 }, { "epoch": 4.853208868144691, "grad_norm": 0.6328125, "learning_rate": 0.00010464981684898568, "loss": 0.009, "step": 10398 }, { "epoch": 4.853675612602101, "grad_norm": 0.59765625, "learning_rate": 0.00010463515918744802, "loss": 0.0106, "step": 10399 }, { "epoch": 4.85414235705951, "grad_norm": 0.79296875, "learning_rate": 0.00010462050142611024, "loss": 0.0151, "step": 10400 }, { "epoch": 4.854609101516919, "grad_norm": 0.404296875, "learning_rate": 0.00010460584356528791, "loss": 0.0065, "step": 10401 }, { "epoch": 4.855075845974329, "grad_norm": 0.9296875, "learning_rate": 0.0001045911856052967, "loss": 0.0077, "step": 10402 }, { "epoch": 4.855542590431739, "grad_norm": 0.447265625, "learning_rate": 0.00010457652754645216, "loss": 0.007, "step": 10403 }, { "epoch": 4.856009334889148, "grad_norm": 0.38671875, "learning_rate": 0.0001045618693890699, "loss": 0.0069, "step": 10404 }, { "epoch": 4.856476079346558, "grad_norm": 0.484375, "learning_rate": 0.0001045472111334655, "loss": 0.01, "step": 10405 }, { "epoch": 4.856942823803967, "grad_norm": 0.62109375, "learning_rate": 0.00010453255277995464, "loss": 0.0108, "step": 10406 }, { "epoch": 4.857409568261377, "grad_norm": 0.478515625, "learning_rate": 0.00010451789432885287, "loss": 0.0076, "step": 10407 }, { "epoch": 4.857876312718787, "grad_norm": 0.4609375, "learning_rate": 0.00010450323578047583, "loss": 0.006, "step": 10408 }, { "epoch": 4.858343057176196, "grad_norm": 0.51171875, "learning_rate": 0.00010448857713513914, "loss": 0.0072, "step": 10409 }, { "epoch": 4.858809801633606, "grad_norm": 0.57421875, "learning_rate": 0.00010447391839315838, "loss": 0.0125, "step": 10410 }, { "epoch": 4.8592765460910154, "grad_norm": 0.45703125, "learning_rate": 0.00010445925955484918, "loss": 0.01, "step": 10411 }, { "epoch": 4.859743290548424, "grad_norm": 0.5234375, "learning_rate": 0.00010444460062052721, "loss": 0.0086, "step": 10412 }, { "epoch": 4.860210035005834, "grad_norm": 0.44140625, "learning_rate": 0.00010442994159050805, "loss": 0.007, "step": 10413 }, { "epoch": 4.860676779463244, "grad_norm": 0.74609375, "learning_rate": 0.00010441528246510732, "loss": 0.0106, "step": 10414 }, { "epoch": 4.861143523920654, "grad_norm": 0.7109375, "learning_rate": 0.00010440062324464069, "loss": 0.0152, "step": 10415 }, { "epoch": 4.861610268378063, "grad_norm": 0.67578125, "learning_rate": 0.00010438596392942372, "loss": 0.0161, "step": 10416 }, { "epoch": 4.8620770128354724, "grad_norm": 0.51171875, "learning_rate": 0.0001043713045197721, "loss": 0.0072, "step": 10417 }, { "epoch": 4.862543757292882, "grad_norm": 0.462890625, "learning_rate": 0.00010435664501600144, "loss": 0.0087, "step": 10418 }, { "epoch": 4.863010501750292, "grad_norm": 0.59375, "learning_rate": 0.0001043419854184274, "loss": 0.0135, "step": 10419 }, { "epoch": 4.863477246207701, "grad_norm": 0.67578125, "learning_rate": 0.00010432732572736556, "loss": 0.017, "step": 10420 }, { "epoch": 4.863943990665111, "grad_norm": 0.40625, "learning_rate": 0.0001043126659431316, "loss": 0.0078, "step": 10421 }, { "epoch": 4.8644107351225205, "grad_norm": 0.57421875, "learning_rate": 0.0001042980060660412, "loss": 0.0115, "step": 10422 }, { "epoch": 4.86487747957993, "grad_norm": 0.4921875, "learning_rate": 0.00010428334609640993, "loss": 0.0086, "step": 10423 }, { "epoch": 4.865344224037339, "grad_norm": 0.35546875, "learning_rate": 0.00010426868603455344, "loss": 0.0078, "step": 10424 }, { "epoch": 4.865810968494749, "grad_norm": 0.8203125, "learning_rate": 0.00010425402588078743, "loss": 0.0125, "step": 10425 }, { "epoch": 4.866277712952159, "grad_norm": 0.625, "learning_rate": 0.00010423936563542754, "loss": 0.0103, "step": 10426 }, { "epoch": 4.866744457409569, "grad_norm": 0.4921875, "learning_rate": 0.00010422470529878937, "loss": 0.0067, "step": 10427 }, { "epoch": 4.8672112018669775, "grad_norm": 0.482421875, "learning_rate": 0.00010421004487118861, "loss": 0.0049, "step": 10428 }, { "epoch": 4.867677946324387, "grad_norm": 0.59765625, "learning_rate": 0.00010419538435294094, "loss": 0.0155, "step": 10429 }, { "epoch": 4.868144690781797, "grad_norm": 0.42578125, "learning_rate": 0.00010418072374436196, "loss": 0.0059, "step": 10430 }, { "epoch": 4.868611435239207, "grad_norm": 0.35546875, "learning_rate": 0.0001041660630457674, "loss": 0.0074, "step": 10431 }, { "epoch": 4.869078179696616, "grad_norm": 0.58203125, "learning_rate": 0.00010415140225747284, "loss": 0.0109, "step": 10432 }, { "epoch": 4.869544924154026, "grad_norm": 0.49609375, "learning_rate": 0.00010413674137979402, "loss": 0.0142, "step": 10433 }, { "epoch": 4.870011668611435, "grad_norm": 0.376953125, "learning_rate": 0.00010412208041304652, "loss": 0.0055, "step": 10434 }, { "epoch": 4.870478413068845, "grad_norm": 0.828125, "learning_rate": 0.00010410741935754608, "loss": 0.0144, "step": 10435 }, { "epoch": 4.870945157526254, "grad_norm": 0.46875, "learning_rate": 0.00010409275821360836, "loss": 0.0104, "step": 10436 }, { "epoch": 4.871411901983664, "grad_norm": 0.47265625, "learning_rate": 0.000104078096981549, "loss": 0.008, "step": 10437 }, { "epoch": 4.871878646441074, "grad_norm": 0.5859375, "learning_rate": 0.00010406343566168369, "loss": 0.0093, "step": 10438 }, { "epoch": 4.872345390898483, "grad_norm": 0.5234375, "learning_rate": 0.00010404877425432811, "loss": 0.0087, "step": 10439 }, { "epoch": 4.872812135355892, "grad_norm": 0.49609375, "learning_rate": 0.00010403411275979789, "loss": 0.0105, "step": 10440 }, { "epoch": 4.873278879813302, "grad_norm": 0.427734375, "learning_rate": 0.00010401945117840881, "loss": 0.0098, "step": 10441 }, { "epoch": 4.873745624270712, "grad_norm": 0.462890625, "learning_rate": 0.00010400478951047645, "loss": 0.0152, "step": 10442 }, { "epoch": 4.874212368728122, "grad_norm": 0.640625, "learning_rate": 0.00010399012775631651, "loss": 0.0095, "step": 10443 }, { "epoch": 4.874679113185531, "grad_norm": 0.48046875, "learning_rate": 0.00010397546591624472, "loss": 0.0102, "step": 10444 }, { "epoch": 4.8751458576429405, "grad_norm": 0.44921875, "learning_rate": 0.00010396080399057672, "loss": 0.0088, "step": 10445 }, { "epoch": 4.87561260210035, "grad_norm": 0.51171875, "learning_rate": 0.00010394614197962822, "loss": 0.0075, "step": 10446 }, { "epoch": 4.876079346557759, "grad_norm": 0.56640625, "learning_rate": 0.00010393147988371492, "loss": 0.0108, "step": 10447 }, { "epoch": 4.876546091015169, "grad_norm": 0.251953125, "learning_rate": 0.00010391681770315249, "loss": 0.0073, "step": 10448 }, { "epoch": 4.877012835472579, "grad_norm": 0.69140625, "learning_rate": 0.00010390215543825664, "loss": 0.0135, "step": 10449 }, { "epoch": 4.877479579929989, "grad_norm": 0.349609375, "learning_rate": 0.00010388749308934303, "loss": 0.0041, "step": 10450 }, { "epoch": 4.877946324387398, "grad_norm": 0.69921875, "learning_rate": 0.0001038728306567274, "loss": 0.0095, "step": 10451 }, { "epoch": 4.878413068844807, "grad_norm": 0.275390625, "learning_rate": 0.0001038581681407254, "loss": 0.004, "step": 10452 }, { "epoch": 4.878879813302217, "grad_norm": 0.462890625, "learning_rate": 0.00010384350554165276, "loss": 0.0062, "step": 10453 }, { "epoch": 4.879346557759627, "grad_norm": 0.388671875, "learning_rate": 0.00010382884285982521, "loss": 0.0083, "step": 10454 }, { "epoch": 4.879813302217036, "grad_norm": 0.41796875, "learning_rate": 0.00010381418009555843, "loss": 0.0056, "step": 10455 }, { "epoch": 4.880280046674446, "grad_norm": 0.5234375, "learning_rate": 0.00010379951724916807, "loss": 0.0125, "step": 10456 }, { "epoch": 4.880746791131855, "grad_norm": 0.55078125, "learning_rate": 0.00010378485432096993, "loss": 0.0089, "step": 10457 }, { "epoch": 4.881213535589265, "grad_norm": 0.60546875, "learning_rate": 0.0001037701913112797, "loss": 0.0165, "step": 10458 }, { "epoch": 4.881680280046674, "grad_norm": 0.3984375, "learning_rate": 0.00010375552822041303, "loss": 0.0053, "step": 10459 }, { "epoch": 4.882147024504084, "grad_norm": 0.337890625, "learning_rate": 0.00010374086504868566, "loss": 0.0076, "step": 10460 }, { "epoch": 4.882613768961494, "grad_norm": 0.38671875, "learning_rate": 0.00010372620179641335, "loss": 0.0059, "step": 10461 }, { "epoch": 4.8830805134189035, "grad_norm": 0.59375, "learning_rate": 0.00010371153846391178, "loss": 0.0088, "step": 10462 }, { "epoch": 4.883547257876312, "grad_norm": 0.625, "learning_rate": 0.00010369687505149661, "loss": 0.0106, "step": 10463 }, { "epoch": 4.884014002333722, "grad_norm": 0.470703125, "learning_rate": 0.00010368221155948368, "loss": 0.0057, "step": 10464 }, { "epoch": 4.884480746791132, "grad_norm": 0.322265625, "learning_rate": 0.00010366754798818865, "loss": 0.0056, "step": 10465 }, { "epoch": 4.884947491248542, "grad_norm": 0.451171875, "learning_rate": 0.00010365288433792722, "loss": 0.0059, "step": 10466 }, { "epoch": 4.885414235705951, "grad_norm": 0.8828125, "learning_rate": 0.00010363822060901513, "loss": 0.0171, "step": 10467 }, { "epoch": 4.8858809801633605, "grad_norm": 0.45703125, "learning_rate": 0.00010362355680176814, "loss": 0.0101, "step": 10468 }, { "epoch": 4.88634772462077, "grad_norm": 0.326171875, "learning_rate": 0.00010360889291650193, "loss": 0.005, "step": 10469 }, { "epoch": 4.88681446907818, "grad_norm": 0.466796875, "learning_rate": 0.00010359422895353225, "loss": 0.0063, "step": 10470 }, { "epoch": 4.887281213535589, "grad_norm": 0.421875, "learning_rate": 0.00010357956491317484, "loss": 0.0064, "step": 10471 }, { "epoch": 4.887747957992999, "grad_norm": 0.61328125, "learning_rate": 0.00010356490079574541, "loss": 0.0111, "step": 10472 }, { "epoch": 4.888214702450409, "grad_norm": 0.443359375, "learning_rate": 0.00010355023660155974, "loss": 0.0074, "step": 10473 }, { "epoch": 4.8886814469078175, "grad_norm": 0.314453125, "learning_rate": 0.00010353557233093355, "loss": 0.0037, "step": 10474 }, { "epoch": 4.889148191365227, "grad_norm": 0.474609375, "learning_rate": 0.00010352090798418252, "loss": 0.0115, "step": 10475 }, { "epoch": 4.889614935822637, "grad_norm": 0.2353515625, "learning_rate": 0.00010350624356162244, "loss": 0.0036, "step": 10476 }, { "epoch": 4.890081680280047, "grad_norm": 0.46875, "learning_rate": 0.00010349157906356908, "loss": 0.0097, "step": 10477 }, { "epoch": 4.890548424737457, "grad_norm": 0.30859375, "learning_rate": 0.00010347691449033812, "loss": 0.0041, "step": 10478 }, { "epoch": 4.891015169194866, "grad_norm": 0.76171875, "learning_rate": 0.00010346224984224533, "loss": 0.0083, "step": 10479 }, { "epoch": 4.891481913652275, "grad_norm": 0.45703125, "learning_rate": 0.00010344758511960648, "loss": 0.0074, "step": 10480 }, { "epoch": 4.891948658109685, "grad_norm": 0.373046875, "learning_rate": 0.00010343292032273728, "loss": 0.0076, "step": 10481 }, { "epoch": 4.892415402567094, "grad_norm": 0.40234375, "learning_rate": 0.00010341825545195351, "loss": 0.0062, "step": 10482 }, { "epoch": 4.892882147024504, "grad_norm": 0.671875, "learning_rate": 0.00010340359050757089, "loss": 0.0056, "step": 10483 }, { "epoch": 4.893348891481914, "grad_norm": 0.4453125, "learning_rate": 0.00010338892548990522, "loss": 0.0067, "step": 10484 }, { "epoch": 4.8938156359393234, "grad_norm": 0.443359375, "learning_rate": 0.0001033742603992722, "loss": 0.0061, "step": 10485 }, { "epoch": 4.894282380396733, "grad_norm": 0.44921875, "learning_rate": 0.00010335959523598761, "loss": 0.012, "step": 10486 }, { "epoch": 4.894749124854142, "grad_norm": 0.6015625, "learning_rate": 0.00010334493000036722, "loss": 0.0097, "step": 10487 }, { "epoch": 4.895215869311552, "grad_norm": 0.240234375, "learning_rate": 0.00010333026469272679, "loss": 0.008, "step": 10488 }, { "epoch": 4.895682613768962, "grad_norm": 0.46484375, "learning_rate": 0.00010331559931338204, "loss": 0.0078, "step": 10489 }, { "epoch": 4.896149358226371, "grad_norm": 0.59765625, "learning_rate": 0.00010330093386264877, "loss": 0.0071, "step": 10490 }, { "epoch": 4.8966161026837804, "grad_norm": 0.45703125, "learning_rate": 0.00010328626834084272, "loss": 0.0048, "step": 10491 }, { "epoch": 4.89708284714119, "grad_norm": 0.62890625, "learning_rate": 0.00010327160274827965, "loss": 0.0099, "step": 10492 }, { "epoch": 4.8975495915986, "grad_norm": 0.49609375, "learning_rate": 0.00010325693708527537, "loss": 0.0175, "step": 10493 }, { "epoch": 4.89801633605601, "grad_norm": 0.431640625, "learning_rate": 0.00010324227135214561, "loss": 0.0039, "step": 10494 }, { "epoch": 4.898483080513419, "grad_norm": 0.380859375, "learning_rate": 0.00010322760554920616, "loss": 0.0073, "step": 10495 }, { "epoch": 4.8989498249708285, "grad_norm": 0.7890625, "learning_rate": 0.00010321293967677277, "loss": 0.0126, "step": 10496 }, { "epoch": 4.899416569428238, "grad_norm": 0.478515625, "learning_rate": 0.00010319827373516124, "loss": 0.0066, "step": 10497 }, { "epoch": 4.899883313885647, "grad_norm": 0.455078125, "learning_rate": 0.00010318360772468734, "loss": 0.0112, "step": 10498 }, { "epoch": 4.900350058343057, "grad_norm": 0.287109375, "learning_rate": 0.0001031689416456668, "loss": 0.0042, "step": 10499 }, { "epoch": 4.900816802800467, "grad_norm": 0.90234375, "learning_rate": 0.00010315427549841543, "loss": 0.0086, "step": 10500 }, { "epoch": 4.901283547257877, "grad_norm": 0.37890625, "learning_rate": 0.00010313960928324903, "loss": 0.0103, "step": 10501 }, { "epoch": 4.9017502917152855, "grad_norm": 0.1923828125, "learning_rate": 0.00010312494300048335, "loss": 0.0022, "step": 10502 }, { "epoch": 4.902217036172695, "grad_norm": 0.384765625, "learning_rate": 0.00010311027665043418, "loss": 0.0077, "step": 10503 }, { "epoch": 4.902683780630105, "grad_norm": 0.60546875, "learning_rate": 0.00010309561023341729, "loss": 0.0094, "step": 10504 }, { "epoch": 4.903150525087515, "grad_norm": 0.53125, "learning_rate": 0.00010308094374974848, "loss": 0.0108, "step": 10505 }, { "epoch": 4.903617269544924, "grad_norm": 0.31640625, "learning_rate": 0.00010306627719974355, "loss": 0.0038, "step": 10506 }, { "epoch": 4.904084014002334, "grad_norm": 0.49609375, "learning_rate": 0.00010305161058371826, "loss": 0.0061, "step": 10507 }, { "epoch": 4.904550758459743, "grad_norm": 0.55859375, "learning_rate": 0.00010303694390198841, "loss": 0.0151, "step": 10508 }, { "epoch": 4.905017502917153, "grad_norm": 0.71875, "learning_rate": 0.00010302227715486978, "loss": 0.0153, "step": 10509 }, { "epoch": 4.905484247374562, "grad_norm": 0.5234375, "learning_rate": 0.00010300761034267818, "loss": 0.0063, "step": 10510 }, { "epoch": 4.905950991831972, "grad_norm": 0.609375, "learning_rate": 0.0001029929434657294, "loss": 0.0118, "step": 10511 }, { "epoch": 4.906417736289382, "grad_norm": 0.330078125, "learning_rate": 0.00010297827652433922, "loss": 0.0035, "step": 10512 }, { "epoch": 4.9068844807467915, "grad_norm": 0.455078125, "learning_rate": 0.00010296360951882344, "loss": 0.0075, "step": 10513 }, { "epoch": 4.9073512252042, "grad_norm": 0.58203125, "learning_rate": 0.00010294894244949788, "loss": 0.0125, "step": 10514 }, { "epoch": 4.90781796966161, "grad_norm": 1.1015625, "learning_rate": 0.00010293427531667827, "loss": 0.0091, "step": 10515 }, { "epoch": 4.90828471411902, "grad_norm": 0.89453125, "learning_rate": 0.00010291960812068049, "loss": 0.0061, "step": 10516 }, { "epoch": 4.908751458576429, "grad_norm": 0.51953125, "learning_rate": 0.00010290494086182031, "loss": 0.0052, "step": 10517 }, { "epoch": 4.909218203033839, "grad_norm": 0.49609375, "learning_rate": 0.00010289027354041349, "loss": 0.0086, "step": 10518 }, { "epoch": 4.9096849474912485, "grad_norm": 0.263671875, "learning_rate": 0.00010287560615677592, "loss": 0.0053, "step": 10519 }, { "epoch": 4.910151691948658, "grad_norm": 0.345703125, "learning_rate": 0.00010286093871122335, "loss": 0.0062, "step": 10520 }, { "epoch": 4.910618436406068, "grad_norm": 0.4453125, "learning_rate": 0.00010284627120407158, "loss": 0.0054, "step": 10521 }, { "epoch": 4.911085180863477, "grad_norm": 0.427734375, "learning_rate": 0.00010283160363563644, "loss": 0.0051, "step": 10522 }, { "epoch": 4.911551925320887, "grad_norm": 0.283203125, "learning_rate": 0.00010281693600623374, "loss": 0.0032, "step": 10523 }, { "epoch": 4.912018669778297, "grad_norm": 0.67578125, "learning_rate": 0.00010280226831617928, "loss": 0.015, "step": 10524 }, { "epoch": 4.9124854142357055, "grad_norm": 0.435546875, "learning_rate": 0.00010278760056578885, "loss": 0.0101, "step": 10525 }, { "epoch": 4.912952158693115, "grad_norm": 0.6328125, "learning_rate": 0.00010277293275537832, "loss": 0.0096, "step": 10526 }, { "epoch": 4.913418903150525, "grad_norm": 0.921875, "learning_rate": 0.00010275826488526346, "loss": 0.0145, "step": 10527 }, { "epoch": 4.913885647607935, "grad_norm": 0.4921875, "learning_rate": 0.00010274359695576008, "loss": 0.0118, "step": 10528 }, { "epoch": 4.914352392065345, "grad_norm": 0.765625, "learning_rate": 0.00010272892896718403, "loss": 0.0125, "step": 10529 }, { "epoch": 4.914819136522754, "grad_norm": 0.77734375, "learning_rate": 0.00010271426091985112, "loss": 0.0134, "step": 10530 }, { "epoch": 4.915285880980163, "grad_norm": 0.494140625, "learning_rate": 0.00010269959281407712, "loss": 0.0072, "step": 10531 }, { "epoch": 4.915752625437573, "grad_norm": 0.47265625, "learning_rate": 0.00010268492465017794, "loss": 0.0077, "step": 10532 }, { "epoch": 4.916219369894982, "grad_norm": 0.4375, "learning_rate": 0.00010267025642846932, "loss": 0.0055, "step": 10533 }, { "epoch": 4.916686114352392, "grad_norm": 0.4609375, "learning_rate": 0.00010265558814926715, "loss": 0.008, "step": 10534 }, { "epoch": 4.917152858809802, "grad_norm": 0.9921875, "learning_rate": 0.0001026409198128872, "loss": 0.013, "step": 10535 }, { "epoch": 4.9176196032672115, "grad_norm": 0.4140625, "learning_rate": 0.00010262625141964532, "loss": 0.007, "step": 10536 }, { "epoch": 4.91808634772462, "grad_norm": 0.58984375, "learning_rate": 0.00010261158296985734, "loss": 0.0082, "step": 10537 }, { "epoch": 4.91855309218203, "grad_norm": 0.640625, "learning_rate": 0.00010259691446383906, "loss": 0.0161, "step": 10538 }, { "epoch": 4.91901983663944, "grad_norm": 0.416015625, "learning_rate": 0.00010258224590190634, "loss": 0.0054, "step": 10539 }, { "epoch": 4.91948658109685, "grad_norm": 0.392578125, "learning_rate": 0.00010256757728437503, "loss": 0.0105, "step": 10540 }, { "epoch": 4.919953325554259, "grad_norm": 0.494140625, "learning_rate": 0.00010255290861156089, "loss": 0.0112, "step": 10541 }, { "epoch": 4.9204200700116685, "grad_norm": 0.51171875, "learning_rate": 0.00010253823988377982, "loss": 0.0108, "step": 10542 }, { "epoch": 4.920886814469078, "grad_norm": 0.359375, "learning_rate": 0.0001025235711013476, "loss": 0.0041, "step": 10543 }, { "epoch": 4.921353558926488, "grad_norm": 0.5390625, "learning_rate": 0.00010250890226458012, "loss": 0.0084, "step": 10544 }, { "epoch": 4.921820303383897, "grad_norm": 0.390625, "learning_rate": 0.00010249423337379318, "loss": 0.0077, "step": 10545 }, { "epoch": 4.922287047841307, "grad_norm": 0.310546875, "learning_rate": 0.00010247956442930265, "loss": 0.0075, "step": 10546 }, { "epoch": 4.922753792298717, "grad_norm": 0.66015625, "learning_rate": 0.00010246489543142432, "loss": 0.0107, "step": 10547 }, { "epoch": 4.923220536756126, "grad_norm": 0.30078125, "learning_rate": 0.00010245022638047406, "loss": 0.0034, "step": 10548 }, { "epoch": 4.923687281213535, "grad_norm": 0.5859375, "learning_rate": 0.00010243555727676772, "loss": 0.0087, "step": 10549 }, { "epoch": 4.924154025670945, "grad_norm": 0.48828125, "learning_rate": 0.00010242088812062111, "loss": 0.0071, "step": 10550 }, { "epoch": 4.924620770128355, "grad_norm": 0.396484375, "learning_rate": 0.00010240621891235009, "loss": 0.0072, "step": 10551 }, { "epoch": 4.925087514585765, "grad_norm": 0.66796875, "learning_rate": 0.00010239154965227053, "loss": 0.0148, "step": 10552 }, { "epoch": 4.925554259043174, "grad_norm": 0.478515625, "learning_rate": 0.00010237688034069826, "loss": 0.0074, "step": 10553 }, { "epoch": 4.926021003500583, "grad_norm": 0.5546875, "learning_rate": 0.0001023622109779491, "loss": 0.0133, "step": 10554 }, { "epoch": 4.926487747957993, "grad_norm": 0.42578125, "learning_rate": 0.00010234754156433892, "loss": 0.006, "step": 10555 }, { "epoch": 4.926954492415403, "grad_norm": 0.65234375, "learning_rate": 0.00010233287210018357, "loss": 0.0109, "step": 10556 }, { "epoch": 4.927421236872812, "grad_norm": 0.375, "learning_rate": 0.00010231820258579889, "loss": 0.0049, "step": 10557 }, { "epoch": 4.927887981330222, "grad_norm": 0.396484375, "learning_rate": 0.00010230353302150073, "loss": 0.0047, "step": 10558 }, { "epoch": 4.9283547257876315, "grad_norm": 0.50390625, "learning_rate": 0.00010228886340760495, "loss": 0.0139, "step": 10559 }, { "epoch": 4.92882147024504, "grad_norm": 0.515625, "learning_rate": 0.00010227419374442742, "loss": 0.0075, "step": 10560 }, { "epoch": 4.92928821470245, "grad_norm": 0.498046875, "learning_rate": 0.00010225952403228395, "loss": 0.0138, "step": 10561 }, { "epoch": 4.92975495915986, "grad_norm": 0.33203125, "learning_rate": 0.00010224485427149047, "loss": 0.0038, "step": 10562 }, { "epoch": 4.93022170361727, "grad_norm": 0.4375, "learning_rate": 0.00010223018446236274, "loss": 0.0079, "step": 10563 }, { "epoch": 4.9306884480746795, "grad_norm": 0.40234375, "learning_rate": 0.00010221551460521666, "loss": 0.0057, "step": 10564 }, { "epoch": 4.9311551925320884, "grad_norm": 0.392578125, "learning_rate": 0.00010220084470036813, "loss": 0.0047, "step": 10565 }, { "epoch": 4.931621936989498, "grad_norm": 0.36328125, "learning_rate": 0.00010218617474813294, "loss": 0.004, "step": 10566 }, { "epoch": 4.932088681446908, "grad_norm": 0.458984375, "learning_rate": 0.00010217150474882697, "loss": 0.0069, "step": 10567 }, { "epoch": 4.932555425904317, "grad_norm": 0.55078125, "learning_rate": 0.00010215683470276611, "loss": 0.0084, "step": 10568 }, { "epoch": 4.933022170361727, "grad_norm": 0.494140625, "learning_rate": 0.00010214216461026623, "loss": 0.0063, "step": 10569 }, { "epoch": 4.9334889148191365, "grad_norm": 0.412109375, "learning_rate": 0.00010212749447164314, "loss": 0.0058, "step": 10570 }, { "epoch": 4.933955659276546, "grad_norm": 0.55078125, "learning_rate": 0.00010211282428721273, "loss": 0.0076, "step": 10571 }, { "epoch": 4.934422403733956, "grad_norm": 0.28125, "learning_rate": 0.0001020981540572909, "loss": 0.0031, "step": 10572 }, { "epoch": 4.934889148191365, "grad_norm": 0.46484375, "learning_rate": 0.00010208348378219348, "loss": 0.0082, "step": 10573 }, { "epoch": 4.935355892648775, "grad_norm": 0.6796875, "learning_rate": 0.00010206881346223632, "loss": 0.0144, "step": 10574 }, { "epoch": 4.935822637106185, "grad_norm": 0.3359375, "learning_rate": 0.00010205414309773532, "loss": 0.0034, "step": 10575 }, { "epoch": 4.9362893815635935, "grad_norm": 0.447265625, "learning_rate": 0.00010203947268900633, "loss": 0.0062, "step": 10576 }, { "epoch": 4.936756126021003, "grad_norm": 0.306640625, "learning_rate": 0.00010202480223636524, "loss": 0.003, "step": 10577 }, { "epoch": 4.937222870478413, "grad_norm": 0.578125, "learning_rate": 0.0001020101317401279, "loss": 0.0105, "step": 10578 }, { "epoch": 4.937689614935823, "grad_norm": 0.47265625, "learning_rate": 0.00010199546120061022, "loss": 0.0062, "step": 10579 }, { "epoch": 4.938156359393232, "grad_norm": 1.1796875, "learning_rate": 0.00010198079061812802, "loss": 0.0131, "step": 10580 }, { "epoch": 4.938623103850642, "grad_norm": 0.4140625, "learning_rate": 0.0001019661199929972, "loss": 0.004, "step": 10581 }, { "epoch": 4.939089848308051, "grad_norm": 0.462890625, "learning_rate": 0.00010195144932553366, "loss": 0.0101, "step": 10582 }, { "epoch": 4.939556592765461, "grad_norm": 0.4609375, "learning_rate": 0.00010193677861605325, "loss": 0.0075, "step": 10583 }, { "epoch": 4.94002333722287, "grad_norm": 0.384765625, "learning_rate": 0.00010192210786487184, "loss": 0.0058, "step": 10584 }, { "epoch": 4.94049008168028, "grad_norm": 0.5703125, "learning_rate": 0.0001019074370723053, "loss": 0.0118, "step": 10585 }, { "epoch": 4.94095682613769, "grad_norm": 0.4921875, "learning_rate": 0.00010189276623866953, "loss": 0.0061, "step": 10586 }, { "epoch": 4.9414235705950995, "grad_norm": 0.5, "learning_rate": 0.00010187809536428041, "loss": 0.0052, "step": 10587 }, { "epoch": 4.941890315052508, "grad_norm": 0.53125, "learning_rate": 0.0001018634244494538, "loss": 0.0125, "step": 10588 }, { "epoch": 4.942357059509918, "grad_norm": 0.44921875, "learning_rate": 0.00010184875349450562, "loss": 0.0067, "step": 10589 }, { "epoch": 4.942823803967328, "grad_norm": 0.388671875, "learning_rate": 0.00010183408249975169, "loss": 0.0049, "step": 10590 }, { "epoch": 4.943290548424738, "grad_norm": 0.5703125, "learning_rate": 0.00010181941146550796, "loss": 0.0091, "step": 10591 }, { "epoch": 4.943757292882147, "grad_norm": 0.39453125, "learning_rate": 0.00010180474039209026, "loss": 0.0096, "step": 10592 }, { "epoch": 4.9442240373395565, "grad_norm": 0.326171875, "learning_rate": 0.00010179006927981451, "loss": 0.0032, "step": 10593 }, { "epoch": 4.944690781796966, "grad_norm": 0.27734375, "learning_rate": 0.00010177539812899657, "loss": 0.0037, "step": 10594 }, { "epoch": 4.945157526254376, "grad_norm": 0.6171875, "learning_rate": 0.00010176072693995235, "loss": 0.0103, "step": 10595 }, { "epoch": 4.945624270711785, "grad_norm": 0.609375, "learning_rate": 0.00010174605571299772, "loss": 0.007, "step": 10596 }, { "epoch": 4.946091015169195, "grad_norm": 0.357421875, "learning_rate": 0.00010173138444844859, "loss": 0.0046, "step": 10597 }, { "epoch": 4.946557759626605, "grad_norm": 0.44921875, "learning_rate": 0.00010171671314662082, "loss": 0.0075, "step": 10598 }, { "epoch": 4.947024504084014, "grad_norm": 0.515625, "learning_rate": 0.00010170204180783032, "loss": 0.0077, "step": 10599 }, { "epoch": 4.947491248541423, "grad_norm": 0.333984375, "learning_rate": 0.00010168737043239293, "loss": 0.0038, "step": 10600 }, { "epoch": 4.947957992998833, "grad_norm": 0.51171875, "learning_rate": 0.00010167269902062461, "loss": 0.008, "step": 10601 }, { "epoch": 4.948424737456243, "grad_norm": 0.515625, "learning_rate": 0.00010165802757284124, "loss": 0.0078, "step": 10602 }, { "epoch": 4.948891481913652, "grad_norm": 0.5546875, "learning_rate": 0.00010164335608935869, "loss": 0.011, "step": 10603 }, { "epoch": 4.949358226371062, "grad_norm": 0.482421875, "learning_rate": 0.00010162868457049286, "loss": 0.0048, "step": 10604 }, { "epoch": 4.949824970828471, "grad_norm": 0.412109375, "learning_rate": 0.00010161401301655964, "loss": 0.0066, "step": 10605 }, { "epoch": 4.950291715285881, "grad_norm": 0.162109375, "learning_rate": 0.00010159934142787493, "loss": 0.0022, "step": 10606 }, { "epoch": 4.950758459743291, "grad_norm": 0.390625, "learning_rate": 0.00010158466980475461, "loss": 0.0069, "step": 10607 }, { "epoch": 4.9512252042007, "grad_norm": 0.4453125, "learning_rate": 0.00010156999814751462, "loss": 0.0067, "step": 10608 }, { "epoch": 4.95169194865811, "grad_norm": 0.671875, "learning_rate": 0.00010155532645647081, "loss": 0.0098, "step": 10609 }, { "epoch": 4.9521586931155195, "grad_norm": 0.5546875, "learning_rate": 0.0001015406547319391, "loss": 0.0084, "step": 10610 }, { "epoch": 4.952625437572928, "grad_norm": 0.4921875, "learning_rate": 0.00010152598297423539, "loss": 0.0082, "step": 10611 }, { "epoch": 4.953092182030338, "grad_norm": 0.51171875, "learning_rate": 0.00010151131118367556, "loss": 0.0081, "step": 10612 }, { "epoch": 4.953558926487748, "grad_norm": 0.33203125, "learning_rate": 0.00010149663936057552, "loss": 0.0084, "step": 10613 }, { "epoch": 4.954025670945158, "grad_norm": 0.376953125, "learning_rate": 0.00010148196750525118, "loss": 0.0053, "step": 10614 }, { "epoch": 4.954492415402568, "grad_norm": 0.462890625, "learning_rate": 0.00010146729561801844, "loss": 0.0086, "step": 10615 }, { "epoch": 4.9549591598599765, "grad_norm": 0.357421875, "learning_rate": 0.00010145262369919317, "loss": 0.0046, "step": 10616 }, { "epoch": 4.955425904317386, "grad_norm": 0.39453125, "learning_rate": 0.00010143795174909135, "loss": 0.0052, "step": 10617 }, { "epoch": 4.955892648774796, "grad_norm": 0.28125, "learning_rate": 0.0001014232797680288, "loss": 0.0055, "step": 10618 }, { "epoch": 4.956359393232205, "grad_norm": 0.53125, "learning_rate": 0.00010140860775632147, "loss": 0.0071, "step": 10619 }, { "epoch": 4.956826137689615, "grad_norm": 0.29296875, "learning_rate": 0.00010139393571428524, "loss": 0.0069, "step": 10620 }, { "epoch": 4.957292882147025, "grad_norm": 0.64453125, "learning_rate": 0.00010137926364223605, "loss": 0.0073, "step": 10621 }, { "epoch": 4.957759626604434, "grad_norm": 0.310546875, "learning_rate": 0.00010136459154048976, "loss": 0.0045, "step": 10622 }, { "epoch": 4.958226371061843, "grad_norm": 0.52734375, "learning_rate": 0.00010134991940936229, "loss": 0.0118, "step": 10623 }, { "epoch": 4.958693115519253, "grad_norm": 0.390625, "learning_rate": 0.00010133524724916959, "loss": 0.0088, "step": 10624 }, { "epoch": 4.959159859976663, "grad_norm": 0.69921875, "learning_rate": 0.00010132057506022752, "loss": 0.0127, "step": 10625 }, { "epoch": 4.959626604434073, "grad_norm": 0.48828125, "learning_rate": 0.000101305902842852, "loss": 0.0056, "step": 10626 }, { "epoch": 4.960093348891482, "grad_norm": 0.3125, "learning_rate": 0.00010129123059735895, "loss": 0.0038, "step": 10627 }, { "epoch": 4.960560093348891, "grad_norm": 0.58203125, "learning_rate": 0.00010127655832406428, "loss": 0.01, "step": 10628 }, { "epoch": 4.961026837806301, "grad_norm": 0.439453125, "learning_rate": 0.00010126188602328385, "loss": 0.0087, "step": 10629 }, { "epoch": 4.961493582263711, "grad_norm": 0.494140625, "learning_rate": 0.00010124721369533367, "loss": 0.0084, "step": 10630 }, { "epoch": 4.96196032672112, "grad_norm": 0.3359375, "learning_rate": 0.00010123254134052957, "loss": 0.0036, "step": 10631 }, { "epoch": 4.96242707117853, "grad_norm": 0.404296875, "learning_rate": 0.0001012178689591875, "loss": 0.0037, "step": 10632 }, { "epoch": 4.9628938156359395, "grad_norm": 0.390625, "learning_rate": 0.00010120319655162335, "loss": 0.0055, "step": 10633 }, { "epoch": 4.963360560093349, "grad_norm": 0.314453125, "learning_rate": 0.00010118852411815305, "loss": 0.0041, "step": 10634 }, { "epoch": 4.963827304550758, "grad_norm": 0.453125, "learning_rate": 0.0001011738516590925, "loss": 0.0063, "step": 10635 }, { "epoch": 4.964294049008168, "grad_norm": 0.3828125, "learning_rate": 0.00010115917917475762, "loss": 0.0074, "step": 10636 }, { "epoch": 4.964760793465578, "grad_norm": 0.2470703125, "learning_rate": 0.00010114450666546434, "loss": 0.0038, "step": 10637 }, { "epoch": 4.9652275379229875, "grad_norm": 0.2421875, "learning_rate": 0.00010112983413152854, "loss": 0.0037, "step": 10638 }, { "epoch": 4.9656942823803965, "grad_norm": 0.52734375, "learning_rate": 0.00010111516157326614, "loss": 0.0062, "step": 10639 }, { "epoch": 4.966161026837806, "grad_norm": 0.361328125, "learning_rate": 0.00010110048899099314, "loss": 0.004, "step": 10640 }, { "epoch": 4.966627771295216, "grad_norm": 0.283203125, "learning_rate": 0.00010108581638502535, "loss": 0.0046, "step": 10641 }, { "epoch": 4.967094515752626, "grad_norm": 0.63671875, "learning_rate": 0.0001010711437556787, "loss": 0.0077, "step": 10642 }, { "epoch": 4.967561260210035, "grad_norm": 0.3984375, "learning_rate": 0.00010105647110326919, "loss": 0.0086, "step": 10643 }, { "epoch": 4.9680280046674445, "grad_norm": 0.408203125, "learning_rate": 0.00010104179842811266, "loss": 0.0079, "step": 10644 }, { "epoch": 4.968494749124854, "grad_norm": 0.462890625, "learning_rate": 0.00010102712573052504, "loss": 0.006, "step": 10645 }, { "epoch": 4.968961493582263, "grad_norm": 0.5390625, "learning_rate": 0.0001010124530108223, "loss": 0.0108, "step": 10646 }, { "epoch": 4.969428238039673, "grad_norm": 0.41015625, "learning_rate": 0.0001009977802693203, "loss": 0.0054, "step": 10647 }, { "epoch": 4.969894982497083, "grad_norm": 0.408203125, "learning_rate": 0.00010098310750633499, "loss": 0.0106, "step": 10648 }, { "epoch": 4.970361726954493, "grad_norm": 0.37890625, "learning_rate": 0.00010096843472218227, "loss": 0.0067, "step": 10649 }, { "epoch": 4.970828471411902, "grad_norm": 0.322265625, "learning_rate": 0.00010095376191717809, "loss": 0.0059, "step": 10650 }, { "epoch": 4.971295215869311, "grad_norm": 0.423828125, "learning_rate": 0.00010093908909163837, "loss": 0.0044, "step": 10651 }, { "epoch": 4.971761960326721, "grad_norm": 0.58203125, "learning_rate": 0.00010092441624587899, "loss": 0.0092, "step": 10652 }, { "epoch": 4.972228704784131, "grad_norm": 0.41015625, "learning_rate": 0.00010090974338021591, "loss": 0.0027, "step": 10653 }, { "epoch": 4.97269544924154, "grad_norm": 0.37109375, "learning_rate": 0.00010089507049496505, "loss": 0.0051, "step": 10654 }, { "epoch": 4.97316219369895, "grad_norm": 0.4765625, "learning_rate": 0.00010088039759044235, "loss": 0.0059, "step": 10655 }, { "epoch": 4.973628938156359, "grad_norm": 0.51171875, "learning_rate": 0.00010086572466696369, "loss": 0.0079, "step": 10656 }, { "epoch": 4.974095682613769, "grad_norm": 0.609375, "learning_rate": 0.00010085105172484503, "loss": 0.0093, "step": 10657 }, { "epoch": 4.974562427071179, "grad_norm": 0.40234375, "learning_rate": 0.00010083637876440226, "loss": 0.0055, "step": 10658 }, { "epoch": 4.975029171528588, "grad_norm": 0.39453125, "learning_rate": 0.00010082170578595136, "loss": 0.0045, "step": 10659 }, { "epoch": 4.975495915985998, "grad_norm": 0.5546875, "learning_rate": 0.00010080703278980821, "loss": 0.0098, "step": 10660 }, { "epoch": 4.9759626604434075, "grad_norm": 0.474609375, "learning_rate": 0.00010079235977628874, "loss": 0.0053, "step": 10661 }, { "epoch": 4.976429404900816, "grad_norm": 0.34375, "learning_rate": 0.00010077768674570887, "loss": 0.006, "step": 10662 }, { "epoch": 4.976896149358226, "grad_norm": 0.390625, "learning_rate": 0.00010076301369838457, "loss": 0.0067, "step": 10663 }, { "epoch": 4.977362893815636, "grad_norm": 0.37109375, "learning_rate": 0.00010074834063463173, "loss": 0.0064, "step": 10664 }, { "epoch": 4.977829638273046, "grad_norm": 0.306640625, "learning_rate": 0.00010073366755476628, "loss": 0.0042, "step": 10665 }, { "epoch": 4.978296382730455, "grad_norm": 0.232421875, "learning_rate": 0.00010071899445910416, "loss": 0.0027, "step": 10666 }, { "epoch": 4.9787631271878645, "grad_norm": 0.546875, "learning_rate": 0.00010070432134796128, "loss": 0.012, "step": 10667 }, { "epoch": 4.979229871645274, "grad_norm": 0.306640625, "learning_rate": 0.00010068964822165358, "loss": 0.0048, "step": 10668 }, { "epoch": 4.979696616102684, "grad_norm": 0.443359375, "learning_rate": 0.00010067497508049701, "loss": 0.0098, "step": 10669 }, { "epoch": 4.980163360560093, "grad_norm": 0.443359375, "learning_rate": 0.00010066030192480746, "loss": 0.0041, "step": 10670 }, { "epoch": 4.980630105017503, "grad_norm": 0.4375, "learning_rate": 0.00010064562875490088, "loss": 0.0043, "step": 10671 }, { "epoch": 4.981096849474913, "grad_norm": 0.41796875, "learning_rate": 0.00010063095557109319, "loss": 0.0056, "step": 10672 }, { "epoch": 4.981563593932322, "grad_norm": 0.55859375, "learning_rate": 0.00010061628237370035, "loss": 0.0081, "step": 10673 }, { "epoch": 4.982030338389731, "grad_norm": 0.404296875, "learning_rate": 0.00010060160916303824, "loss": 0.0076, "step": 10674 }, { "epoch": 4.982497082847141, "grad_norm": 0.55859375, "learning_rate": 0.00010058693593942284, "loss": 0.0066, "step": 10675 }, { "epoch": 4.982963827304551, "grad_norm": 0.703125, "learning_rate": 0.00010057226270317003, "loss": 0.0071, "step": 10676 }, { "epoch": 4.983430571761961, "grad_norm": 0.5078125, "learning_rate": 0.00010055758945459582, "loss": 0.0099, "step": 10677 }, { "epoch": 4.98389731621937, "grad_norm": 0.50390625, "learning_rate": 0.00010054291619401604, "loss": 0.0049, "step": 10678 }, { "epoch": 4.984364060676779, "grad_norm": 0.2392578125, "learning_rate": 0.00010052824292174668, "loss": 0.0036, "step": 10679 }, { "epoch": 4.984830805134189, "grad_norm": 0.3828125, "learning_rate": 0.0001005135696381037, "loss": 0.0069, "step": 10680 }, { "epoch": 4.985297549591599, "grad_norm": 0.33203125, "learning_rate": 0.00010049889634340294, "loss": 0.0035, "step": 10681 }, { "epoch": 4.985764294049008, "grad_norm": 0.416015625, "learning_rate": 0.00010048422303796044, "loss": 0.0096, "step": 10682 }, { "epoch": 4.986231038506418, "grad_norm": 0.369140625, "learning_rate": 0.00010046954972209206, "loss": 0.0036, "step": 10683 }, { "epoch": 4.9866977829638275, "grad_norm": 0.5390625, "learning_rate": 0.00010045487639611373, "loss": 0.0096, "step": 10684 }, { "epoch": 4.987164527421237, "grad_norm": 0.353515625, "learning_rate": 0.00010044020306034143, "loss": 0.0052, "step": 10685 }, { "epoch": 4.987631271878646, "grad_norm": 0.7109375, "learning_rate": 0.00010042552971509107, "loss": 0.006, "step": 10686 }, { "epoch": 4.988098016336056, "grad_norm": 0.408203125, "learning_rate": 0.0001004108563606786, "loss": 0.0056, "step": 10687 }, { "epoch": 4.988564760793466, "grad_norm": 0.16015625, "learning_rate": 0.00010039618299741989, "loss": 0.0019, "step": 10688 }, { "epoch": 4.989031505250875, "grad_norm": 0.30078125, "learning_rate": 0.00010038150962563095, "loss": 0.0104, "step": 10689 }, { "epoch": 4.9894982497082845, "grad_norm": 0.44140625, "learning_rate": 0.00010036683624562771, "loss": 0.0059, "step": 10690 }, { "epoch": 4.989964994165694, "grad_norm": 0.41796875, "learning_rate": 0.00010035216285772602, "loss": 0.0111, "step": 10691 }, { "epoch": 4.990431738623104, "grad_norm": 0.478515625, "learning_rate": 0.00010033748946224191, "loss": 0.0077, "step": 10692 }, { "epoch": 4.990898483080514, "grad_norm": 0.34375, "learning_rate": 0.00010032281605949127, "loss": 0.005, "step": 10693 }, { "epoch": 4.991365227537923, "grad_norm": 0.5, "learning_rate": 0.00010030814264979003, "loss": 0.0047, "step": 10694 }, { "epoch": 4.991831971995333, "grad_norm": 0.181640625, "learning_rate": 0.00010029346923345415, "loss": 0.0019, "step": 10695 }, { "epoch": 4.992298716452742, "grad_norm": 0.427734375, "learning_rate": 0.00010027879581079955, "loss": 0.0038, "step": 10696 }, { "epoch": 4.992765460910151, "grad_norm": 0.298828125, "learning_rate": 0.00010026412238214218, "loss": 0.0032, "step": 10697 }, { "epoch": 4.993232205367561, "grad_norm": 0.396484375, "learning_rate": 0.00010024944894779796, "loss": 0.0051, "step": 10698 }, { "epoch": 4.993698949824971, "grad_norm": 0.42578125, "learning_rate": 0.0001002347755080828, "loss": 0.0057, "step": 10699 }, { "epoch": 4.994165694282381, "grad_norm": 0.357421875, "learning_rate": 0.00010022010206331269, "loss": 0.0042, "step": 10700 }, { "epoch": 4.9946324387397905, "grad_norm": 0.1943359375, "learning_rate": 0.00010020542861380351, "loss": 0.0023, "step": 10701 }, { "epoch": 4.995099183197199, "grad_norm": 0.263671875, "learning_rate": 0.00010019075515987127, "loss": 0.0051, "step": 10702 }, { "epoch": 4.995565927654609, "grad_norm": 0.29296875, "learning_rate": 0.00010017608170183181, "loss": 0.0036, "step": 10703 }, { "epoch": 4.996032672112019, "grad_norm": 0.375, "learning_rate": 0.00010016140824000115, "loss": 0.0052, "step": 10704 }, { "epoch": 4.996499416569428, "grad_norm": 0.3125, "learning_rate": 0.00010014673477469519, "loss": 0.004, "step": 10705 }, { "epoch": 4.996966161026838, "grad_norm": 0.52734375, "learning_rate": 0.00010013206130622988, "loss": 0.0082, "step": 10706 }, { "epoch": 4.9974329054842475, "grad_norm": 0.65234375, "learning_rate": 0.00010011738783492108, "loss": 0.0072, "step": 10707 }, { "epoch": 4.997899649941657, "grad_norm": 0.412109375, "learning_rate": 0.00010010271436108485, "loss": 0.0078, "step": 10708 }, { "epoch": 4.998366394399066, "grad_norm": 0.51171875, "learning_rate": 0.00010008804088503705, "loss": 0.0052, "step": 10709 }, { "epoch": 4.998833138856476, "grad_norm": 0.400390625, "learning_rate": 0.00010007336740709362, "loss": 0.0027, "step": 10710 }, { "epoch": 4.999299883313886, "grad_norm": 0.375, "learning_rate": 0.00010005869392757053, "loss": 0.0029, "step": 10711 }, { "epoch": 4.9997666277712955, "grad_norm": 0.353515625, "learning_rate": 0.00010004402044678368, "loss": 0.0052, "step": 10712 }, { "epoch": 5.0002333722287045, "grad_norm": 0.2275390625, "learning_rate": 0.00010002934696504902, "loss": 0.0025, "step": 10713 }, { "epoch": 5.000700116686114, "grad_norm": 0.1669921875, "learning_rate": 0.00010001467348268247, "loss": 0.0013, "step": 10714 }, { "epoch": 5.001166861143524, "grad_norm": 0.36328125, "learning_rate": 0.0001, "loss": 0.0041, "step": 10715 }, { "epoch": 5.001633605600934, "grad_norm": 0.361328125, "learning_rate": 9.998532651731755e-05, "loss": 0.0046, "step": 10716 }, { "epoch": 5.002100350058343, "grad_norm": 0.318359375, "learning_rate": 9.9970653034951e-05, "loss": 0.0029, "step": 10717 }, { "epoch": 5.0025670945157525, "grad_norm": 0.2275390625, "learning_rate": 9.995597955321635e-05, "loss": 0.0041, "step": 10718 }, { "epoch": 5.003033838973162, "grad_norm": 0.41796875, "learning_rate": 9.994130607242951e-05, "loss": 0.0023, "step": 10719 }, { "epoch": 5.003500583430572, "grad_norm": 0.3359375, "learning_rate": 9.992663259290641e-05, "loss": 0.0052, "step": 10720 }, { "epoch": 5.003500583430572, "eval_loss": 2.0806281566619873, "eval_runtime": 57.6303, "eval_samples_per_second": 31.303, "eval_steps_per_second": 3.922, "step": 10720 }, { "epoch": 5.003967327887981, "grad_norm": 0.42578125, "learning_rate": 9.991195911496297e-05, "loss": 0.0092, "step": 10721 }, { "epoch": 5.004434072345391, "grad_norm": 0.46875, "learning_rate": 9.989728563891517e-05, "loss": 0.0072, "step": 10722 }, { "epoch": 5.004900816802801, "grad_norm": 0.58984375, "learning_rate": 9.988261216507894e-05, "loss": 0.0107, "step": 10723 }, { "epoch": 5.00536756126021, "grad_norm": 0.283203125, "learning_rate": 9.986793869377016e-05, "loss": 0.0038, "step": 10724 }, { "epoch": 5.005834305717619, "grad_norm": 0.251953125, "learning_rate": 9.985326522530482e-05, "loss": 0.0023, "step": 10725 }, { "epoch": 5.006301050175029, "grad_norm": 0.3671875, "learning_rate": 9.983859175999887e-05, "loss": 0.0035, "step": 10726 }, { "epoch": 5.006767794632439, "grad_norm": 0.294921875, "learning_rate": 9.982391829816817e-05, "loss": 0.0021, "step": 10727 }, { "epoch": 5.007234539089849, "grad_norm": 0.2470703125, "learning_rate": 9.980924484012874e-05, "loss": 0.0019, "step": 10728 }, { "epoch": 5.007701283547258, "grad_norm": 0.400390625, "learning_rate": 9.97945713861965e-05, "loss": 0.0064, "step": 10729 }, { "epoch": 5.008168028004667, "grad_norm": 0.1640625, "learning_rate": 9.977989793668733e-05, "loss": 0.0018, "step": 10730 }, { "epoch": 5.008634772462077, "grad_norm": 0.478515625, "learning_rate": 9.97652244919172e-05, "loss": 0.0088, "step": 10731 }, { "epoch": 5.009101516919487, "grad_norm": 0.33984375, "learning_rate": 9.975055105220205e-05, "loss": 0.0041, "step": 10732 }, { "epoch": 5.009568261376896, "grad_norm": 0.1494140625, "learning_rate": 9.973587761785783e-05, "loss": 0.0017, "step": 10733 }, { "epoch": 5.010035005834306, "grad_norm": 0.1904296875, "learning_rate": 9.972120418920044e-05, "loss": 0.0019, "step": 10734 }, { "epoch": 5.0105017502917155, "grad_norm": 0.166015625, "learning_rate": 9.970653076654585e-05, "loss": 0.0012, "step": 10735 }, { "epoch": 5.010968494749124, "grad_norm": 0.240234375, "learning_rate": 9.969185735020997e-05, "loss": 0.002, "step": 10736 }, { "epoch": 5.011435239206534, "grad_norm": 0.1748046875, "learning_rate": 9.967718394050873e-05, "loss": 0.0018, "step": 10737 }, { "epoch": 5.011901983663944, "grad_norm": 0.5390625, "learning_rate": 9.966251053775809e-05, "loss": 0.0108, "step": 10738 }, { "epoch": 5.012368728121354, "grad_norm": 0.37890625, "learning_rate": 9.9647837142274e-05, "loss": 0.0037, "step": 10739 }, { "epoch": 5.012835472578763, "grad_norm": 0.330078125, "learning_rate": 9.963316375437235e-05, "loss": 0.0024, "step": 10740 }, { "epoch": 5.0133022170361725, "grad_norm": 0.4921875, "learning_rate": 9.961849037436909e-05, "loss": 0.0123, "step": 10741 }, { "epoch": 5.013768961493582, "grad_norm": 0.236328125, "learning_rate": 9.960381700258015e-05, "loss": 0.0043, "step": 10742 }, { "epoch": 5.014235705950992, "grad_norm": 0.21875, "learning_rate": 9.958914363932145e-05, "loss": 0.0015, "step": 10743 }, { "epoch": 5.014702450408401, "grad_norm": 0.5, "learning_rate": 9.957447028490898e-05, "loss": 0.0077, "step": 10744 }, { "epoch": 5.015169194865811, "grad_norm": 0.16796875, "learning_rate": 9.95597969396586e-05, "loss": 0.0034, "step": 10745 }, { "epoch": 5.015635939323221, "grad_norm": 0.310546875, "learning_rate": 9.954512360388629e-05, "loss": 0.0041, "step": 10746 }, { "epoch": 5.01610268378063, "grad_norm": 0.1484375, "learning_rate": 9.953045027790798e-05, "loss": 0.0016, "step": 10747 }, { "epoch": 5.016569428238039, "grad_norm": 0.42578125, "learning_rate": 9.951577696203961e-05, "loss": 0.0075, "step": 10748 }, { "epoch": 5.017036172695449, "grad_norm": 0.42578125, "learning_rate": 9.950110365659707e-05, "loss": 0.0042, "step": 10749 }, { "epoch": 5.017502917152859, "grad_norm": 0.458984375, "learning_rate": 9.948643036189634e-05, "loss": 0.0055, "step": 10750 }, { "epoch": 5.017969661610269, "grad_norm": 0.193359375, "learning_rate": 9.947175707825334e-05, "loss": 0.0022, "step": 10751 }, { "epoch": 5.018436406067678, "grad_norm": 0.2890625, "learning_rate": 9.9457083805984e-05, "loss": 0.0025, "step": 10752 }, { "epoch": 5.018903150525087, "grad_norm": 0.30078125, "learning_rate": 9.944241054540422e-05, "loss": 0.0026, "step": 10753 }, { "epoch": 5.019369894982497, "grad_norm": 0.5, "learning_rate": 9.942773729682998e-05, "loss": 0.0037, "step": 10754 }, { "epoch": 5.019836639439907, "grad_norm": 0.13671875, "learning_rate": 9.94130640605772e-05, "loss": 0.0014, "step": 10755 }, { "epoch": 5.020303383897316, "grad_norm": 0.328125, "learning_rate": 9.939839083696177e-05, "loss": 0.0038, "step": 10756 }, { "epoch": 5.020770128354726, "grad_norm": 0.404296875, "learning_rate": 9.938371762629967e-05, "loss": 0.0053, "step": 10757 }, { "epoch": 5.0212368728121355, "grad_norm": 0.28515625, "learning_rate": 9.936904442890682e-05, "loss": 0.0047, "step": 10758 }, { "epoch": 5.021703617269545, "grad_norm": 0.2578125, "learning_rate": 9.935437124509913e-05, "loss": 0.0026, "step": 10759 }, { "epoch": 5.022170361726954, "grad_norm": 0.251953125, "learning_rate": 9.933969807519256e-05, "loss": 0.0039, "step": 10760 }, { "epoch": 5.022637106184364, "grad_norm": 0.671875, "learning_rate": 9.932502491950301e-05, "loss": 0.0066, "step": 10761 }, { "epoch": 5.023103850641774, "grad_norm": 0.328125, "learning_rate": 9.931035177834644e-05, "loss": 0.007, "step": 10762 }, { "epoch": 5.023570595099184, "grad_norm": 0.24609375, "learning_rate": 9.929567865203873e-05, "loss": 0.0067, "step": 10763 }, { "epoch": 5.0240373395565925, "grad_norm": 0.322265625, "learning_rate": 9.928100554089587e-05, "loss": 0.0025, "step": 10764 }, { "epoch": 5.024504084014002, "grad_norm": 0.0986328125, "learning_rate": 9.926633244523376e-05, "loss": 0.0013, "step": 10765 }, { "epoch": 5.024970828471412, "grad_norm": 0.1650390625, "learning_rate": 9.925165936536828e-05, "loss": 0.0016, "step": 10766 }, { "epoch": 5.025437572928822, "grad_norm": 0.294921875, "learning_rate": 9.923698630161545e-05, "loss": 0.007, "step": 10767 }, { "epoch": 5.025904317386231, "grad_norm": 0.10791015625, "learning_rate": 9.922231325429115e-05, "loss": 0.0012, "step": 10768 }, { "epoch": 5.026371061843641, "grad_norm": 0.30078125, "learning_rate": 9.920764022371129e-05, "loss": 0.0022, "step": 10769 }, { "epoch": 5.02683780630105, "grad_norm": 0.283203125, "learning_rate": 9.919296721019183e-05, "loss": 0.0026, "step": 10770 }, { "epoch": 5.02730455075846, "grad_norm": 0.4140625, "learning_rate": 9.917829421404868e-05, "loss": 0.0056, "step": 10771 }, { "epoch": 5.027771295215869, "grad_norm": 0.462890625, "learning_rate": 9.916362123559775e-05, "loss": 0.0075, "step": 10772 }, { "epoch": 5.028238039673279, "grad_norm": 0.33984375, "learning_rate": 9.914894827515498e-05, "loss": 0.0021, "step": 10773 }, { "epoch": 5.028704784130689, "grad_norm": 0.37109375, "learning_rate": 9.913427533303634e-05, "loss": 0.0035, "step": 10774 }, { "epoch": 5.029171528588098, "grad_norm": 0.318359375, "learning_rate": 9.911960240955767e-05, "loss": 0.0024, "step": 10775 }, { "epoch": 5.029638273045507, "grad_norm": 0.1728515625, "learning_rate": 9.910492950503494e-05, "loss": 0.0019, "step": 10776 }, { "epoch": 5.030105017502917, "grad_norm": 0.2314453125, "learning_rate": 9.90902566197841e-05, "loss": 0.0023, "step": 10777 }, { "epoch": 5.030571761960327, "grad_norm": 0.35546875, "learning_rate": 9.907558375412102e-05, "loss": 0.007, "step": 10778 }, { "epoch": 5.031038506417736, "grad_norm": 0.15234375, "learning_rate": 9.906091090836167e-05, "loss": 0.0015, "step": 10779 }, { "epoch": 5.031505250875146, "grad_norm": 0.33984375, "learning_rate": 9.904623808282192e-05, "loss": 0.0054, "step": 10780 }, { "epoch": 5.0319719953325555, "grad_norm": 0.2216796875, "learning_rate": 9.903156527781774e-05, "loss": 0.0016, "step": 10781 }, { "epoch": 5.032438739789965, "grad_norm": 0.294921875, "learning_rate": 9.901689249366503e-05, "loss": 0.0053, "step": 10782 }, { "epoch": 5.032905484247374, "grad_norm": 0.353515625, "learning_rate": 9.900221973067971e-05, "loss": 0.0045, "step": 10783 }, { "epoch": 5.033372228704784, "grad_norm": 0.396484375, "learning_rate": 9.898754698917771e-05, "loss": 0.0075, "step": 10784 }, { "epoch": 5.033838973162194, "grad_norm": 0.546875, "learning_rate": 9.897287426947496e-05, "loss": 0.0084, "step": 10785 }, { "epoch": 5.0343057176196035, "grad_norm": 0.4296875, "learning_rate": 9.895820157188734e-05, "loss": 0.0048, "step": 10786 }, { "epoch": 5.0347724620770125, "grad_norm": 0.3125, "learning_rate": 9.894352889673086e-05, "loss": 0.0025, "step": 10787 }, { "epoch": 5.035239206534422, "grad_norm": 0.55859375, "learning_rate": 9.892885624432131e-05, "loss": 0.0039, "step": 10788 }, { "epoch": 5.035705950991832, "grad_norm": 0.6171875, "learning_rate": 9.89141836149747e-05, "loss": 0.0059, "step": 10789 }, { "epoch": 5.036172695449242, "grad_norm": 0.462890625, "learning_rate": 9.889951100900692e-05, "loss": 0.0105, "step": 10790 }, { "epoch": 5.036639439906651, "grad_norm": 0.2431640625, "learning_rate": 9.888483842673388e-05, "loss": 0.0023, "step": 10791 }, { "epoch": 5.0371061843640605, "grad_norm": 0.2265625, "learning_rate": 9.88701658684715e-05, "loss": 0.0024, "step": 10792 }, { "epoch": 5.03757292882147, "grad_norm": 0.4375, "learning_rate": 9.885549333453571e-05, "loss": 0.0057, "step": 10793 }, { "epoch": 5.03803967327888, "grad_norm": 0.1416015625, "learning_rate": 9.884082082524243e-05, "loss": 0.0045, "step": 10794 }, { "epoch": 5.038506417736289, "grad_norm": 0.169921875, "learning_rate": 9.882614834090753e-05, "loss": 0.0027, "step": 10795 }, { "epoch": 5.038973162193699, "grad_norm": 0.50390625, "learning_rate": 9.881147588184698e-05, "loss": 0.0056, "step": 10796 }, { "epoch": 5.039439906651109, "grad_norm": 0.0849609375, "learning_rate": 9.879680344837668e-05, "loss": 0.0011, "step": 10797 }, { "epoch": 5.039906651108518, "grad_norm": 0.337890625, "learning_rate": 9.878213104081252e-05, "loss": 0.0029, "step": 10798 }, { "epoch": 5.040373395565927, "grad_norm": 0.34765625, "learning_rate": 9.876745865947044e-05, "loss": 0.0022, "step": 10799 }, { "epoch": 5.040840140023337, "grad_norm": 0.291015625, "learning_rate": 9.875278630466635e-05, "loss": 0.002, "step": 10800 }, { "epoch": 5.041306884480747, "grad_norm": 0.42578125, "learning_rate": 9.873811397671616e-05, "loss": 0.0065, "step": 10801 }, { "epoch": 5.041773628938157, "grad_norm": 0.369140625, "learning_rate": 9.872344167593574e-05, "loss": 0.0052, "step": 10802 }, { "epoch": 5.042240373395566, "grad_norm": 0.359375, "learning_rate": 9.870876940264106e-05, "loss": 0.0035, "step": 10803 }, { "epoch": 5.042707117852975, "grad_norm": 0.1748046875, "learning_rate": 9.869409715714802e-05, "loss": 0.0023, "step": 10804 }, { "epoch": 5.043173862310385, "grad_norm": 0.224609375, "learning_rate": 9.867942493977249e-05, "loss": 0.0016, "step": 10805 }, { "epoch": 5.043640606767795, "grad_norm": 0.3359375, "learning_rate": 9.866475275083043e-05, "loss": 0.004, "step": 10806 }, { "epoch": 5.044107351225204, "grad_norm": 0.482421875, "learning_rate": 9.865008059063773e-05, "loss": 0.0047, "step": 10807 }, { "epoch": 5.044574095682614, "grad_norm": 0.3046875, "learning_rate": 9.863540845951025e-05, "loss": 0.0049, "step": 10808 }, { "epoch": 5.0450408401400235, "grad_norm": 0.306640625, "learning_rate": 9.862073635776398e-05, "loss": 0.0068, "step": 10809 }, { "epoch": 5.045507584597433, "grad_norm": 0.259765625, "learning_rate": 9.860606428571478e-05, "loss": 0.0111, "step": 10810 }, { "epoch": 5.045974329054842, "grad_norm": 0.27734375, "learning_rate": 9.859139224367856e-05, "loss": 0.0025, "step": 10811 }, { "epoch": 5.046441073512252, "grad_norm": 0.29296875, "learning_rate": 9.85767202319712e-05, "loss": 0.0033, "step": 10812 }, { "epoch": 5.046907817969662, "grad_norm": 0.169921875, "learning_rate": 9.856204825090868e-05, "loss": 0.0016, "step": 10813 }, { "epoch": 5.047374562427072, "grad_norm": 0.28125, "learning_rate": 9.854737630080684e-05, "loss": 0.0024, "step": 10814 }, { "epoch": 5.0478413068844805, "grad_norm": 0.359375, "learning_rate": 9.853270438198157e-05, "loss": 0.0041, "step": 10815 }, { "epoch": 5.04830805134189, "grad_norm": 0.1494140625, "learning_rate": 9.851803249474883e-05, "loss": 0.0012, "step": 10816 }, { "epoch": 5.0487747957993, "grad_norm": 0.380859375, "learning_rate": 9.850336063942451e-05, "loss": 0.0025, "step": 10817 }, { "epoch": 5.049241540256709, "grad_norm": 0.2734375, "learning_rate": 9.848868881632446e-05, "loss": 0.0027, "step": 10818 }, { "epoch": 5.049708284714119, "grad_norm": 0.326171875, "learning_rate": 9.847401702576465e-05, "loss": 0.0031, "step": 10819 }, { "epoch": 5.050175029171529, "grad_norm": 0.267578125, "learning_rate": 9.845934526806093e-05, "loss": 0.0028, "step": 10820 }, { "epoch": 5.050641773628938, "grad_norm": 0.2109375, "learning_rate": 9.844467354352921e-05, "loss": 0.0025, "step": 10821 }, { "epoch": 5.051108518086347, "grad_norm": 0.466796875, "learning_rate": 9.84300018524854e-05, "loss": 0.0075, "step": 10822 }, { "epoch": 5.051575262543757, "grad_norm": 0.32421875, "learning_rate": 9.84153301952454e-05, "loss": 0.0027, "step": 10823 }, { "epoch": 5.052042007001167, "grad_norm": 0.458984375, "learning_rate": 9.840065857212509e-05, "loss": 0.0039, "step": 10824 }, { "epoch": 5.052508751458577, "grad_norm": 0.056640625, "learning_rate": 9.838598698344036e-05, "loss": 0.0007, "step": 10825 }, { "epoch": 5.052975495915986, "grad_norm": 0.330078125, "learning_rate": 9.837131542950715e-05, "loss": 0.0051, "step": 10826 }, { "epoch": 5.053442240373395, "grad_norm": 0.486328125, "learning_rate": 9.835664391064132e-05, "loss": 0.0077, "step": 10827 }, { "epoch": 5.053908984830805, "grad_norm": 0.10888671875, "learning_rate": 9.834197242715877e-05, "loss": 0.0014, "step": 10828 }, { "epoch": 5.054375729288215, "grad_norm": 0.10302734375, "learning_rate": 9.832730097937537e-05, "loss": 0.0012, "step": 10829 }, { "epoch": 5.054842473745624, "grad_norm": 0.251953125, "learning_rate": 9.831262956760706e-05, "loss": 0.0034, "step": 10830 }, { "epoch": 5.055309218203034, "grad_norm": 0.390625, "learning_rate": 9.82979581921697e-05, "loss": 0.0043, "step": 10831 }, { "epoch": 5.0557759626604435, "grad_norm": 0.248046875, "learning_rate": 9.828328685337919e-05, "loss": 0.0023, "step": 10832 }, { "epoch": 5.056242707117853, "grad_norm": 0.26953125, "learning_rate": 9.826861555155141e-05, "loss": 0.0026, "step": 10833 }, { "epoch": 5.056709451575262, "grad_norm": 0.41015625, "learning_rate": 9.825394428700228e-05, "loss": 0.0109, "step": 10834 }, { "epoch": 5.057176196032672, "grad_norm": 0.439453125, "learning_rate": 9.823927306004768e-05, "loss": 0.0044, "step": 10835 }, { "epoch": 5.057642940490082, "grad_norm": 0.1123046875, "learning_rate": 9.822460187100346e-05, "loss": 0.0011, "step": 10836 }, { "epoch": 5.058109684947492, "grad_norm": 0.400390625, "learning_rate": 9.820993072018551e-05, "loss": 0.0043, "step": 10837 }, { "epoch": 5.0585764294049005, "grad_norm": 0.2373046875, "learning_rate": 9.819525960790977e-05, "loss": 0.0021, "step": 10838 }, { "epoch": 5.05904317386231, "grad_norm": 0.408203125, "learning_rate": 9.81805885344921e-05, "loss": 0.0061, "step": 10839 }, { "epoch": 5.05950991831972, "grad_norm": 0.267578125, "learning_rate": 9.816591750024835e-05, "loss": 0.004, "step": 10840 }, { "epoch": 5.05997666277713, "grad_norm": 0.3125, "learning_rate": 9.815124650549442e-05, "loss": 0.003, "step": 10841 }, { "epoch": 5.060443407234539, "grad_norm": 0.3515625, "learning_rate": 9.813657555054622e-05, "loss": 0.004, "step": 10842 }, { "epoch": 5.060910151691949, "grad_norm": 0.390625, "learning_rate": 9.812190463571964e-05, "loss": 0.0028, "step": 10843 }, { "epoch": 5.061376896149358, "grad_norm": 0.203125, "learning_rate": 9.81072337613305e-05, "loss": 0.0021, "step": 10844 }, { "epoch": 5.061843640606768, "grad_norm": 0.1455078125, "learning_rate": 9.809256292769473e-05, "loss": 0.0048, "step": 10845 }, { "epoch": 5.062310385064177, "grad_norm": 0.271484375, "learning_rate": 9.807789213512821e-05, "loss": 0.0025, "step": 10846 }, { "epoch": 5.062777129521587, "grad_norm": 0.4375, "learning_rate": 9.806322138394678e-05, "loss": 0.0093, "step": 10847 }, { "epoch": 5.063243873978997, "grad_norm": 0.453125, "learning_rate": 9.804855067446636e-05, "loss": 0.0098, "step": 10848 }, { "epoch": 5.0637106184364065, "grad_norm": 0.33203125, "learning_rate": 9.803388000700282e-05, "loss": 0.0024, "step": 10849 }, { "epoch": 5.064177362893815, "grad_norm": 0.2177734375, "learning_rate": 9.801920938187201e-05, "loss": 0.0018, "step": 10850 }, { "epoch": 5.064644107351225, "grad_norm": 0.197265625, "learning_rate": 9.800453879938981e-05, "loss": 0.0049, "step": 10851 }, { "epoch": 5.065110851808635, "grad_norm": 0.53515625, "learning_rate": 9.798986825987212e-05, "loss": 0.0108, "step": 10852 }, { "epoch": 5.065577596266045, "grad_norm": 0.29296875, "learning_rate": 9.79751977636348e-05, "loss": 0.003, "step": 10853 }, { "epoch": 5.066044340723454, "grad_norm": 0.44140625, "learning_rate": 9.796052731099369e-05, "loss": 0.0037, "step": 10854 }, { "epoch": 5.0665110851808635, "grad_norm": 0.49609375, "learning_rate": 9.79458569022647e-05, "loss": 0.004, "step": 10855 }, { "epoch": 5.066977829638273, "grad_norm": 0.458984375, "learning_rate": 9.793118653776372e-05, "loss": 0.0065, "step": 10856 }, { "epoch": 5.067444574095683, "grad_norm": 0.23828125, "learning_rate": 9.791651621780655e-05, "loss": 0.0099, "step": 10857 }, { "epoch": 5.067911318553092, "grad_norm": 0.435546875, "learning_rate": 9.790184594270912e-05, "loss": 0.0039, "step": 10858 }, { "epoch": 5.068378063010502, "grad_norm": 0.2412109375, "learning_rate": 9.788717571278728e-05, "loss": 0.0014, "step": 10859 }, { "epoch": 5.0688448074679116, "grad_norm": 0.20703125, "learning_rate": 9.78725055283569e-05, "loss": 0.0012, "step": 10860 }, { "epoch": 5.0693115519253205, "grad_norm": 0.248046875, "learning_rate": 9.785783538973378e-05, "loss": 0.0052, "step": 10861 }, { "epoch": 5.06977829638273, "grad_norm": 0.3828125, "learning_rate": 9.78431652972339e-05, "loss": 0.0046, "step": 10862 }, { "epoch": 5.07024504084014, "grad_norm": 0.400390625, "learning_rate": 9.782849525117305e-05, "loss": 0.0051, "step": 10863 }, { "epoch": 5.07071178529755, "grad_norm": 0.263671875, "learning_rate": 9.781382525186708e-05, "loss": 0.0041, "step": 10864 }, { "epoch": 5.071178529754959, "grad_norm": 0.259765625, "learning_rate": 9.779915529963191e-05, "loss": 0.0025, "step": 10865 }, { "epoch": 5.0716452742123685, "grad_norm": 0.283203125, "learning_rate": 9.778448539478335e-05, "loss": 0.0036, "step": 10866 }, { "epoch": 5.072112018669778, "grad_norm": 0.55078125, "learning_rate": 9.776981553763728e-05, "loss": 0.0061, "step": 10867 }, { "epoch": 5.072578763127188, "grad_norm": 0.12109375, "learning_rate": 9.775514572850957e-05, "loss": 0.0012, "step": 10868 }, { "epoch": 5.073045507584597, "grad_norm": 0.134765625, "learning_rate": 9.774047596771606e-05, "loss": 0.0012, "step": 10869 }, { "epoch": 5.073512252042007, "grad_norm": 0.1640625, "learning_rate": 9.77258062555726e-05, "loss": 0.0013, "step": 10870 }, { "epoch": 5.073978996499417, "grad_norm": 0.427734375, "learning_rate": 9.771113659239504e-05, "loss": 0.0045, "step": 10871 }, { "epoch": 5.074445740956826, "grad_norm": 0.306640625, "learning_rate": 9.769646697849927e-05, "loss": 0.0019, "step": 10872 }, { "epoch": 5.074912485414235, "grad_norm": 0.1552734375, "learning_rate": 9.768179741420113e-05, "loss": 0.0014, "step": 10873 }, { "epoch": 5.075379229871645, "grad_norm": 0.62890625, "learning_rate": 9.766712789981644e-05, "loss": 0.0027, "step": 10874 }, { "epoch": 5.075845974329055, "grad_norm": 0.40234375, "learning_rate": 9.765245843566109e-05, "loss": 0.0058, "step": 10875 }, { "epoch": 5.076312718786465, "grad_norm": 0.1484375, "learning_rate": 9.76377890220509e-05, "loss": 0.0015, "step": 10876 }, { "epoch": 5.076779463243874, "grad_norm": 0.2421875, "learning_rate": 9.762311965930174e-05, "loss": 0.0023, "step": 10877 }, { "epoch": 5.077246207701283, "grad_norm": 0.15234375, "learning_rate": 9.760845034772946e-05, "loss": 0.0017, "step": 10878 }, { "epoch": 5.077712952158693, "grad_norm": 0.2021484375, "learning_rate": 9.75937810876499e-05, "loss": 0.0048, "step": 10879 }, { "epoch": 5.078179696616103, "grad_norm": 0.447265625, "learning_rate": 9.75791118793789e-05, "loss": 0.0092, "step": 10880 }, { "epoch": 5.078646441073512, "grad_norm": 0.08984375, "learning_rate": 9.756444272323229e-05, "loss": 0.0012, "step": 10881 }, { "epoch": 5.079113185530922, "grad_norm": 0.2734375, "learning_rate": 9.754977361952594e-05, "loss": 0.0033, "step": 10882 }, { "epoch": 5.0795799299883315, "grad_norm": 0.4765625, "learning_rate": 9.753510456857572e-05, "loss": 0.0048, "step": 10883 }, { "epoch": 5.080046674445741, "grad_norm": 0.2275390625, "learning_rate": 9.75204355706974e-05, "loss": 0.0027, "step": 10884 }, { "epoch": 5.08051341890315, "grad_norm": 0.35546875, "learning_rate": 9.750576662620686e-05, "loss": 0.0036, "step": 10885 }, { "epoch": 5.08098016336056, "grad_norm": 0.404296875, "learning_rate": 9.749109773541991e-05, "loss": 0.0059, "step": 10886 }, { "epoch": 5.08144690781797, "grad_norm": 0.265625, "learning_rate": 9.747642889865243e-05, "loss": 0.0036, "step": 10887 }, { "epoch": 5.08191365227538, "grad_norm": 0.291015625, "learning_rate": 9.746176011622023e-05, "loss": 0.0032, "step": 10888 }, { "epoch": 5.0823803967327885, "grad_norm": 0.193359375, "learning_rate": 9.744709138843914e-05, "loss": 0.0018, "step": 10889 }, { "epoch": 5.082847141190198, "grad_norm": 0.41796875, "learning_rate": 9.743242271562502e-05, "loss": 0.0037, "step": 10890 }, { "epoch": 5.083313885647608, "grad_norm": 0.326171875, "learning_rate": 9.74177540980937e-05, "loss": 0.0025, "step": 10891 }, { "epoch": 5.083780630105018, "grad_norm": 0.1669921875, "learning_rate": 9.740308553616097e-05, "loss": 0.0027, "step": 10892 }, { "epoch": 5.084247374562427, "grad_norm": 0.26171875, "learning_rate": 9.73884170301427e-05, "loss": 0.0019, "step": 10893 }, { "epoch": 5.084714119019837, "grad_norm": 0.2451171875, "learning_rate": 9.73737485803547e-05, "loss": 0.0016, "step": 10894 }, { "epoch": 5.085180863477246, "grad_norm": 0.6328125, "learning_rate": 9.735908018711284e-05, "loss": 0.0088, "step": 10895 }, { "epoch": 5.085647607934656, "grad_norm": 0.369140625, "learning_rate": 9.734441185073286e-05, "loss": 0.006, "step": 10896 }, { "epoch": 5.086114352392065, "grad_norm": 0.1669921875, "learning_rate": 9.732974357153069e-05, "loss": 0.0045, "step": 10897 }, { "epoch": 5.086581096849475, "grad_norm": 0.18359375, "learning_rate": 9.731507534982208e-05, "loss": 0.0011, "step": 10898 }, { "epoch": 5.087047841306885, "grad_norm": 0.12890625, "learning_rate": 9.73004071859229e-05, "loss": 0.0013, "step": 10899 }, { "epoch": 5.0875145857642945, "grad_norm": 0.453125, "learning_rate": 9.72857390801489e-05, "loss": 0.0082, "step": 10900 }, { "epoch": 5.087981330221703, "grad_norm": 0.306640625, "learning_rate": 9.727107103281599e-05, "loss": 0.0026, "step": 10901 }, { "epoch": 5.088448074679113, "grad_norm": 0.150390625, "learning_rate": 9.725640304423994e-05, "loss": 0.0017, "step": 10902 }, { "epoch": 5.088914819136523, "grad_norm": 0.42578125, "learning_rate": 9.724173511473657e-05, "loss": 0.0044, "step": 10903 }, { "epoch": 5.089381563593932, "grad_norm": 0.28515625, "learning_rate": 9.72270672446217e-05, "loss": 0.0021, "step": 10904 }, { "epoch": 5.089848308051342, "grad_norm": 0.0888671875, "learning_rate": 9.721239943421116e-05, "loss": 0.0012, "step": 10905 }, { "epoch": 5.0903150525087515, "grad_norm": 0.53515625, "learning_rate": 9.719773168382073e-05, "loss": 0.005, "step": 10906 }, { "epoch": 5.090781796966161, "grad_norm": 0.40625, "learning_rate": 9.71830639937663e-05, "loss": 0.0021, "step": 10907 }, { "epoch": 5.09124854142357, "grad_norm": 0.1416015625, "learning_rate": 9.716839636436357e-05, "loss": 0.0011, "step": 10908 }, { "epoch": 5.09171528588098, "grad_norm": 0.365234375, "learning_rate": 9.715372879592844e-05, "loss": 0.0033, "step": 10909 }, { "epoch": 5.09218203033839, "grad_norm": 0.08642578125, "learning_rate": 9.713906128877667e-05, "loss": 0.001, "step": 10910 }, { "epoch": 5.0926487747958, "grad_norm": 0.3984375, "learning_rate": 9.712439384322409e-05, "loss": 0.0145, "step": 10911 }, { "epoch": 5.0931155192532085, "grad_norm": 0.232421875, "learning_rate": 9.710972645958652e-05, "loss": 0.0029, "step": 10912 }, { "epoch": 5.093582263710618, "grad_norm": 0.427734375, "learning_rate": 9.709505913817971e-05, "loss": 0.0035, "step": 10913 }, { "epoch": 5.094049008168028, "grad_norm": 0.10400390625, "learning_rate": 9.708039187931953e-05, "loss": 0.0013, "step": 10914 }, { "epoch": 5.094515752625438, "grad_norm": 0.267578125, "learning_rate": 9.706572468332174e-05, "loss": 0.0021, "step": 10915 }, { "epoch": 5.094982497082847, "grad_norm": 0.279296875, "learning_rate": 9.705105755050214e-05, "loss": 0.0055, "step": 10916 }, { "epoch": 5.095449241540257, "grad_norm": 0.1884765625, "learning_rate": 9.703639048117657e-05, "loss": 0.0019, "step": 10917 }, { "epoch": 5.095915985997666, "grad_norm": 0.1181640625, "learning_rate": 9.70217234756608e-05, "loss": 0.001, "step": 10918 }, { "epoch": 5.096382730455076, "grad_norm": 0.326171875, "learning_rate": 9.70070565342706e-05, "loss": 0.0041, "step": 10919 }, { "epoch": 5.096849474912485, "grad_norm": 0.185546875, "learning_rate": 9.699238965732181e-05, "loss": 0.0014, "step": 10920 }, { "epoch": 5.097316219369895, "grad_norm": 0.208984375, "learning_rate": 9.697772284513022e-05, "loss": 0.0053, "step": 10921 }, { "epoch": 5.097782963827305, "grad_norm": 0.146484375, "learning_rate": 9.69630560980116e-05, "loss": 0.0012, "step": 10922 }, { "epoch": 5.0982497082847145, "grad_norm": 0.3671875, "learning_rate": 9.694838941628174e-05, "loss": 0.0032, "step": 10923 }, { "epoch": 5.098716452742123, "grad_norm": 0.053466796875, "learning_rate": 9.693372280025646e-05, "loss": 0.0006, "step": 10924 }, { "epoch": 5.099183197199533, "grad_norm": 0.314453125, "learning_rate": 9.691905625025151e-05, "loss": 0.0045, "step": 10925 }, { "epoch": 5.099649941656943, "grad_norm": 0.080078125, "learning_rate": 9.69043897665827e-05, "loss": 0.0033, "step": 10926 }, { "epoch": 5.100116686114353, "grad_norm": 0.23046875, "learning_rate": 9.688972334956583e-05, "loss": 0.0014, "step": 10927 }, { "epoch": 5.100583430571762, "grad_norm": 0.51171875, "learning_rate": 9.687505699951666e-05, "loss": 0.0081, "step": 10928 }, { "epoch": 5.1010501750291715, "grad_norm": 0.2265625, "learning_rate": 9.686039071675099e-05, "loss": 0.0014, "step": 10929 }, { "epoch": 5.101516919486581, "grad_norm": 0.76953125, "learning_rate": 9.684572450158457e-05, "loss": 0.0036, "step": 10930 }, { "epoch": 5.101983663943991, "grad_norm": 0.41796875, "learning_rate": 9.683105835433326e-05, "loss": 0.0092, "step": 10931 }, { "epoch": 5.1024504084014, "grad_norm": 0.185546875, "learning_rate": 9.681639227531271e-05, "loss": 0.0095, "step": 10932 }, { "epoch": 5.10291715285881, "grad_norm": 0.33203125, "learning_rate": 9.680172626483879e-05, "loss": 0.0034, "step": 10933 }, { "epoch": 5.1033838973162196, "grad_norm": 0.1328125, "learning_rate": 9.678706032322727e-05, "loss": 0.005, "step": 10934 }, { "epoch": 5.103850641773629, "grad_norm": 0.314453125, "learning_rate": 9.677239445079387e-05, "loss": 0.0024, "step": 10935 }, { "epoch": 5.104317386231038, "grad_norm": 0.412109375, "learning_rate": 9.675772864785442e-05, "loss": 0.0074, "step": 10936 }, { "epoch": 5.104784130688448, "grad_norm": 0.27734375, "learning_rate": 9.674306291472466e-05, "loss": 0.0021, "step": 10937 }, { "epoch": 5.105250875145858, "grad_norm": 0.255859375, "learning_rate": 9.672839725172037e-05, "loss": 0.0029, "step": 10938 }, { "epoch": 5.105717619603268, "grad_norm": 0.25390625, "learning_rate": 9.671373165915733e-05, "loss": 0.002, "step": 10939 }, { "epoch": 5.1061843640606766, "grad_norm": 0.337890625, "learning_rate": 9.669906613735128e-05, "loss": 0.004, "step": 10940 }, { "epoch": 5.106651108518086, "grad_norm": 0.357421875, "learning_rate": 9.668440068661801e-05, "loss": 0.0028, "step": 10941 }, { "epoch": 5.107117852975496, "grad_norm": 0.267578125, "learning_rate": 9.666973530727326e-05, "loss": 0.005, "step": 10942 }, { "epoch": 5.107584597432905, "grad_norm": 0.2265625, "learning_rate": 9.66550699996328e-05, "loss": 0.0019, "step": 10943 }, { "epoch": 5.108051341890315, "grad_norm": 0.251953125, "learning_rate": 9.664040476401241e-05, "loss": 0.0019, "step": 10944 }, { "epoch": 5.108518086347725, "grad_norm": 0.11962890625, "learning_rate": 9.662573960072782e-05, "loss": 0.0012, "step": 10945 }, { "epoch": 5.108984830805134, "grad_norm": 0.154296875, "learning_rate": 9.661107451009482e-05, "loss": 0.0034, "step": 10946 }, { "epoch": 5.109451575262543, "grad_norm": 0.2431640625, "learning_rate": 9.659640949242912e-05, "loss": 0.0017, "step": 10947 }, { "epoch": 5.109918319719953, "grad_norm": 0.4140625, "learning_rate": 9.658174454804651e-05, "loss": 0.0058, "step": 10948 }, { "epoch": 5.110385064177363, "grad_norm": 0.515625, "learning_rate": 9.656707967726272e-05, "loss": 0.0054, "step": 10949 }, { "epoch": 5.110851808634773, "grad_norm": 0.126953125, "learning_rate": 9.655241488039356e-05, "loss": 0.0037, "step": 10950 }, { "epoch": 5.111318553092182, "grad_norm": 0.451171875, "learning_rate": 9.65377501577547e-05, "loss": 0.0048, "step": 10951 }, { "epoch": 5.111785297549591, "grad_norm": 0.1728515625, "learning_rate": 9.652308550966189e-05, "loss": 0.0015, "step": 10952 }, { "epoch": 5.112252042007001, "grad_norm": 0.201171875, "learning_rate": 9.650842093643095e-05, "loss": 0.002, "step": 10953 }, { "epoch": 5.112718786464411, "grad_norm": 0.4375, "learning_rate": 9.649375643837758e-05, "loss": 0.003, "step": 10954 }, { "epoch": 5.11318553092182, "grad_norm": 0.4375, "learning_rate": 9.64790920158175e-05, "loss": 0.0069, "step": 10955 }, { "epoch": 5.11365227537923, "grad_norm": 0.3046875, "learning_rate": 9.646442766906649e-05, "loss": 0.0045, "step": 10956 }, { "epoch": 5.1141190198366395, "grad_norm": 0.162109375, "learning_rate": 9.644976339844027e-05, "loss": 0.0012, "step": 10957 }, { "epoch": 5.114585764294049, "grad_norm": 0.24609375, "learning_rate": 9.64350992042546e-05, "loss": 0.0029, "step": 10958 }, { "epoch": 5.115052508751458, "grad_norm": 0.3828125, "learning_rate": 9.642043508682517e-05, "loss": 0.0052, "step": 10959 }, { "epoch": 5.115519253208868, "grad_norm": 0.455078125, "learning_rate": 9.640577104646776e-05, "loss": 0.0065, "step": 10960 }, { "epoch": 5.115985997666278, "grad_norm": 0.29296875, "learning_rate": 9.63911070834981e-05, "loss": 0.0023, "step": 10961 }, { "epoch": 5.116452742123688, "grad_norm": 0.5703125, "learning_rate": 9.637644319823189e-05, "loss": 0.0051, "step": 10962 }, { "epoch": 5.1169194865810965, "grad_norm": 1.109375, "learning_rate": 9.636177939098489e-05, "loss": 0.012, "step": 10963 }, { "epoch": 5.117386231038506, "grad_norm": 0.1103515625, "learning_rate": 9.63471156620728e-05, "loss": 0.0018, "step": 10964 }, { "epoch": 5.117852975495916, "grad_norm": 0.46484375, "learning_rate": 9.633245201181137e-05, "loss": 0.0143, "step": 10965 }, { "epoch": 5.118319719953326, "grad_norm": 0.326171875, "learning_rate": 9.631778844051632e-05, "loss": 0.0049, "step": 10966 }, { "epoch": 5.118786464410735, "grad_norm": 0.2060546875, "learning_rate": 9.630312494850337e-05, "loss": 0.002, "step": 10967 }, { "epoch": 5.119253208868145, "grad_norm": 0.359375, "learning_rate": 9.628846153608826e-05, "loss": 0.0032, "step": 10968 }, { "epoch": 5.119719953325554, "grad_norm": 0.140625, "learning_rate": 9.627379820358666e-05, "loss": 0.0013, "step": 10969 }, { "epoch": 5.120186697782964, "grad_norm": 0.220703125, "learning_rate": 9.625913495131434e-05, "loss": 0.0025, "step": 10970 }, { "epoch": 5.120653442240373, "grad_norm": 0.3125, "learning_rate": 9.624447177958699e-05, "loss": 0.0059, "step": 10971 }, { "epoch": 5.121120186697783, "grad_norm": 0.3359375, "learning_rate": 9.622980868872031e-05, "loss": 0.0068, "step": 10972 }, { "epoch": 5.121586931155193, "grad_norm": 0.287109375, "learning_rate": 9.621514567903006e-05, "loss": 0.0033, "step": 10973 }, { "epoch": 5.1220536756126025, "grad_norm": 0.40625, "learning_rate": 9.620048275083192e-05, "loss": 0.0073, "step": 10974 }, { "epoch": 5.122520420070011, "grad_norm": 0.1572265625, "learning_rate": 9.618581990444158e-05, "loss": 0.0015, "step": 10975 }, { "epoch": 5.122987164527421, "grad_norm": 0.2109375, "learning_rate": 9.617115714017479e-05, "loss": 0.0014, "step": 10976 }, { "epoch": 5.123453908984831, "grad_norm": 0.359375, "learning_rate": 9.615649445834724e-05, "loss": 0.0022, "step": 10977 }, { "epoch": 5.123920653442241, "grad_norm": 0.26953125, "learning_rate": 9.614183185927463e-05, "loss": 0.0024, "step": 10978 }, { "epoch": 5.12438739789965, "grad_norm": 0.1484375, "learning_rate": 9.612716934327266e-05, "loss": 0.0016, "step": 10979 }, { "epoch": 5.1248541423570595, "grad_norm": 0.5390625, "learning_rate": 9.611250691065704e-05, "loss": 0.0162, "step": 10980 }, { "epoch": 5.125320886814469, "grad_norm": 0.333984375, "learning_rate": 9.609784456174341e-05, "loss": 0.0046, "step": 10981 }, { "epoch": 5.125787631271878, "grad_norm": 0.369140625, "learning_rate": 9.608318229684755e-05, "loss": 0.0038, "step": 10982 }, { "epoch": 5.126254375729288, "grad_norm": 0.55859375, "learning_rate": 9.606852011628513e-05, "loss": 0.0049, "step": 10983 }, { "epoch": 5.126721120186698, "grad_norm": 0.1904296875, "learning_rate": 9.60538580203718e-05, "loss": 0.0022, "step": 10984 }, { "epoch": 5.127187864644108, "grad_norm": 0.51171875, "learning_rate": 9.60391960094233e-05, "loss": 0.0126, "step": 10985 }, { "epoch": 5.1276546091015165, "grad_norm": 0.298828125, "learning_rate": 9.602453408375533e-05, "loss": 0.0031, "step": 10986 }, { "epoch": 5.128121353558926, "grad_norm": 0.412109375, "learning_rate": 9.600987224368351e-05, "loss": 0.0028, "step": 10987 }, { "epoch": 5.128588098016336, "grad_norm": 0.310546875, "learning_rate": 9.599521048952359e-05, "loss": 0.0038, "step": 10988 }, { "epoch": 5.129054842473746, "grad_norm": 0.328125, "learning_rate": 9.598054882159124e-05, "loss": 0.009, "step": 10989 }, { "epoch": 5.129521586931155, "grad_norm": 0.2216796875, "learning_rate": 9.596588724020213e-05, "loss": 0.0018, "step": 10990 }, { "epoch": 5.129988331388565, "grad_norm": 0.25390625, "learning_rate": 9.595122574567193e-05, "loss": 0.002, "step": 10991 }, { "epoch": 5.130455075845974, "grad_norm": 0.1328125, "learning_rate": 9.593656433831634e-05, "loss": 0.0012, "step": 10992 }, { "epoch": 5.130921820303384, "grad_norm": 0.328125, "learning_rate": 9.592190301845104e-05, "loss": 0.003, "step": 10993 }, { "epoch": 5.131388564760793, "grad_norm": 0.3203125, "learning_rate": 9.590724178639165e-05, "loss": 0.0027, "step": 10994 }, { "epoch": 5.131855309218203, "grad_norm": 0.375, "learning_rate": 9.589258064245393e-05, "loss": 0.0045, "step": 10995 }, { "epoch": 5.132322053675613, "grad_norm": 0.2734375, "learning_rate": 9.58779195869535e-05, "loss": 0.0077, "step": 10996 }, { "epoch": 5.1327887981330225, "grad_norm": 0.2119140625, "learning_rate": 9.586325862020601e-05, "loss": 0.0023, "step": 10997 }, { "epoch": 5.133255542590431, "grad_norm": 0.318359375, "learning_rate": 9.584859774252717e-05, "loss": 0.002, "step": 10998 }, { "epoch": 5.133722287047841, "grad_norm": 0.138671875, "learning_rate": 9.583393695423264e-05, "loss": 0.0023, "step": 10999 }, { "epoch": 5.134189031505251, "grad_norm": 0.11572265625, "learning_rate": 9.581927625563806e-05, "loss": 0.0011, "step": 11000 }, { "epoch": 5.134655775962661, "grad_norm": 0.236328125, "learning_rate": 9.580461564705908e-05, "loss": 0.0026, "step": 11001 }, { "epoch": 5.13512252042007, "grad_norm": 0.44140625, "learning_rate": 9.57899551288114e-05, "loss": 0.0032, "step": 11002 }, { "epoch": 5.1355892648774795, "grad_norm": 0.158203125, "learning_rate": 9.577529470121066e-05, "loss": 0.0016, "step": 11003 }, { "epoch": 5.136056009334889, "grad_norm": 0.1962890625, "learning_rate": 9.576063436457249e-05, "loss": 0.0018, "step": 11004 }, { "epoch": 5.136522753792299, "grad_norm": 0.0751953125, "learning_rate": 9.574597411921259e-05, "loss": 0.0011, "step": 11005 }, { "epoch": 5.136989498249708, "grad_norm": 0.3671875, "learning_rate": 9.573131396544657e-05, "loss": 0.0032, "step": 11006 }, { "epoch": 5.137456242707118, "grad_norm": 0.353515625, "learning_rate": 9.57166539035901e-05, "loss": 0.0021, "step": 11007 }, { "epoch": 5.1379229871645276, "grad_norm": 0.173828125, "learning_rate": 9.570199393395883e-05, "loss": 0.0017, "step": 11008 }, { "epoch": 5.138389731621937, "grad_norm": 0.14453125, "learning_rate": 9.568733405686841e-05, "loss": 0.0012, "step": 11009 }, { "epoch": 5.138856476079346, "grad_norm": 0.39453125, "learning_rate": 9.567267427263446e-05, "loss": 0.0022, "step": 11010 }, { "epoch": 5.139323220536756, "grad_norm": 0.1630859375, "learning_rate": 9.565801458157261e-05, "loss": 0.0032, "step": 11011 }, { "epoch": 5.139789964994166, "grad_norm": 0.38671875, "learning_rate": 9.564335498399857e-05, "loss": 0.0132, "step": 11012 }, { "epoch": 5.140256709451576, "grad_norm": 0.166015625, "learning_rate": 9.562869548022792e-05, "loss": 0.0017, "step": 11013 }, { "epoch": 5.1407234539089846, "grad_norm": 0.2255859375, "learning_rate": 9.561403607057628e-05, "loss": 0.0019, "step": 11014 }, { "epoch": 5.141190198366394, "grad_norm": 0.1474609375, "learning_rate": 9.559937675535932e-05, "loss": 0.0014, "step": 11015 }, { "epoch": 5.141656942823804, "grad_norm": 0.41796875, "learning_rate": 9.558471753489268e-05, "loss": 0.0024, "step": 11016 }, { "epoch": 5.142123687281214, "grad_norm": 0.080078125, "learning_rate": 9.557005840949196e-05, "loss": 0.0009, "step": 11017 }, { "epoch": 5.142590431738623, "grad_norm": 0.2431640625, "learning_rate": 9.555539937947277e-05, "loss": 0.0014, "step": 11018 }, { "epoch": 5.143057176196033, "grad_norm": 0.22265625, "learning_rate": 9.55407404451508e-05, "loss": 0.0026, "step": 11019 }, { "epoch": 5.143523920653442, "grad_norm": 0.28125, "learning_rate": 9.552608160684164e-05, "loss": 0.0023, "step": 11020 }, { "epoch": 5.143990665110852, "grad_norm": 0.11083984375, "learning_rate": 9.551142286486087e-05, "loss": 0.0039, "step": 11021 }, { "epoch": 5.144457409568261, "grad_norm": 0.0673828125, "learning_rate": 9.549676421952417e-05, "loss": 0.0008, "step": 11022 }, { "epoch": 5.144924154025671, "grad_norm": 0.34765625, "learning_rate": 9.548210567114714e-05, "loss": 0.0021, "step": 11023 }, { "epoch": 5.145390898483081, "grad_norm": 0.37890625, "learning_rate": 9.546744722004535e-05, "loss": 0.0049, "step": 11024 }, { "epoch": 5.14585764294049, "grad_norm": 0.57421875, "learning_rate": 9.54527888665345e-05, "loss": 0.0092, "step": 11025 }, { "epoch": 5.146324387397899, "grad_norm": 0.234375, "learning_rate": 9.543813061093014e-05, "loss": 0.0019, "step": 11026 }, { "epoch": 5.146791131855309, "grad_norm": 0.419921875, "learning_rate": 9.54234724535479e-05, "loss": 0.0042, "step": 11027 }, { "epoch": 5.147257876312719, "grad_norm": 0.2578125, "learning_rate": 9.540881439470335e-05, "loss": 0.0026, "step": 11028 }, { "epoch": 5.147724620770128, "grad_norm": 0.1572265625, "learning_rate": 9.539415643471212e-05, "loss": 0.0012, "step": 11029 }, { "epoch": 5.148191365227538, "grad_norm": 0.1953125, "learning_rate": 9.53794985738898e-05, "loss": 0.0047, "step": 11030 }, { "epoch": 5.1486581096849475, "grad_norm": 0.55859375, "learning_rate": 9.536484081255202e-05, "loss": 0.0087, "step": 11031 }, { "epoch": 5.149124854142357, "grad_norm": 0.2216796875, "learning_rate": 9.535018315101437e-05, "loss": 0.0054, "step": 11032 }, { "epoch": 5.149591598599766, "grad_norm": 0.4609375, "learning_rate": 9.53355255895924e-05, "loss": 0.0065, "step": 11033 }, { "epoch": 5.150058343057176, "grad_norm": 0.458984375, "learning_rate": 9.532086812860178e-05, "loss": 0.0038, "step": 11034 }, { "epoch": 5.150525087514586, "grad_norm": 0.10400390625, "learning_rate": 9.530621076835805e-05, "loss": 0.001, "step": 11035 }, { "epoch": 5.150991831971996, "grad_norm": 0.30078125, "learning_rate": 9.529155350917679e-05, "loss": 0.0019, "step": 11036 }, { "epoch": 5.1514585764294045, "grad_norm": 0.734375, "learning_rate": 9.527689635137362e-05, "loss": 0.004, "step": 11037 }, { "epoch": 5.151925320886814, "grad_norm": 0.1943359375, "learning_rate": 9.526223929526412e-05, "loss": 0.0023, "step": 11038 }, { "epoch": 5.152392065344224, "grad_norm": 0.412109375, "learning_rate": 9.524758234116386e-05, "loss": 0.0045, "step": 11039 }, { "epoch": 5.152858809801634, "grad_norm": 0.1962890625, "learning_rate": 9.523292548938841e-05, "loss": 0.0014, "step": 11040 }, { "epoch": 5.153325554259043, "grad_norm": 0.22265625, "learning_rate": 9.521826874025338e-05, "loss": 0.0023, "step": 11041 }, { "epoch": 5.153792298716453, "grad_norm": 0.279296875, "learning_rate": 9.520361209407434e-05, "loss": 0.0029, "step": 11042 }, { "epoch": 5.154259043173862, "grad_norm": 0.11767578125, "learning_rate": 9.518895555116681e-05, "loss": 0.0014, "step": 11043 }, { "epoch": 5.154725787631272, "grad_norm": 0.208984375, "learning_rate": 9.517429911184644e-05, "loss": 0.0016, "step": 11044 }, { "epoch": 5.155192532088681, "grad_norm": 0.294921875, "learning_rate": 9.515964277642876e-05, "loss": 0.0025, "step": 11045 }, { "epoch": 5.155659276546091, "grad_norm": 0.248046875, "learning_rate": 9.514498654522933e-05, "loss": 0.0018, "step": 11046 }, { "epoch": 5.156126021003501, "grad_norm": 0.30078125, "learning_rate": 9.513033041856374e-05, "loss": 0.003, "step": 11047 }, { "epoch": 5.1565927654609105, "grad_norm": 0.2890625, "learning_rate": 9.511567439674754e-05, "loss": 0.0054, "step": 11048 }, { "epoch": 5.157059509918319, "grad_norm": 0.326171875, "learning_rate": 9.510101848009629e-05, "loss": 0.0049, "step": 11049 }, { "epoch": 5.157526254375729, "grad_norm": 0.28125, "learning_rate": 9.508636266892553e-05, "loss": 0.0029, "step": 11050 }, { "epoch": 5.157992998833139, "grad_norm": 0.0693359375, "learning_rate": 9.507170696355085e-05, "loss": 0.0007, "step": 11051 }, { "epoch": 5.158459743290549, "grad_norm": 0.1767578125, "learning_rate": 9.505705136428779e-05, "loss": 0.0013, "step": 11052 }, { "epoch": 5.158926487747958, "grad_norm": 0.2138671875, "learning_rate": 9.504239587145187e-05, "loss": 0.0015, "step": 11053 }, { "epoch": 5.1593932322053675, "grad_norm": 0.203125, "learning_rate": 9.50277404853587e-05, "loss": 0.0024, "step": 11054 }, { "epoch": 5.159859976662777, "grad_norm": 0.41015625, "learning_rate": 9.501308520632379e-05, "loss": 0.004, "step": 11055 }, { "epoch": 5.160326721120187, "grad_norm": 0.33203125, "learning_rate": 9.499843003466267e-05, "loss": 0.0025, "step": 11056 }, { "epoch": 5.160793465577596, "grad_norm": 0.265625, "learning_rate": 9.498377497069091e-05, "loss": 0.0045, "step": 11057 }, { "epoch": 5.161260210035006, "grad_norm": 0.15625, "learning_rate": 9.496912001472405e-05, "loss": 0.0007, "step": 11058 }, { "epoch": 5.161726954492416, "grad_norm": 0.2470703125, "learning_rate": 9.495446516707761e-05, "loss": 0.0023, "step": 11059 }, { "epoch": 5.162193698949825, "grad_norm": 0.333984375, "learning_rate": 9.493981042806712e-05, "loss": 0.0023, "step": 11060 }, { "epoch": 5.162660443407234, "grad_norm": 0.41796875, "learning_rate": 9.492515579800815e-05, "loss": 0.004, "step": 11061 }, { "epoch": 5.163127187864644, "grad_norm": 0.357421875, "learning_rate": 9.49105012772162e-05, "loss": 0.0033, "step": 11062 }, { "epoch": 5.163593932322054, "grad_norm": 0.1708984375, "learning_rate": 9.489584686600678e-05, "loss": 0.0017, "step": 11063 }, { "epoch": 5.164060676779464, "grad_norm": 0.494140625, "learning_rate": 9.488119256469546e-05, "loss": 0.0047, "step": 11064 }, { "epoch": 5.164527421236873, "grad_norm": 0.2734375, "learning_rate": 9.486653837359776e-05, "loss": 0.0045, "step": 11065 }, { "epoch": 5.164994165694282, "grad_norm": 0.171875, "learning_rate": 9.485188429302917e-05, "loss": 0.0015, "step": 11066 }, { "epoch": 5.165460910151692, "grad_norm": 0.37109375, "learning_rate": 9.48372303233052e-05, "loss": 0.0032, "step": 11067 }, { "epoch": 5.165927654609101, "grad_norm": 0.283203125, "learning_rate": 9.482257646474141e-05, "loss": 0.0019, "step": 11068 }, { "epoch": 5.166394399066511, "grad_norm": 0.2109375, "learning_rate": 9.48079227176533e-05, "loss": 0.0032, "step": 11069 }, { "epoch": 5.166861143523921, "grad_norm": 0.068359375, "learning_rate": 9.479326908235636e-05, "loss": 0.0009, "step": 11070 }, { "epoch": 5.1673278879813305, "grad_norm": 0.236328125, "learning_rate": 9.477861555916613e-05, "loss": 0.0022, "step": 11071 }, { "epoch": 5.167794632438739, "grad_norm": 0.263671875, "learning_rate": 9.47639621483981e-05, "loss": 0.0023, "step": 11072 }, { "epoch": 5.168261376896149, "grad_norm": 0.1884765625, "learning_rate": 9.474930885036775e-05, "loss": 0.0016, "step": 11073 }, { "epoch": 5.168728121353559, "grad_norm": 0.392578125, "learning_rate": 9.473465566539067e-05, "loss": 0.0035, "step": 11074 }, { "epoch": 5.169194865810969, "grad_norm": 0.37890625, "learning_rate": 9.472000259378223e-05, "loss": 0.0025, "step": 11075 }, { "epoch": 5.169661610268378, "grad_norm": 0.21875, "learning_rate": 9.470534963585802e-05, "loss": 0.0022, "step": 11076 }, { "epoch": 5.1701283547257875, "grad_norm": 0.2421875, "learning_rate": 9.46906967919335e-05, "loss": 0.0026, "step": 11077 }, { "epoch": 5.170595099183197, "grad_norm": 0.53515625, "learning_rate": 9.467604406232418e-05, "loss": 0.0025, "step": 11078 }, { "epoch": 5.171061843640607, "grad_norm": 0.1484375, "learning_rate": 9.46613914473455e-05, "loss": 0.0013, "step": 11079 }, { "epoch": 5.171528588098016, "grad_norm": 0.15625, "learning_rate": 9.464673894731302e-05, "loss": 0.0012, "step": 11080 }, { "epoch": 5.171995332555426, "grad_norm": 0.181640625, "learning_rate": 9.46320865625422e-05, "loss": 0.0013, "step": 11081 }, { "epoch": 5.172462077012836, "grad_norm": 0.259765625, "learning_rate": 9.461743429334847e-05, "loss": 0.0021, "step": 11082 }, { "epoch": 5.172928821470245, "grad_norm": 0.53125, "learning_rate": 9.460278214004738e-05, "loss": 0.0043, "step": 11083 }, { "epoch": 5.173395565927654, "grad_norm": 0.1318359375, "learning_rate": 9.458813010295438e-05, "loss": 0.001, "step": 11084 }, { "epoch": 5.173862310385064, "grad_norm": 0.2275390625, "learning_rate": 9.457347818238491e-05, "loss": 0.0017, "step": 11085 }, { "epoch": 5.174329054842474, "grad_norm": 0.35546875, "learning_rate": 9.455882637865451e-05, "loss": 0.0042, "step": 11086 }, { "epoch": 5.174795799299884, "grad_norm": 0.142578125, "learning_rate": 9.454417469207861e-05, "loss": 0.001, "step": 11087 }, { "epoch": 5.1752625437572926, "grad_norm": 0.2333984375, "learning_rate": 9.452952312297269e-05, "loss": 0.0058, "step": 11088 }, { "epoch": 5.175729288214702, "grad_norm": 0.1376953125, "learning_rate": 9.451487167165217e-05, "loss": 0.0012, "step": 11089 }, { "epoch": 5.176196032672112, "grad_norm": 0.28515625, "learning_rate": 9.450022033843259e-05, "loss": 0.0036, "step": 11090 }, { "epoch": 5.176662777129522, "grad_norm": 0.11474609375, "learning_rate": 9.448556912362936e-05, "loss": 0.0041, "step": 11091 }, { "epoch": 5.177129521586931, "grad_norm": 0.177734375, "learning_rate": 9.447091802755793e-05, "loss": 0.0013, "step": 11092 }, { "epoch": 5.177596266044341, "grad_norm": 0.1611328125, "learning_rate": 9.445626705053378e-05, "loss": 0.001, "step": 11093 }, { "epoch": 5.17806301050175, "grad_norm": 0.052490234375, "learning_rate": 9.444161619287235e-05, "loss": 0.0007, "step": 11094 }, { "epoch": 5.17852975495916, "grad_norm": 0.3203125, "learning_rate": 9.442696545488907e-05, "loss": 0.0027, "step": 11095 }, { "epoch": 5.178996499416569, "grad_norm": 0.1669921875, "learning_rate": 9.441231483689943e-05, "loss": 0.002, "step": 11096 }, { "epoch": 5.179463243873979, "grad_norm": 0.3515625, "learning_rate": 9.439766433921885e-05, "loss": 0.0029, "step": 11097 }, { "epoch": 5.179929988331389, "grad_norm": 0.2470703125, "learning_rate": 9.438301396216279e-05, "loss": 0.0019, "step": 11098 }, { "epoch": 5.1803967327887985, "grad_norm": 0.2001953125, "learning_rate": 9.436836370604662e-05, "loss": 0.0017, "step": 11099 }, { "epoch": 5.180863477246207, "grad_norm": 0.10400390625, "learning_rate": 9.435371357118587e-05, "loss": 0.001, "step": 11100 }, { "epoch": 5.181330221703617, "grad_norm": 0.435546875, "learning_rate": 9.433906355789592e-05, "loss": 0.0034, "step": 11101 }, { "epoch": 5.181796966161027, "grad_norm": 0.10791015625, "learning_rate": 9.43244136664922e-05, "loss": 0.0011, "step": 11102 }, { "epoch": 5.182263710618437, "grad_norm": 0.181640625, "learning_rate": 9.430976389729019e-05, "loss": 0.0012, "step": 11103 }, { "epoch": 5.182730455075846, "grad_norm": 0.1201171875, "learning_rate": 9.429511425060524e-05, "loss": 0.0011, "step": 11104 }, { "epoch": 5.1831971995332555, "grad_norm": 0.515625, "learning_rate": 9.42804647267528e-05, "loss": 0.0167, "step": 11105 }, { "epoch": 5.183663943990665, "grad_norm": 0.71875, "learning_rate": 9.426581532604834e-05, "loss": 0.0071, "step": 11106 }, { "epoch": 5.184130688448075, "grad_norm": 0.466796875, "learning_rate": 9.425116604880722e-05, "loss": 0.0067, "step": 11107 }, { "epoch": 5.184597432905484, "grad_norm": 0.16796875, "learning_rate": 9.423651689534488e-05, "loss": 0.0016, "step": 11108 }, { "epoch": 5.185064177362894, "grad_norm": 0.18359375, "learning_rate": 9.42218678659767e-05, "loss": 0.0011, "step": 11109 }, { "epoch": 5.185530921820304, "grad_norm": 0.29296875, "learning_rate": 9.420721896101815e-05, "loss": 0.0047, "step": 11110 }, { "epoch": 5.1859976662777125, "grad_norm": 0.1298828125, "learning_rate": 9.41925701807846e-05, "loss": 0.001, "step": 11111 }, { "epoch": 5.186464410735122, "grad_norm": 0.171875, "learning_rate": 9.417792152559142e-05, "loss": 0.0013, "step": 11112 }, { "epoch": 5.186931155192532, "grad_norm": 0.259765625, "learning_rate": 9.416327299575408e-05, "loss": 0.0068, "step": 11113 }, { "epoch": 5.187397899649942, "grad_norm": 0.408203125, "learning_rate": 9.414862459158796e-05, "loss": 0.0053, "step": 11114 }, { "epoch": 5.187864644107351, "grad_norm": 0.2109375, "learning_rate": 9.41339763134084e-05, "loss": 0.0013, "step": 11115 }, { "epoch": 5.188331388564761, "grad_norm": 0.4140625, "learning_rate": 9.411932816153088e-05, "loss": 0.0061, "step": 11116 }, { "epoch": 5.18879813302217, "grad_norm": 0.48046875, "learning_rate": 9.410468013627073e-05, "loss": 0.0053, "step": 11117 }, { "epoch": 5.18926487747958, "grad_norm": 0.400390625, "learning_rate": 9.409003223794337e-05, "loss": 0.0038, "step": 11118 }, { "epoch": 5.189731621936989, "grad_norm": 0.1640625, "learning_rate": 9.407538446686416e-05, "loss": 0.0013, "step": 11119 }, { "epoch": 5.190198366394399, "grad_norm": 0.1162109375, "learning_rate": 9.40607368233485e-05, "loss": 0.0009, "step": 11120 }, { "epoch": 5.190665110851809, "grad_norm": 0.279296875, "learning_rate": 9.404608930771177e-05, "loss": 0.0027, "step": 11121 }, { "epoch": 5.1911318553092185, "grad_norm": 0.41796875, "learning_rate": 9.403144192026938e-05, "loss": 0.003, "step": 11122 }, { "epoch": 5.191598599766627, "grad_norm": 0.21875, "learning_rate": 9.401679466133663e-05, "loss": 0.0057, "step": 11123 }, { "epoch": 5.192065344224037, "grad_norm": 0.1865234375, "learning_rate": 9.400214753122892e-05, "loss": 0.0028, "step": 11124 }, { "epoch": 5.192532088681447, "grad_norm": 0.19921875, "learning_rate": 9.398750053026164e-05, "loss": 0.0018, "step": 11125 }, { "epoch": 5.192998833138857, "grad_norm": 0.0966796875, "learning_rate": 9.397285365875013e-05, "loss": 0.002, "step": 11126 }, { "epoch": 5.193465577596266, "grad_norm": 0.1220703125, "learning_rate": 9.395820691700977e-05, "loss": 0.0012, "step": 11127 }, { "epoch": 5.1939323220536755, "grad_norm": 0.07568359375, "learning_rate": 9.394356030535594e-05, "loss": 0.0009, "step": 11128 }, { "epoch": 5.194399066511085, "grad_norm": 0.07666015625, "learning_rate": 9.392891382410397e-05, "loss": 0.0008, "step": 11129 }, { "epoch": 5.194865810968495, "grad_norm": 0.51953125, "learning_rate": 9.391426747356922e-05, "loss": 0.0072, "step": 11130 }, { "epoch": 5.195332555425904, "grad_norm": 0.44921875, "learning_rate": 9.389962125406703e-05, "loss": 0.0057, "step": 11131 }, { "epoch": 5.195799299883314, "grad_norm": 0.0859375, "learning_rate": 9.388497516591277e-05, "loss": 0.0011, "step": 11132 }, { "epoch": 5.196266044340724, "grad_norm": 0.474609375, "learning_rate": 9.387032920942179e-05, "loss": 0.004, "step": 11133 }, { "epoch": 5.196732788798133, "grad_norm": 0.1376953125, "learning_rate": 9.38556833849094e-05, "loss": 0.001, "step": 11134 }, { "epoch": 5.197199533255542, "grad_norm": 0.1806640625, "learning_rate": 9.384103769269098e-05, "loss": 0.0035, "step": 11135 }, { "epoch": 5.197666277712952, "grad_norm": 0.484375, "learning_rate": 9.382639213308186e-05, "loss": 0.0043, "step": 11136 }, { "epoch": 5.198133022170362, "grad_norm": 0.19921875, "learning_rate": 9.381174670639736e-05, "loss": 0.0013, "step": 11137 }, { "epoch": 5.198599766627772, "grad_norm": 0.326171875, "learning_rate": 9.379710141295281e-05, "loss": 0.002, "step": 11138 }, { "epoch": 5.199066511085181, "grad_norm": 0.2275390625, "learning_rate": 9.378245625306356e-05, "loss": 0.0014, "step": 11139 }, { "epoch": 5.19953325554259, "grad_norm": 0.65625, "learning_rate": 9.376781122704492e-05, "loss": 0.0053, "step": 11140 }, { "epoch": 5.2, "grad_norm": 0.248046875, "learning_rate": 9.37531663352122e-05, "loss": 0.0022, "step": 11141 }, { "epoch": 5.20046674445741, "grad_norm": 0.10986328125, "learning_rate": 9.373852157788077e-05, "loss": 0.0008, "step": 11142 }, { "epoch": 5.200933488914819, "grad_norm": 0.51953125, "learning_rate": 9.372387695536592e-05, "loss": 0.0062, "step": 11143 }, { "epoch": 5.201400233372229, "grad_norm": 0.390625, "learning_rate": 9.370923246798293e-05, "loss": 0.0069, "step": 11144 }, { "epoch": 5.2018669778296385, "grad_norm": 0.17578125, "learning_rate": 9.369458811604717e-05, "loss": 0.0046, "step": 11145 }, { "epoch": 5.202333722287048, "grad_norm": 0.1494140625, "learning_rate": 9.367994389987392e-05, "loss": 0.0011, "step": 11146 }, { "epoch": 5.202800466744457, "grad_norm": 0.26171875, "learning_rate": 9.366529981977849e-05, "loss": 0.0019, "step": 11147 }, { "epoch": 5.203267211201867, "grad_norm": 0.388671875, "learning_rate": 9.365065587607617e-05, "loss": 0.0076, "step": 11148 }, { "epoch": 5.203733955659277, "grad_norm": 0.5625, "learning_rate": 9.36360120690823e-05, "loss": 0.0041, "step": 11149 }, { "epoch": 5.204200700116687, "grad_norm": 0.443359375, "learning_rate": 9.362136839911214e-05, "loss": 0.0047, "step": 11150 }, { "epoch": 5.2046674445740955, "grad_norm": 0.1572265625, "learning_rate": 9.360672486648098e-05, "loss": 0.003, "step": 11151 }, { "epoch": 5.205134189031505, "grad_norm": 0.123046875, "learning_rate": 9.359208147150414e-05, "loss": 0.001, "step": 11152 }, { "epoch": 5.205600933488915, "grad_norm": 0.220703125, "learning_rate": 9.35774382144969e-05, "loss": 0.0026, "step": 11153 }, { "epoch": 5.206067677946324, "grad_norm": 0.09814453125, "learning_rate": 9.356279509577452e-05, "loss": 0.003, "step": 11154 }, { "epoch": 5.206534422403734, "grad_norm": 0.189453125, "learning_rate": 9.354815211565233e-05, "loss": 0.0015, "step": 11155 }, { "epoch": 5.207001166861144, "grad_norm": 0.1396484375, "learning_rate": 9.353350927444559e-05, "loss": 0.0011, "step": 11156 }, { "epoch": 5.207467911318553, "grad_norm": 0.1552734375, "learning_rate": 9.351886657246955e-05, "loss": 0.0013, "step": 11157 }, { "epoch": 5.207934655775962, "grad_norm": 0.31640625, "learning_rate": 9.350422401003949e-05, "loss": 0.0028, "step": 11158 }, { "epoch": 5.208401400233372, "grad_norm": 0.341796875, "learning_rate": 9.348958158747073e-05, "loss": 0.0031, "step": 11159 }, { "epoch": 5.208868144690782, "grad_norm": 0.37109375, "learning_rate": 9.347493930507848e-05, "loss": 0.0026, "step": 11160 }, { "epoch": 5.209334889148192, "grad_norm": 0.2216796875, "learning_rate": 9.3460297163178e-05, "loss": 0.0019, "step": 11161 }, { "epoch": 5.209801633605601, "grad_norm": 0.12060546875, "learning_rate": 9.344565516208463e-05, "loss": 0.0012, "step": 11162 }, { "epoch": 5.21026837806301, "grad_norm": 0.443359375, "learning_rate": 9.343101330211355e-05, "loss": 0.003, "step": 11163 }, { "epoch": 5.21073512252042, "grad_norm": 0.134765625, "learning_rate": 9.341637158358004e-05, "loss": 0.0016, "step": 11164 }, { "epoch": 5.21120186697783, "grad_norm": 0.3515625, "learning_rate": 9.340173000679938e-05, "loss": 0.0029, "step": 11165 }, { "epoch": 5.211668611435239, "grad_norm": 0.107421875, "learning_rate": 9.338708857208676e-05, "loss": 0.0009, "step": 11166 }, { "epoch": 5.212135355892649, "grad_norm": 0.337890625, "learning_rate": 9.337244727975749e-05, "loss": 0.0044, "step": 11167 }, { "epoch": 5.212602100350058, "grad_norm": 0.1123046875, "learning_rate": 9.335780613012674e-05, "loss": 0.003, "step": 11168 }, { "epoch": 5.213068844807468, "grad_norm": 0.37109375, "learning_rate": 9.334316512350984e-05, "loss": 0.0025, "step": 11169 }, { "epoch": 5.213535589264877, "grad_norm": 0.388671875, "learning_rate": 9.332852426022198e-05, "loss": 0.0021, "step": 11170 }, { "epoch": 5.214002333722287, "grad_norm": 0.154296875, "learning_rate": 9.33138835405784e-05, "loss": 0.0013, "step": 11171 }, { "epoch": 5.214469078179697, "grad_norm": 0.271484375, "learning_rate": 9.32992429648943e-05, "loss": 0.0015, "step": 11172 }, { "epoch": 5.2149358226371065, "grad_norm": 0.16796875, "learning_rate": 9.328460253348492e-05, "loss": 0.0013, "step": 11173 }, { "epoch": 5.215402567094515, "grad_norm": 0.11669921875, "learning_rate": 9.326996224666552e-05, "loss": 0.0009, "step": 11174 }, { "epoch": 5.215869311551925, "grad_norm": 0.14453125, "learning_rate": 9.32553221047513e-05, "loss": 0.0032, "step": 11175 }, { "epoch": 5.216336056009335, "grad_norm": 0.224609375, "learning_rate": 9.324068210805746e-05, "loss": 0.0022, "step": 11176 }, { "epoch": 5.216802800466745, "grad_norm": 0.1728515625, "learning_rate": 9.322604225689927e-05, "loss": 0.0013, "step": 11177 }, { "epoch": 5.217269544924154, "grad_norm": 0.431640625, "learning_rate": 9.321140255159187e-05, "loss": 0.0056, "step": 11178 }, { "epoch": 5.2177362893815635, "grad_norm": 0.20703125, "learning_rate": 9.319676299245053e-05, "loss": 0.0019, "step": 11179 }, { "epoch": 5.218203033838973, "grad_norm": 0.11865234375, "learning_rate": 9.31821235797904e-05, "loss": 0.0008, "step": 11180 }, { "epoch": 5.218669778296383, "grad_norm": 0.234375, "learning_rate": 9.316748431392674e-05, "loss": 0.0018, "step": 11181 }, { "epoch": 5.219136522753792, "grad_norm": 0.1298828125, "learning_rate": 9.315284519517473e-05, "loss": 0.0014, "step": 11182 }, { "epoch": 5.219603267211202, "grad_norm": 0.32421875, "learning_rate": 9.313820622384952e-05, "loss": 0.0018, "step": 11183 }, { "epoch": 5.220070011668612, "grad_norm": 0.1982421875, "learning_rate": 9.312356740026639e-05, "loss": 0.0043, "step": 11184 }, { "epoch": 5.220536756126021, "grad_norm": 0.361328125, "learning_rate": 9.310892872474047e-05, "loss": 0.0029, "step": 11185 }, { "epoch": 5.22100350058343, "grad_norm": 0.412109375, "learning_rate": 9.309429019758694e-05, "loss": 0.0034, "step": 11186 }, { "epoch": 5.22147024504084, "grad_norm": 0.71875, "learning_rate": 9.307965181912101e-05, "loss": 0.0286, "step": 11187 }, { "epoch": 5.22193698949825, "grad_norm": 0.267578125, "learning_rate": 9.306501358965787e-05, "loss": 0.0023, "step": 11188 }, { "epoch": 5.222403733955659, "grad_norm": 0.2392578125, "learning_rate": 9.305037550951268e-05, "loss": 0.0039, "step": 11189 }, { "epoch": 5.222870478413069, "grad_norm": 0.2890625, "learning_rate": 9.303573757900061e-05, "loss": 0.0059, "step": 11190 }, { "epoch": 5.223337222870478, "grad_norm": 0.224609375, "learning_rate": 9.302109979843684e-05, "loss": 0.0017, "step": 11191 }, { "epoch": 5.223803967327888, "grad_norm": 0.2392578125, "learning_rate": 9.300646216813654e-05, "loss": 0.0046, "step": 11192 }, { "epoch": 5.224270711785298, "grad_norm": 0.09814453125, "learning_rate": 9.299182468841484e-05, "loss": 0.0008, "step": 11193 }, { "epoch": 5.224737456242707, "grad_norm": 0.1826171875, "learning_rate": 9.297718735958695e-05, "loss": 0.0014, "step": 11194 }, { "epoch": 5.225204200700117, "grad_norm": 0.640625, "learning_rate": 9.296255018196803e-05, "loss": 0.0054, "step": 11195 }, { "epoch": 5.2256709451575265, "grad_norm": 0.146484375, "learning_rate": 9.294791315587319e-05, "loss": 0.0011, "step": 11196 }, { "epoch": 5.226137689614935, "grad_norm": 0.255859375, "learning_rate": 9.293327628161759e-05, "loss": 0.0039, "step": 11197 }, { "epoch": 5.226604434072345, "grad_norm": 0.046630859375, "learning_rate": 9.291863955951642e-05, "loss": 0.0028, "step": 11198 }, { "epoch": 5.227071178529755, "grad_norm": 0.050537109375, "learning_rate": 9.290400298988479e-05, "loss": 0.0006, "step": 11199 }, { "epoch": 5.227537922987165, "grad_norm": 0.3984375, "learning_rate": 9.288936657303783e-05, "loss": 0.0051, "step": 11200 }, { "epoch": 5.228004667444574, "grad_norm": 0.1318359375, "learning_rate": 9.287473030929071e-05, "loss": 0.0029, "step": 11201 }, { "epoch": 5.2284714119019835, "grad_norm": 0.34765625, "learning_rate": 9.286009419895856e-05, "loss": 0.0031, "step": 11202 }, { "epoch": 5.228938156359393, "grad_norm": 0.345703125, "learning_rate": 9.284545824235649e-05, "loss": 0.0027, "step": 11203 }, { "epoch": 5.229404900816803, "grad_norm": 0.34375, "learning_rate": 9.283082243979964e-05, "loss": 0.0046, "step": 11204 }, { "epoch": 5.229871645274212, "grad_norm": 0.2392578125, "learning_rate": 9.281618679160317e-05, "loss": 0.003, "step": 11205 }, { "epoch": 5.230338389731622, "grad_norm": 0.380859375, "learning_rate": 9.280155129808214e-05, "loss": 0.0043, "step": 11206 }, { "epoch": 5.230805134189032, "grad_norm": 0.1337890625, "learning_rate": 9.278691595955168e-05, "loss": 0.0011, "step": 11207 }, { "epoch": 5.231271878646441, "grad_norm": 0.34765625, "learning_rate": 9.277228077632696e-05, "loss": 0.0028, "step": 11208 }, { "epoch": 5.23173862310385, "grad_norm": 0.21875, "learning_rate": 9.275764574872306e-05, "loss": 0.004, "step": 11209 }, { "epoch": 5.23220536756126, "grad_norm": 0.287109375, "learning_rate": 9.274301087705504e-05, "loss": 0.0025, "step": 11210 }, { "epoch": 5.23267211201867, "grad_norm": 0.06201171875, "learning_rate": 9.272837616163808e-05, "loss": 0.0008, "step": 11211 }, { "epoch": 5.23313885647608, "grad_norm": 0.0625, "learning_rate": 9.271374160278724e-05, "loss": 0.0007, "step": 11212 }, { "epoch": 5.233605600933489, "grad_norm": 0.185546875, "learning_rate": 9.26991072008176e-05, "loss": 0.0015, "step": 11213 }, { "epoch": 5.234072345390898, "grad_norm": 0.1416015625, "learning_rate": 9.268447295604432e-05, "loss": 0.0039, "step": 11214 }, { "epoch": 5.234539089848308, "grad_norm": 0.314453125, "learning_rate": 9.266983886878245e-05, "loss": 0.0025, "step": 11215 }, { "epoch": 5.235005834305718, "grad_norm": 0.35546875, "learning_rate": 9.265520493934708e-05, "loss": 0.0017, "step": 11216 }, { "epoch": 5.235472578763127, "grad_norm": 0.35546875, "learning_rate": 9.264057116805333e-05, "loss": 0.0028, "step": 11217 }, { "epoch": 5.235939323220537, "grad_norm": 0.224609375, "learning_rate": 9.262593755521622e-05, "loss": 0.0069, "step": 11218 }, { "epoch": 5.2364060676779465, "grad_norm": 0.099609375, "learning_rate": 9.261130410115082e-05, "loss": 0.001, "step": 11219 }, { "epoch": 5.236872812135356, "grad_norm": 0.2119140625, "learning_rate": 9.259667080617227e-05, "loss": 0.0093, "step": 11220 }, { "epoch": 5.237339556592765, "grad_norm": 0.2255859375, "learning_rate": 9.258203767059562e-05, "loss": 0.0015, "step": 11221 }, { "epoch": 5.237806301050175, "grad_norm": 0.49609375, "learning_rate": 9.25674046947359e-05, "loss": 0.0052, "step": 11222 }, { "epoch": 5.238273045507585, "grad_norm": 0.1416015625, "learning_rate": 9.255277187890823e-05, "loss": 0.0012, "step": 11223 }, { "epoch": 5.238739789964995, "grad_norm": 0.298828125, "learning_rate": 9.253813922342764e-05, "loss": 0.0039, "step": 11224 }, { "epoch": 5.2392065344224035, "grad_norm": 0.341796875, "learning_rate": 9.252350672860918e-05, "loss": 0.0036, "step": 11225 }, { "epoch": 5.239673278879813, "grad_norm": 0.054443359375, "learning_rate": 9.250887439476792e-05, "loss": 0.0007, "step": 11226 }, { "epoch": 5.240140023337223, "grad_norm": 0.4140625, "learning_rate": 9.249424222221893e-05, "loss": 0.0023, "step": 11227 }, { "epoch": 5.240606767794633, "grad_norm": 0.248046875, "learning_rate": 9.24796102112772e-05, "loss": 0.0021, "step": 11228 }, { "epoch": 5.241073512252042, "grad_norm": 0.27734375, "learning_rate": 9.24649783622578e-05, "loss": 0.0046, "step": 11229 }, { "epoch": 5.241540256709452, "grad_norm": 0.1142578125, "learning_rate": 9.245034667547581e-05, "loss": 0.0046, "step": 11230 }, { "epoch": 5.242007001166861, "grad_norm": 0.07421875, "learning_rate": 9.243571515124621e-05, "loss": 0.0008, "step": 11231 }, { "epoch": 5.24247374562427, "grad_norm": 0.357421875, "learning_rate": 9.242108378988405e-05, "loss": 0.0041, "step": 11232 }, { "epoch": 5.24294049008168, "grad_norm": 0.263671875, "learning_rate": 9.240645259170438e-05, "loss": 0.0034, "step": 11233 }, { "epoch": 5.24340723453909, "grad_norm": 0.5625, "learning_rate": 9.239182155702222e-05, "loss": 0.0066, "step": 11234 }, { "epoch": 5.2438739789965, "grad_norm": 0.0849609375, "learning_rate": 9.237719068615256e-05, "loss": 0.0008, "step": 11235 }, { "epoch": 5.244340723453909, "grad_norm": 0.06689453125, "learning_rate": 9.236255997941045e-05, "loss": 0.0007, "step": 11236 }, { "epoch": 5.244807467911318, "grad_norm": 0.038818359375, "learning_rate": 9.234792943711091e-05, "loss": 0.0006, "step": 11237 }, { "epoch": 5.245274212368728, "grad_norm": 0.19140625, "learning_rate": 9.233329905956894e-05, "loss": 0.0036, "step": 11238 }, { "epoch": 5.245740956826138, "grad_norm": 0.0625, "learning_rate": 9.231866884709952e-05, "loss": 0.0008, "step": 11239 }, { "epoch": 5.246207701283547, "grad_norm": 0.322265625, "learning_rate": 9.23040388000177e-05, "loss": 0.0021, "step": 11240 }, { "epoch": 5.246674445740957, "grad_norm": 0.349609375, "learning_rate": 9.228940891863848e-05, "loss": 0.0026, "step": 11241 }, { "epoch": 5.2471411901983664, "grad_norm": 0.4375, "learning_rate": 9.22747792032768e-05, "loss": 0.0065, "step": 11242 }, { "epoch": 5.247607934655776, "grad_norm": 0.046875, "learning_rate": 9.226014965424774e-05, "loss": 0.0006, "step": 11243 }, { "epoch": 5.248074679113185, "grad_norm": 0.1875, "learning_rate": 9.224552027186622e-05, "loss": 0.0012, "step": 11244 }, { "epoch": 5.248541423570595, "grad_norm": 0.169921875, "learning_rate": 9.223089105644724e-05, "loss": 0.0013, "step": 11245 }, { "epoch": 5.249008168028005, "grad_norm": 0.259765625, "learning_rate": 9.221626200830582e-05, "loss": 0.0022, "step": 11246 }, { "epoch": 5.2494749124854145, "grad_norm": 0.2421875, "learning_rate": 9.220163312775692e-05, "loss": 0.0015, "step": 11247 }, { "epoch": 5.249941656942823, "grad_norm": 0.37890625, "learning_rate": 9.21870044151155e-05, "loss": 0.0027, "step": 11248 }, { "epoch": 5.250408401400233, "grad_norm": 0.203125, "learning_rate": 9.217237587069653e-05, "loss": 0.0018, "step": 11249 }, { "epoch": 5.250875145857643, "grad_norm": 0.109375, "learning_rate": 9.2157747494815e-05, "loss": 0.0011, "step": 11250 }, { "epoch": 5.251341890315053, "grad_norm": 0.080078125, "learning_rate": 9.214311928778588e-05, "loss": 0.0008, "step": 11251 }, { "epoch": 5.251808634772462, "grad_norm": 0.34375, "learning_rate": 9.21284912499241e-05, "loss": 0.0045, "step": 11252 }, { "epoch": 5.2522753792298715, "grad_norm": 0.06005859375, "learning_rate": 9.211386338154465e-05, "loss": 0.0007, "step": 11253 }, { "epoch": 5.252742123687281, "grad_norm": 0.1591796875, "learning_rate": 9.209923568296249e-05, "loss": 0.0013, "step": 11254 }, { "epoch": 5.253208868144691, "grad_norm": 0.3984375, "learning_rate": 9.208460815449254e-05, "loss": 0.0044, "step": 11255 }, { "epoch": 5.2536756126021, "grad_norm": 0.083984375, "learning_rate": 9.206998079644973e-05, "loss": 0.0067, "step": 11256 }, { "epoch": 5.2536756126021, "eval_loss": 2.1444499492645264, "eval_runtime": 55.9501, "eval_samples_per_second": 32.243, "eval_steps_per_second": 4.039, "step": 11256 }, { "epoch": 5.25414235705951, "grad_norm": 0.06884765625, "learning_rate": 9.205535360914905e-05, "loss": 0.0007, "step": 11257 }, { "epoch": 5.25460910151692, "grad_norm": 0.05419921875, "learning_rate": 9.204072659290544e-05, "loss": 0.0006, "step": 11258 }, { "epoch": 5.255075845974329, "grad_norm": 0.2158203125, "learning_rate": 9.202609974803378e-05, "loss": 0.0009, "step": 11259 }, { "epoch": 5.255542590431738, "grad_norm": 0.1767578125, "learning_rate": 9.201147307484907e-05, "loss": 0.0011, "step": 11260 }, { "epoch": 5.256009334889148, "grad_norm": 0.234375, "learning_rate": 9.199684657366621e-05, "loss": 0.0011, "step": 11261 }, { "epoch": 5.256476079346558, "grad_norm": 0.42578125, "learning_rate": 9.198222024480009e-05, "loss": 0.0044, "step": 11262 }, { "epoch": 5.256942823803968, "grad_norm": 0.30859375, "learning_rate": 9.196759408856569e-05, "loss": 0.0028, "step": 11263 }, { "epoch": 5.257409568261377, "grad_norm": 0.197265625, "learning_rate": 9.19529681052779e-05, "loss": 0.0011, "step": 11264 }, { "epoch": 5.257876312718786, "grad_norm": 0.0732421875, "learning_rate": 9.193834229525167e-05, "loss": 0.0008, "step": 11265 }, { "epoch": 5.258343057176196, "grad_norm": 0.208984375, "learning_rate": 9.192371665880184e-05, "loss": 0.0042, "step": 11266 }, { "epoch": 5.258809801633606, "grad_norm": 0.416015625, "learning_rate": 9.190909119624335e-05, "loss": 0.0031, "step": 11267 }, { "epoch": 5.259276546091015, "grad_norm": 0.1650390625, "learning_rate": 9.189446590789108e-05, "loss": 0.0013, "step": 11268 }, { "epoch": 5.259743290548425, "grad_norm": 0.287109375, "learning_rate": 9.187984079405999e-05, "loss": 0.0017, "step": 11269 }, { "epoch": 5.2602100350058345, "grad_norm": 0.181640625, "learning_rate": 9.186521585506492e-05, "loss": 0.0015, "step": 11270 }, { "epoch": 5.260676779463244, "grad_norm": 0.052490234375, "learning_rate": 9.185059109122077e-05, "loss": 0.0007, "step": 11271 }, { "epoch": 5.261143523920653, "grad_norm": 0.408203125, "learning_rate": 9.183596650284246e-05, "loss": 0.0027, "step": 11272 }, { "epoch": 5.261610268378063, "grad_norm": 0.2578125, "learning_rate": 9.182134209024483e-05, "loss": 0.0018, "step": 11273 }, { "epoch": 5.262077012835473, "grad_norm": 0.2216796875, "learning_rate": 9.180671785374278e-05, "loss": 0.0013, "step": 11274 }, { "epoch": 5.262543757292882, "grad_norm": 0.0810546875, "learning_rate": 9.17920937936512e-05, "loss": 0.0041, "step": 11275 }, { "epoch": 5.2630105017502915, "grad_norm": 0.11181640625, "learning_rate": 9.177746991028493e-05, "loss": 0.001, "step": 11276 }, { "epoch": 5.263477246207701, "grad_norm": 0.049560546875, "learning_rate": 9.176284620395887e-05, "loss": 0.0007, "step": 11277 }, { "epoch": 5.263943990665111, "grad_norm": 0.54296875, "learning_rate": 9.174822267498784e-05, "loss": 0.0023, "step": 11278 }, { "epoch": 5.264410735122521, "grad_norm": 0.2421875, "learning_rate": 9.173359932368677e-05, "loss": 0.0038, "step": 11279 }, { "epoch": 5.26487747957993, "grad_norm": 0.38671875, "learning_rate": 9.171897615037046e-05, "loss": 0.0026, "step": 11280 }, { "epoch": 5.26534422403734, "grad_norm": 0.275390625, "learning_rate": 9.170435315535377e-05, "loss": 0.0016, "step": 11281 }, { "epoch": 5.265810968494749, "grad_norm": 0.25, "learning_rate": 9.168973033895158e-05, "loss": 0.0018, "step": 11282 }, { "epoch": 5.266277712952158, "grad_norm": 0.423828125, "learning_rate": 9.167510770147872e-05, "loss": 0.0042, "step": 11283 }, { "epoch": 5.266744457409568, "grad_norm": 0.39453125, "learning_rate": 9.166048524325e-05, "loss": 0.0017, "step": 11284 }, { "epoch": 5.267211201866978, "grad_norm": 0.1337890625, "learning_rate": 9.164586296458031e-05, "loss": 0.0008, "step": 11285 }, { "epoch": 5.267677946324388, "grad_norm": 0.28515625, "learning_rate": 9.163124086578446e-05, "loss": 0.0014, "step": 11286 }, { "epoch": 5.268144690781797, "grad_norm": 0.30859375, "learning_rate": 9.161661894717728e-05, "loss": 0.0048, "step": 11287 }, { "epoch": 5.268611435239206, "grad_norm": 0.146484375, "learning_rate": 9.160199720907357e-05, "loss": 0.0012, "step": 11288 }, { "epoch": 5.269078179696616, "grad_norm": 0.1787109375, "learning_rate": 9.158737565178822e-05, "loss": 0.0018, "step": 11289 }, { "epoch": 5.269544924154026, "grad_norm": 0.08984375, "learning_rate": 9.157275427563598e-05, "loss": 0.0011, "step": 11290 }, { "epoch": 5.270011668611435, "grad_norm": 0.09716796875, "learning_rate": 9.155813308093167e-05, "loss": 0.0031, "step": 11291 }, { "epoch": 5.270478413068845, "grad_norm": 0.08349609375, "learning_rate": 9.154351206799016e-05, "loss": 0.0009, "step": 11292 }, { "epoch": 5.2709451575262545, "grad_norm": 0.2431640625, "learning_rate": 9.152889123712621e-05, "loss": 0.0011, "step": 11293 }, { "epoch": 5.271411901983664, "grad_norm": 0.361328125, "learning_rate": 9.151427058865461e-05, "loss": 0.0022, "step": 11294 }, { "epoch": 5.271878646441073, "grad_norm": 0.2060546875, "learning_rate": 9.149965012289019e-05, "loss": 0.0017, "step": 11295 }, { "epoch": 5.272345390898483, "grad_norm": 0.2333984375, "learning_rate": 9.148502984014774e-05, "loss": 0.0023, "step": 11296 }, { "epoch": 5.272812135355893, "grad_norm": 0.10546875, "learning_rate": 9.147040974074204e-05, "loss": 0.0014, "step": 11297 }, { "epoch": 5.273278879813303, "grad_norm": 0.275390625, "learning_rate": 9.145578982498788e-05, "loss": 0.0023, "step": 11298 }, { "epoch": 5.2737456242707115, "grad_norm": 0.373046875, "learning_rate": 9.144117009320006e-05, "loss": 0.0028, "step": 11299 }, { "epoch": 5.274212368728121, "grad_norm": 0.080078125, "learning_rate": 9.142655054569333e-05, "loss": 0.0007, "step": 11300 }, { "epoch": 5.274679113185531, "grad_norm": 0.1220703125, "learning_rate": 9.141193118278248e-05, "loss": 0.0036, "step": 11301 }, { "epoch": 5.275145857642941, "grad_norm": 0.2177734375, "learning_rate": 9.139731200478227e-05, "loss": 0.002, "step": 11302 }, { "epoch": 5.27561260210035, "grad_norm": 0.2490234375, "learning_rate": 9.138269301200751e-05, "loss": 0.0018, "step": 11303 }, { "epoch": 5.27607934655776, "grad_norm": 0.1689453125, "learning_rate": 9.136807420477291e-05, "loss": 0.0015, "step": 11304 }, { "epoch": 5.276546091015169, "grad_norm": 0.375, "learning_rate": 9.135345558339323e-05, "loss": 0.0045, "step": 11305 }, { "epoch": 5.277012835472579, "grad_norm": 0.1259765625, "learning_rate": 9.133883714818328e-05, "loss": 0.0008, "step": 11306 }, { "epoch": 5.277479579929988, "grad_norm": 0.0703125, "learning_rate": 9.132421889945776e-05, "loss": 0.0029, "step": 11307 }, { "epoch": 5.277946324387398, "grad_norm": 0.1630859375, "learning_rate": 9.130960083753142e-05, "loss": 0.0008, "step": 11308 }, { "epoch": 5.278413068844808, "grad_norm": 0.26953125, "learning_rate": 9.129498296271903e-05, "loss": 0.0016, "step": 11309 }, { "epoch": 5.2788798133022174, "grad_norm": 0.2060546875, "learning_rate": 9.128036527533532e-05, "loss": 0.0012, "step": 11310 }, { "epoch": 5.279346557759626, "grad_norm": 0.16796875, "learning_rate": 9.1265747775695e-05, "loss": 0.0013, "step": 11311 }, { "epoch": 5.279813302217036, "grad_norm": 0.29296875, "learning_rate": 9.125113046411285e-05, "loss": 0.0019, "step": 11312 }, { "epoch": 5.280280046674446, "grad_norm": 0.146484375, "learning_rate": 9.123651334090359e-05, "loss": 0.0026, "step": 11313 }, { "epoch": 5.280746791131856, "grad_norm": 0.20703125, "learning_rate": 9.122189640638188e-05, "loss": 0.0017, "step": 11314 }, { "epoch": 5.281213535589265, "grad_norm": 0.10595703125, "learning_rate": 9.120727966086248e-05, "loss": 0.001, "step": 11315 }, { "epoch": 5.2816802800466744, "grad_norm": 0.1328125, "learning_rate": 9.119266310466012e-05, "loss": 0.0027, "step": 11316 }, { "epoch": 5.282147024504084, "grad_norm": 0.318359375, "learning_rate": 9.117804673808947e-05, "loss": 0.0013, "step": 11317 }, { "epoch": 5.282613768961493, "grad_norm": 0.341796875, "learning_rate": 9.116343056146529e-05, "loss": 0.0037, "step": 11318 }, { "epoch": 5.283080513418903, "grad_norm": 0.1484375, "learning_rate": 9.114881457510225e-05, "loss": 0.0011, "step": 11319 }, { "epoch": 5.283547257876313, "grad_norm": 0.373046875, "learning_rate": 9.113419877931503e-05, "loss": 0.0015, "step": 11320 }, { "epoch": 5.2840140023337225, "grad_norm": 0.0556640625, "learning_rate": 9.111958317441836e-05, "loss": 0.0006, "step": 11321 }, { "epoch": 5.2844807467911314, "grad_norm": 0.193359375, "learning_rate": 9.110496776072693e-05, "loss": 0.0023, "step": 11322 }, { "epoch": 5.284947491248541, "grad_norm": 0.052490234375, "learning_rate": 9.109035253855538e-05, "loss": 0.0006, "step": 11323 }, { "epoch": 5.285414235705951, "grad_norm": 0.435546875, "learning_rate": 9.107573750821847e-05, "loss": 0.005, "step": 11324 }, { "epoch": 5.285880980163361, "grad_norm": 0.40234375, "learning_rate": 9.10611226700308e-05, "loss": 0.0063, "step": 11325 }, { "epoch": 5.28634772462077, "grad_norm": 0.158203125, "learning_rate": 9.104650802430709e-05, "loss": 0.0025, "step": 11326 }, { "epoch": 5.2868144690781795, "grad_norm": 0.07177734375, "learning_rate": 9.103189357136198e-05, "loss": 0.0008, "step": 11327 }, { "epoch": 5.287281213535589, "grad_norm": 0.33984375, "learning_rate": 9.101727931151017e-05, "loss": 0.0015, "step": 11328 }, { "epoch": 5.287747957992999, "grad_norm": 0.2451171875, "learning_rate": 9.100266524506632e-05, "loss": 0.0015, "step": 11329 }, { "epoch": 5.288214702450408, "grad_norm": 0.224609375, "learning_rate": 9.098805137234502e-05, "loss": 0.0017, "step": 11330 }, { "epoch": 5.288681446907818, "grad_norm": 0.1376953125, "learning_rate": 9.097343769366102e-05, "loss": 0.001, "step": 11331 }, { "epoch": 5.289148191365228, "grad_norm": 0.08154296875, "learning_rate": 9.09588242093289e-05, "loss": 0.0032, "step": 11332 }, { "epoch": 5.289614935822637, "grad_norm": 0.1552734375, "learning_rate": 9.094421091966332e-05, "loss": 0.001, "step": 11333 }, { "epoch": 5.290081680280046, "grad_norm": 0.1494140625, "learning_rate": 9.092959782497893e-05, "loss": 0.0009, "step": 11334 }, { "epoch": 5.290548424737456, "grad_norm": 0.126953125, "learning_rate": 9.091498492559038e-05, "loss": 0.0009, "step": 11335 }, { "epoch": 5.291015169194866, "grad_norm": 0.1708984375, "learning_rate": 9.090037222181227e-05, "loss": 0.0012, "step": 11336 }, { "epoch": 5.291481913652276, "grad_norm": 0.431640625, "learning_rate": 9.088575971395923e-05, "loss": 0.0018, "step": 11337 }, { "epoch": 5.291948658109685, "grad_norm": 0.1923828125, "learning_rate": 9.087114740234591e-05, "loss": 0.0015, "step": 11338 }, { "epoch": 5.292415402567094, "grad_norm": 0.2080078125, "learning_rate": 9.085653528728692e-05, "loss": 0.0017, "step": 11339 }, { "epoch": 5.292882147024504, "grad_norm": 0.244140625, "learning_rate": 9.084192336909686e-05, "loss": 0.0018, "step": 11340 }, { "epoch": 5.293348891481914, "grad_norm": 0.57421875, "learning_rate": 9.082731164809037e-05, "loss": 0.0082, "step": 11341 }, { "epoch": 5.293815635939323, "grad_norm": 0.1416015625, "learning_rate": 9.081270012458202e-05, "loss": 0.0012, "step": 11342 }, { "epoch": 5.294282380396733, "grad_norm": 0.1337890625, "learning_rate": 9.079808879888641e-05, "loss": 0.0007, "step": 11343 }, { "epoch": 5.2947491248541425, "grad_norm": 0.08935546875, "learning_rate": 9.078347767131819e-05, "loss": 0.0008, "step": 11344 }, { "epoch": 5.295215869311552, "grad_norm": 0.2275390625, "learning_rate": 9.076886674219193e-05, "loss": 0.0019, "step": 11345 }, { "epoch": 5.295682613768961, "grad_norm": 0.0888671875, "learning_rate": 9.07542560118222e-05, "loss": 0.0037, "step": 11346 }, { "epoch": 5.296149358226371, "grad_norm": 0.162109375, "learning_rate": 9.073964548052358e-05, "loss": 0.0013, "step": 11347 }, { "epoch": 5.296616102683781, "grad_norm": 0.09326171875, "learning_rate": 9.072503514861068e-05, "loss": 0.0008, "step": 11348 }, { "epoch": 5.297082847141191, "grad_norm": 0.30078125, "learning_rate": 9.071042501639806e-05, "loss": 0.002, "step": 11349 }, { "epoch": 5.2975495915985995, "grad_norm": 0.047119140625, "learning_rate": 9.069581508420029e-05, "loss": 0.0006, "step": 11350 }, { "epoch": 5.298016336056009, "grad_norm": 0.34375, "learning_rate": 9.068120535233195e-05, "loss": 0.0028, "step": 11351 }, { "epoch": 5.298483080513419, "grad_norm": 0.1611328125, "learning_rate": 9.066659582110761e-05, "loss": 0.0008, "step": 11352 }, { "epoch": 5.298949824970828, "grad_norm": 0.038330078125, "learning_rate": 9.065198649084178e-05, "loss": 0.0005, "step": 11353 }, { "epoch": 5.299416569428238, "grad_norm": 0.1396484375, "learning_rate": 9.063737736184909e-05, "loss": 0.0011, "step": 11354 }, { "epoch": 5.299883313885648, "grad_norm": 0.30859375, "learning_rate": 9.062276843444406e-05, "loss": 0.002, "step": 11355 }, { "epoch": 5.300350058343057, "grad_norm": 0.07861328125, "learning_rate": 9.060815970894122e-05, "loss": 0.001, "step": 11356 }, { "epoch": 5.300816802800467, "grad_norm": 0.15234375, "learning_rate": 9.05935511856551e-05, "loss": 0.0009, "step": 11357 }, { "epoch": 5.301283547257876, "grad_norm": 0.58984375, "learning_rate": 9.057894286490029e-05, "loss": 0.0049, "step": 11358 }, { "epoch": 5.301750291715286, "grad_norm": 0.0269775390625, "learning_rate": 9.056433474699127e-05, "loss": 0.0005, "step": 11359 }, { "epoch": 5.302217036172696, "grad_norm": 0.28125, "learning_rate": 9.054972683224259e-05, "loss": 0.002, "step": 11360 }, { "epoch": 5.302683780630105, "grad_norm": 0.357421875, "learning_rate": 9.053511912096884e-05, "loss": 0.0105, "step": 11361 }, { "epoch": 5.303150525087514, "grad_norm": 0.11181640625, "learning_rate": 9.05205116134844e-05, "loss": 0.0009, "step": 11362 }, { "epoch": 5.303617269544924, "grad_norm": 0.2236328125, "learning_rate": 9.05059043101039e-05, "loss": 0.0046, "step": 11363 }, { "epoch": 5.304084014002334, "grad_norm": 0.232421875, "learning_rate": 9.04912972111418e-05, "loss": 0.0033, "step": 11364 }, { "epoch": 5.304550758459743, "grad_norm": 0.1884765625, "learning_rate": 9.047669031691262e-05, "loss": 0.001, "step": 11365 }, { "epoch": 5.305017502917153, "grad_norm": 0.19921875, "learning_rate": 9.046208362773087e-05, "loss": 0.0036, "step": 11366 }, { "epoch": 5.3054842473745625, "grad_norm": 0.0791015625, "learning_rate": 9.044747714391103e-05, "loss": 0.0008, "step": 11367 }, { "epoch": 5.305950991831972, "grad_norm": 0.1875, "learning_rate": 9.043287086576763e-05, "loss": 0.0017, "step": 11368 }, { "epoch": 5.306417736289381, "grad_norm": 0.5078125, "learning_rate": 9.041826479361511e-05, "loss": 0.0077, "step": 11369 }, { "epoch": 5.306884480746791, "grad_norm": 0.2255859375, "learning_rate": 9.040365892776799e-05, "loss": 0.0014, "step": 11370 }, { "epoch": 5.307351225204201, "grad_norm": 0.06396484375, "learning_rate": 9.038905326854075e-05, "loss": 0.0007, "step": 11371 }, { "epoch": 5.307817969661611, "grad_norm": 0.14453125, "learning_rate": 9.037444781624781e-05, "loss": 0.0011, "step": 11372 }, { "epoch": 5.3082847141190195, "grad_norm": 0.6796875, "learning_rate": 9.035984257120375e-05, "loss": 0.0057, "step": 11373 }, { "epoch": 5.308751458576429, "grad_norm": 0.07958984375, "learning_rate": 9.034523753372295e-05, "loss": 0.0009, "step": 11374 }, { "epoch": 5.309218203033839, "grad_norm": 0.1728515625, "learning_rate": 9.03306327041199e-05, "loss": 0.0013, "step": 11375 }, { "epoch": 5.309684947491249, "grad_norm": 0.173828125, "learning_rate": 9.031602808270903e-05, "loss": 0.0014, "step": 11376 }, { "epoch": 5.310151691948658, "grad_norm": 0.0927734375, "learning_rate": 9.030142366980486e-05, "loss": 0.001, "step": 11377 }, { "epoch": 5.310618436406068, "grad_norm": 0.072265625, "learning_rate": 9.02868194657218e-05, "loss": 0.0007, "step": 11378 }, { "epoch": 5.311085180863477, "grad_norm": 0.314453125, "learning_rate": 9.027221547077424e-05, "loss": 0.002, "step": 11379 }, { "epoch": 5.311551925320887, "grad_norm": 0.0703125, "learning_rate": 9.025761168527673e-05, "loss": 0.0036, "step": 11380 }, { "epoch": 5.312018669778296, "grad_norm": 0.3515625, "learning_rate": 9.024300810954363e-05, "loss": 0.0029, "step": 11381 }, { "epoch": 5.312485414235706, "grad_norm": 0.251953125, "learning_rate": 9.022840474388938e-05, "loss": 0.0015, "step": 11382 }, { "epoch": 5.312952158693116, "grad_norm": 0.1240234375, "learning_rate": 9.021380158862843e-05, "loss": 0.0008, "step": 11383 }, { "epoch": 5.3134189031505255, "grad_norm": 0.10595703125, "learning_rate": 9.01991986440752e-05, "loss": 0.001, "step": 11384 }, { "epoch": 5.313885647607934, "grad_norm": 0.12060546875, "learning_rate": 9.018459591054408e-05, "loss": 0.0032, "step": 11385 }, { "epoch": 5.314352392065344, "grad_norm": 0.0673828125, "learning_rate": 9.016999338834951e-05, "loss": 0.0007, "step": 11386 }, { "epoch": 5.314819136522754, "grad_norm": 0.06884765625, "learning_rate": 9.015539107780589e-05, "loss": 0.0007, "step": 11387 }, { "epoch": 5.315285880980164, "grad_norm": 0.29296875, "learning_rate": 9.014078897922761e-05, "loss": 0.0025, "step": 11388 }, { "epoch": 5.315752625437573, "grad_norm": 0.26171875, "learning_rate": 9.012618709292908e-05, "loss": 0.0043, "step": 11389 }, { "epoch": 5.3162193698949824, "grad_norm": 0.373046875, "learning_rate": 9.011158541922473e-05, "loss": 0.0025, "step": 11390 }, { "epoch": 5.316686114352392, "grad_norm": 0.30859375, "learning_rate": 9.00969839584289e-05, "loss": 0.0018, "step": 11391 }, { "epoch": 5.317152858809802, "grad_norm": 0.2490234375, "learning_rate": 9.008238271085596e-05, "loss": 0.0018, "step": 11392 }, { "epoch": 5.317619603267211, "grad_norm": 0.3203125, "learning_rate": 9.006778167682036e-05, "loss": 0.0052, "step": 11393 }, { "epoch": 5.318086347724621, "grad_norm": 0.1337890625, "learning_rate": 9.005318085663644e-05, "loss": 0.0008, "step": 11394 }, { "epoch": 5.3185530921820305, "grad_norm": 0.13671875, "learning_rate": 9.003858025061857e-05, "loss": 0.0011, "step": 11395 }, { "epoch": 5.3190198366394394, "grad_norm": 0.041748046875, "learning_rate": 9.00239798590811e-05, "loss": 0.0005, "step": 11396 }, { "epoch": 5.319486581096849, "grad_norm": 0.2099609375, "learning_rate": 9.000937968233844e-05, "loss": 0.0013, "step": 11397 }, { "epoch": 5.319953325554259, "grad_norm": 0.26953125, "learning_rate": 8.999477972070492e-05, "loss": 0.0027, "step": 11398 }, { "epoch": 5.320420070011669, "grad_norm": 0.0810546875, "learning_rate": 8.998017997449485e-05, "loss": 0.0028, "step": 11399 }, { "epoch": 5.320886814469079, "grad_norm": 0.373046875, "learning_rate": 8.996558044402266e-05, "loss": 0.004, "step": 11400 }, { "epoch": 5.3213535589264875, "grad_norm": 0.10546875, "learning_rate": 8.995098112960266e-05, "loss": 0.0013, "step": 11401 }, { "epoch": 5.321820303383897, "grad_norm": 0.275390625, "learning_rate": 8.993638203154916e-05, "loss": 0.0032, "step": 11402 }, { "epoch": 5.322287047841307, "grad_norm": 0.1533203125, "learning_rate": 8.992178315017654e-05, "loss": 0.0006, "step": 11403 }, { "epoch": 5.322753792298716, "grad_norm": 0.208984375, "learning_rate": 8.99071844857991e-05, "loss": 0.0048, "step": 11404 }, { "epoch": 5.323220536756126, "grad_norm": 0.416015625, "learning_rate": 8.98925860387312e-05, "loss": 0.0073, "step": 11405 }, { "epoch": 5.323687281213536, "grad_norm": 0.0634765625, "learning_rate": 8.98779878092871e-05, "loss": 0.0006, "step": 11406 }, { "epoch": 5.324154025670945, "grad_norm": 0.11083984375, "learning_rate": 8.986338979778119e-05, "loss": 0.0009, "step": 11407 }, { "epoch": 5.324620770128354, "grad_norm": 0.1474609375, "learning_rate": 8.984879200452775e-05, "loss": 0.0009, "step": 11408 }, { "epoch": 5.325087514585764, "grad_norm": 0.11474609375, "learning_rate": 8.983419442984106e-05, "loss": 0.0009, "step": 11409 }, { "epoch": 5.325554259043174, "grad_norm": 0.197265625, "learning_rate": 8.981959707403545e-05, "loss": 0.0015, "step": 11410 }, { "epoch": 5.326021003500584, "grad_norm": 0.041259765625, "learning_rate": 8.980499993742519e-05, "loss": 0.0005, "step": 11411 }, { "epoch": 5.326487747957993, "grad_norm": 0.1455078125, "learning_rate": 8.979040302032461e-05, "loss": 0.0008, "step": 11412 }, { "epoch": 5.326954492415402, "grad_norm": 0.08935546875, "learning_rate": 8.977580632304799e-05, "loss": 0.0007, "step": 11413 }, { "epoch": 5.327421236872812, "grad_norm": 0.06884765625, "learning_rate": 8.976120984590957e-05, "loss": 0.0029, "step": 11414 }, { "epoch": 5.327887981330222, "grad_norm": 0.09423828125, "learning_rate": 8.974661358922368e-05, "loss": 0.0009, "step": 11415 }, { "epoch": 5.328354725787631, "grad_norm": 0.1044921875, "learning_rate": 8.973201755330458e-05, "loss": 0.0008, "step": 11416 }, { "epoch": 5.328821470245041, "grad_norm": 0.1943359375, "learning_rate": 8.971742173846655e-05, "loss": 0.0031, "step": 11417 }, { "epoch": 5.3292882147024505, "grad_norm": 0.19921875, "learning_rate": 8.97028261450238e-05, "loss": 0.0012, "step": 11418 }, { "epoch": 5.32975495915986, "grad_norm": 0.3046875, "learning_rate": 8.968823077329065e-05, "loss": 0.0022, "step": 11419 }, { "epoch": 5.330221703617269, "grad_norm": 0.236328125, "learning_rate": 8.967363562358134e-05, "loss": 0.0025, "step": 11420 }, { "epoch": 5.330688448074679, "grad_norm": 0.2421875, "learning_rate": 8.965904069621008e-05, "loss": 0.0035, "step": 11421 }, { "epoch": 5.331155192532089, "grad_norm": 0.32421875, "learning_rate": 8.964444599149119e-05, "loss": 0.0037, "step": 11422 }, { "epoch": 5.331621936989499, "grad_norm": 0.150390625, "learning_rate": 8.962985150973885e-05, "loss": 0.0011, "step": 11423 }, { "epoch": 5.3320886814469075, "grad_norm": 0.1318359375, "learning_rate": 8.961525725126729e-05, "loss": 0.001, "step": 11424 }, { "epoch": 5.332555425904317, "grad_norm": 0.1240234375, "learning_rate": 8.960066321639079e-05, "loss": 0.0026, "step": 11425 }, { "epoch": 5.333022170361727, "grad_norm": 0.22265625, "learning_rate": 8.958606940542355e-05, "loss": 0.0015, "step": 11426 }, { "epoch": 5.333488914819137, "grad_norm": 0.10986328125, "learning_rate": 8.95714758186798e-05, "loss": 0.0012, "step": 11427 }, { "epoch": 5.333955659276546, "grad_norm": 0.1513671875, "learning_rate": 8.95568824564737e-05, "loss": 0.0012, "step": 11428 }, { "epoch": 5.334422403733956, "grad_norm": 0.255859375, "learning_rate": 8.954228931911956e-05, "loss": 0.002, "step": 11429 }, { "epoch": 5.334889148191365, "grad_norm": 0.2021484375, "learning_rate": 8.952769640693151e-05, "loss": 0.0015, "step": 11430 }, { "epoch": 5.335355892648775, "grad_norm": 0.07568359375, "learning_rate": 8.951310372022377e-05, "loss": 0.0007, "step": 11431 }, { "epoch": 5.335822637106184, "grad_norm": 0.412109375, "learning_rate": 8.949851125931055e-05, "loss": 0.0018, "step": 11432 }, { "epoch": 5.336289381563594, "grad_norm": 0.0908203125, "learning_rate": 8.948391902450603e-05, "loss": 0.0007, "step": 11433 }, { "epoch": 5.336756126021004, "grad_norm": 0.166015625, "learning_rate": 8.946932701612442e-05, "loss": 0.0012, "step": 11434 }, { "epoch": 5.3372228704784135, "grad_norm": 0.2890625, "learning_rate": 8.945473523447985e-05, "loss": 0.0017, "step": 11435 }, { "epoch": 5.337689614935822, "grad_norm": 0.03759765625, "learning_rate": 8.944014367988656e-05, "loss": 0.0004, "step": 11436 }, { "epoch": 5.338156359393232, "grad_norm": 0.052001953125, "learning_rate": 8.942555235265868e-05, "loss": 0.0005, "step": 11437 }, { "epoch": 5.338623103850642, "grad_norm": 0.384765625, "learning_rate": 8.941096125311038e-05, "loss": 0.0014, "step": 11438 }, { "epoch": 5.339089848308051, "grad_norm": 0.09765625, "learning_rate": 8.939637038155585e-05, "loss": 0.0007, "step": 11439 }, { "epoch": 5.339556592765461, "grad_norm": 0.251953125, "learning_rate": 8.938177973830923e-05, "loss": 0.0015, "step": 11440 }, { "epoch": 5.3400233372228705, "grad_norm": 0.1259765625, "learning_rate": 8.936718932368466e-05, "loss": 0.0007, "step": 11441 }, { "epoch": 5.34049008168028, "grad_norm": 0.306640625, "learning_rate": 8.935259913799632e-05, "loss": 0.0091, "step": 11442 }, { "epoch": 5.34095682613769, "grad_norm": 0.482421875, "learning_rate": 8.933800918155832e-05, "loss": 0.0117, "step": 11443 }, { "epoch": 5.341423570595099, "grad_norm": 0.28125, "learning_rate": 8.932341945468484e-05, "loss": 0.0013, "step": 11444 }, { "epoch": 5.341890315052509, "grad_norm": 0.15625, "learning_rate": 8.930882995768993e-05, "loss": 0.0011, "step": 11445 }, { "epoch": 5.342357059509919, "grad_norm": 0.23046875, "learning_rate": 8.929424069088781e-05, "loss": 0.0012, "step": 11446 }, { "epoch": 5.3428238039673275, "grad_norm": 0.283203125, "learning_rate": 8.927965165459256e-05, "loss": 0.0017, "step": 11447 }, { "epoch": 5.343290548424737, "grad_norm": 0.13671875, "learning_rate": 8.926506284911829e-05, "loss": 0.0028, "step": 11448 }, { "epoch": 5.343757292882147, "grad_norm": 0.515625, "learning_rate": 8.925047427477916e-05, "loss": 0.0058, "step": 11449 }, { "epoch": 5.344224037339557, "grad_norm": 0.15234375, "learning_rate": 8.923588593188923e-05, "loss": 0.0011, "step": 11450 }, { "epoch": 5.344690781796966, "grad_norm": 0.322265625, "learning_rate": 8.92212978207626e-05, "loss": 0.0019, "step": 11451 }, { "epoch": 5.345157526254376, "grad_norm": 0.2578125, "learning_rate": 8.920670994171341e-05, "loss": 0.0019, "step": 11452 }, { "epoch": 5.345624270711785, "grad_norm": 0.310546875, "learning_rate": 8.919212229505574e-05, "loss": 0.0017, "step": 11453 }, { "epoch": 5.346091015169195, "grad_norm": 0.1796875, "learning_rate": 8.917753488110366e-05, "loss": 0.0044, "step": 11454 }, { "epoch": 5.346557759626604, "grad_norm": 0.248046875, "learning_rate": 8.916294770017123e-05, "loss": 0.0014, "step": 11455 }, { "epoch": 5.347024504084014, "grad_norm": 0.306640625, "learning_rate": 8.914836075257264e-05, "loss": 0.0049, "step": 11456 }, { "epoch": 5.347491248541424, "grad_norm": 0.27734375, "learning_rate": 8.913377403862181e-05, "loss": 0.0013, "step": 11457 }, { "epoch": 5.3479579929988335, "grad_norm": 0.2333984375, "learning_rate": 8.91191875586329e-05, "loss": 0.0012, "step": 11458 }, { "epoch": 5.348424737456242, "grad_norm": 0.134765625, "learning_rate": 8.910460131291997e-05, "loss": 0.0012, "step": 11459 }, { "epoch": 5.348891481913652, "grad_norm": 0.310546875, "learning_rate": 8.909001530179702e-05, "loss": 0.0057, "step": 11460 }, { "epoch": 5.349358226371062, "grad_norm": 0.2255859375, "learning_rate": 8.907542952557817e-05, "loss": 0.0024, "step": 11461 }, { "epoch": 5.349824970828472, "grad_norm": 0.126953125, "learning_rate": 8.906084398457746e-05, "loss": 0.0007, "step": 11462 }, { "epoch": 5.350291715285881, "grad_norm": 0.220703125, "learning_rate": 8.904625867910888e-05, "loss": 0.0017, "step": 11463 }, { "epoch": 5.3507584597432905, "grad_norm": 0.05517578125, "learning_rate": 8.903167360948653e-05, "loss": 0.0008, "step": 11464 }, { "epoch": 5.3512252042007, "grad_norm": 0.130859375, "learning_rate": 8.901708877602443e-05, "loss": 0.0028, "step": 11465 }, { "epoch": 5.35169194865811, "grad_norm": 0.095703125, "learning_rate": 8.900250417903657e-05, "loss": 0.0006, "step": 11466 }, { "epoch": 5.352158693115519, "grad_norm": 0.1396484375, "learning_rate": 8.898791981883699e-05, "loss": 0.001, "step": 11467 }, { "epoch": 5.352625437572929, "grad_norm": 1.359375, "learning_rate": 8.897333569573974e-05, "loss": 0.0062, "step": 11468 }, { "epoch": 5.3530921820303385, "grad_norm": 0.06787109375, "learning_rate": 8.89587518100588e-05, "loss": 0.0005, "step": 11469 }, { "epoch": 5.353558926487748, "grad_norm": 0.05908203125, "learning_rate": 8.894416816210816e-05, "loss": 0.0005, "step": 11470 }, { "epoch": 5.354025670945157, "grad_norm": 0.26171875, "learning_rate": 8.892958475220186e-05, "loss": 0.0021, "step": 11471 }, { "epoch": 5.354492415402567, "grad_norm": 0.1484375, "learning_rate": 8.89150015806539e-05, "loss": 0.001, "step": 11472 }, { "epoch": 5.354959159859977, "grad_norm": 0.1669921875, "learning_rate": 8.890041864777823e-05, "loss": 0.0025, "step": 11473 }, { "epoch": 5.355425904317387, "grad_norm": 0.396484375, "learning_rate": 8.888583595388888e-05, "loss": 0.0035, "step": 11474 }, { "epoch": 5.3558926487747955, "grad_norm": 0.05078125, "learning_rate": 8.887125349929981e-05, "loss": 0.0008, "step": 11475 }, { "epoch": 5.356359393232205, "grad_norm": 0.302734375, "learning_rate": 8.885667128432499e-05, "loss": 0.0023, "step": 11476 }, { "epoch": 5.356826137689615, "grad_norm": 0.2314453125, "learning_rate": 8.88420893092784e-05, "loss": 0.0061, "step": 11477 }, { "epoch": 5.357292882147025, "grad_norm": 0.1611328125, "learning_rate": 8.882750757447402e-05, "loss": 0.0014, "step": 11478 }, { "epoch": 5.357759626604434, "grad_norm": 0.22265625, "learning_rate": 8.88129260802258e-05, "loss": 0.0015, "step": 11479 }, { "epoch": 5.358226371061844, "grad_norm": 0.1298828125, "learning_rate": 8.879834482684768e-05, "loss": 0.0041, "step": 11480 }, { "epoch": 5.358693115519253, "grad_norm": 0.203125, "learning_rate": 8.878376381465364e-05, "loss": 0.0026, "step": 11481 }, { "epoch": 5.359159859976662, "grad_norm": 0.427734375, "learning_rate": 8.87691830439576e-05, "loss": 0.003, "step": 11482 }, { "epoch": 5.359626604434072, "grad_norm": 0.318359375, "learning_rate": 8.875460251507352e-05, "loss": 0.0039, "step": 11483 }, { "epoch": 5.360093348891482, "grad_norm": 0.1015625, "learning_rate": 8.87400222283153e-05, "loss": 0.0041, "step": 11484 }, { "epoch": 5.360560093348892, "grad_norm": 0.07080078125, "learning_rate": 8.872544218399691e-05, "loss": 0.0006, "step": 11485 }, { "epoch": 5.3610268378063015, "grad_norm": 0.50390625, "learning_rate": 8.871086238243227e-05, "loss": 0.0034, "step": 11486 }, { "epoch": 5.36149358226371, "grad_norm": 0.1494140625, "learning_rate": 8.869628282393526e-05, "loss": 0.0012, "step": 11487 }, { "epoch": 5.36196032672112, "grad_norm": 0.060546875, "learning_rate": 8.868170350881984e-05, "loss": 0.0006, "step": 11488 }, { "epoch": 5.36242707117853, "grad_norm": 0.40625, "learning_rate": 8.86671244373999e-05, "loss": 0.0023, "step": 11489 }, { "epoch": 5.362893815635939, "grad_norm": 0.062255859375, "learning_rate": 8.865254560998935e-05, "loss": 0.0006, "step": 11490 }, { "epoch": 5.363360560093349, "grad_norm": 0.1640625, "learning_rate": 8.863796702690208e-05, "loss": 0.0009, "step": 11491 }, { "epoch": 5.3638273045507585, "grad_norm": 0.2119140625, "learning_rate": 8.8623388688452e-05, "loss": 0.0014, "step": 11492 }, { "epoch": 5.364294049008168, "grad_norm": 0.13671875, "learning_rate": 8.860881059495297e-05, "loss": 0.001, "step": 11493 }, { "epoch": 5.364760793465577, "grad_norm": 0.296875, "learning_rate": 8.859423274671887e-05, "loss": 0.0022, "step": 11494 }, { "epoch": 5.365227537922987, "grad_norm": 0.1396484375, "learning_rate": 8.857965514406361e-05, "loss": 0.0009, "step": 11495 }, { "epoch": 5.365694282380397, "grad_norm": 0.263671875, "learning_rate": 8.856507778730106e-05, "loss": 0.0016, "step": 11496 }, { "epoch": 5.366161026837807, "grad_norm": 0.30078125, "learning_rate": 8.855050067674504e-05, "loss": 0.0018, "step": 11497 }, { "epoch": 5.3666277712952155, "grad_norm": 0.486328125, "learning_rate": 8.853592381270947e-05, "loss": 0.0027, "step": 11498 }, { "epoch": 5.367094515752625, "grad_norm": 0.0693359375, "learning_rate": 8.852134719550817e-05, "loss": 0.0007, "step": 11499 }, { "epoch": 5.367561260210035, "grad_norm": 0.1533203125, "learning_rate": 8.8506770825455e-05, "loss": 0.0009, "step": 11500 }, { "epoch": 5.368028004667445, "grad_norm": 0.1982421875, "learning_rate": 8.849219470286381e-05, "loss": 0.0014, "step": 11501 }, { "epoch": 5.368494749124854, "grad_norm": 0.044921875, "learning_rate": 8.847761882804844e-05, "loss": 0.0004, "step": 11502 }, { "epoch": 5.368961493582264, "grad_norm": 0.1611328125, "learning_rate": 8.846304320132272e-05, "loss": 0.0011, "step": 11503 }, { "epoch": 5.369428238039673, "grad_norm": 0.038330078125, "learning_rate": 8.844846782300052e-05, "loss": 0.0005, "step": 11504 }, { "epoch": 5.369894982497083, "grad_norm": 0.134765625, "learning_rate": 8.843389269339558e-05, "loss": 0.0011, "step": 11505 }, { "epoch": 5.370361726954492, "grad_norm": 0.322265625, "learning_rate": 8.841931781282175e-05, "loss": 0.0016, "step": 11506 }, { "epoch": 5.370828471411902, "grad_norm": 0.10205078125, "learning_rate": 8.840474318159287e-05, "loss": 0.0044, "step": 11507 }, { "epoch": 5.371295215869312, "grad_norm": 0.1513671875, "learning_rate": 8.839016880002275e-05, "loss": 0.0038, "step": 11508 }, { "epoch": 5.3717619603267215, "grad_norm": 0.3203125, "learning_rate": 8.837559466842515e-05, "loss": 0.0016, "step": 11509 }, { "epoch": 5.37222870478413, "grad_norm": 0.08447265625, "learning_rate": 8.83610207871139e-05, "loss": 0.0006, "step": 11510 }, { "epoch": 5.37269544924154, "grad_norm": 0.14453125, "learning_rate": 8.83464471564028e-05, "loss": 0.0007, "step": 11511 }, { "epoch": 5.37316219369895, "grad_norm": 0.103515625, "learning_rate": 8.83318737766056e-05, "loss": 0.0046, "step": 11512 }, { "epoch": 5.37362893815636, "grad_norm": 0.099609375, "learning_rate": 8.831730064803613e-05, "loss": 0.0007, "step": 11513 }, { "epoch": 5.374095682613769, "grad_norm": 0.09326171875, "learning_rate": 8.830272777100813e-05, "loss": 0.0032, "step": 11514 }, { "epoch": 5.3745624270711785, "grad_norm": 0.0966796875, "learning_rate": 8.828815514583538e-05, "loss": 0.001, "step": 11515 }, { "epoch": 5.375029171528588, "grad_norm": 0.185546875, "learning_rate": 8.827358277283163e-05, "loss": 0.0013, "step": 11516 }, { "epoch": 5.375495915985998, "grad_norm": 0.0791015625, "learning_rate": 8.825901065231067e-05, "loss": 0.0037, "step": 11517 }, { "epoch": 5.375962660443407, "grad_norm": 0.08984375, "learning_rate": 8.824443878458624e-05, "loss": 0.0006, "step": 11518 }, { "epoch": 5.376429404900817, "grad_norm": 0.251953125, "learning_rate": 8.822986716997206e-05, "loss": 0.0027, "step": 11519 }, { "epoch": 5.376896149358227, "grad_norm": 0.2060546875, "learning_rate": 8.821529580878192e-05, "loss": 0.0015, "step": 11520 }, { "epoch": 5.377362893815636, "grad_norm": 0.0751953125, "learning_rate": 8.820072470132955e-05, "loss": 0.0008, "step": 11521 }, { "epoch": 5.377829638273045, "grad_norm": 0.0306396484375, "learning_rate": 8.818615384792863e-05, "loss": 0.0004, "step": 11522 }, { "epoch": 5.378296382730455, "grad_norm": 0.087890625, "learning_rate": 8.817158324889297e-05, "loss": 0.0009, "step": 11523 }, { "epoch": 5.378763127187865, "grad_norm": 0.1796875, "learning_rate": 8.815701290453622e-05, "loss": 0.0034, "step": 11524 }, { "epoch": 5.379229871645274, "grad_norm": 0.1787109375, "learning_rate": 8.814244281517213e-05, "loss": 0.0013, "step": 11525 }, { "epoch": 5.379696616102684, "grad_norm": 0.146484375, "learning_rate": 8.812787298111439e-05, "loss": 0.0009, "step": 11526 }, { "epoch": 5.380163360560093, "grad_norm": 0.228515625, "learning_rate": 8.811330340267672e-05, "loss": 0.0014, "step": 11527 }, { "epoch": 5.380630105017503, "grad_norm": 0.142578125, "learning_rate": 8.809873408017284e-05, "loss": 0.001, "step": 11528 }, { "epoch": 5.381096849474912, "grad_norm": 0.193359375, "learning_rate": 8.808416501391638e-05, "loss": 0.0011, "step": 11529 }, { "epoch": 5.381563593932322, "grad_norm": 0.140625, "learning_rate": 8.806959620422111e-05, "loss": 0.0011, "step": 11530 }, { "epoch": 5.382030338389732, "grad_norm": 0.16796875, "learning_rate": 8.805502765140066e-05, "loss": 0.001, "step": 11531 }, { "epoch": 5.3824970828471415, "grad_norm": 0.0673828125, "learning_rate": 8.804045935576869e-05, "loss": 0.0006, "step": 11532 }, { "epoch": 5.38296382730455, "grad_norm": 0.1806640625, "learning_rate": 8.802589131763892e-05, "loss": 0.0024, "step": 11533 }, { "epoch": 5.38343057176196, "grad_norm": 0.06689453125, "learning_rate": 8.801132353732499e-05, "loss": 0.0006, "step": 11534 }, { "epoch": 5.38389731621937, "grad_norm": 0.06396484375, "learning_rate": 8.799675601514059e-05, "loss": 0.0006, "step": 11535 }, { "epoch": 5.38436406067678, "grad_norm": 0.10107421875, "learning_rate": 8.798218875139932e-05, "loss": 0.0008, "step": 11536 }, { "epoch": 5.384830805134189, "grad_norm": 0.0966796875, "learning_rate": 8.796762174641487e-05, "loss": 0.0007, "step": 11537 }, { "epoch": 5.3852975495915985, "grad_norm": 0.042236328125, "learning_rate": 8.795305500050089e-05, "loss": 0.002, "step": 11538 }, { "epoch": 5.385764294049008, "grad_norm": 0.26171875, "learning_rate": 8.793848851397096e-05, "loss": 0.0022, "step": 11539 }, { "epoch": 5.386231038506418, "grad_norm": 0.244140625, "learning_rate": 8.79239222871388e-05, "loss": 0.0035, "step": 11540 }, { "epoch": 5.386697782963827, "grad_norm": 0.0294189453125, "learning_rate": 8.790935632031797e-05, "loss": 0.0004, "step": 11541 }, { "epoch": 5.387164527421237, "grad_norm": 0.033447265625, "learning_rate": 8.789479061382213e-05, "loss": 0.0004, "step": 11542 }, { "epoch": 5.3876312718786465, "grad_norm": 0.1103515625, "learning_rate": 8.788022516796485e-05, "loss": 0.0008, "step": 11543 }, { "epoch": 5.388098016336056, "grad_norm": 0.07177734375, "learning_rate": 8.78656599830598e-05, "loss": 0.0007, "step": 11544 }, { "epoch": 5.388564760793465, "grad_norm": 0.435546875, "learning_rate": 8.785109505942052e-05, "loss": 0.005, "step": 11545 }, { "epoch": 5.389031505250875, "grad_norm": 0.8203125, "learning_rate": 8.783653039736067e-05, "loss": 0.0103, "step": 11546 }, { "epoch": 5.389498249708285, "grad_norm": 0.11279296875, "learning_rate": 8.78219659971938e-05, "loss": 0.0009, "step": 11547 }, { "epoch": 5.389964994165695, "grad_norm": 0.10400390625, "learning_rate": 8.780740185923353e-05, "loss": 0.0007, "step": 11548 }, { "epoch": 5.3904317386231035, "grad_norm": 0.1484375, "learning_rate": 8.77928379837934e-05, "loss": 0.0029, "step": 11549 }, { "epoch": 5.390898483080513, "grad_norm": 0.0908203125, "learning_rate": 8.777827437118704e-05, "loss": 0.0007, "step": 11550 }, { "epoch": 5.391365227537923, "grad_norm": 0.0302734375, "learning_rate": 8.776371102172799e-05, "loss": 0.0004, "step": 11551 }, { "epoch": 5.391831971995333, "grad_norm": 0.083984375, "learning_rate": 8.774914793572983e-05, "loss": 0.0051, "step": 11552 }, { "epoch": 5.392298716452742, "grad_norm": 0.16015625, "learning_rate": 8.77345851135061e-05, "loss": 0.003, "step": 11553 }, { "epoch": 5.392765460910152, "grad_norm": 0.047607421875, "learning_rate": 8.772002255537037e-05, "loss": 0.0006, "step": 11554 }, { "epoch": 5.393232205367561, "grad_norm": 0.07373046875, "learning_rate": 8.770546026163612e-05, "loss": 0.0043, "step": 11555 }, { "epoch": 5.393698949824971, "grad_norm": 0.34375, "learning_rate": 8.769089823261701e-05, "loss": 0.0036, "step": 11556 }, { "epoch": 5.39416569428238, "grad_norm": 0.6484375, "learning_rate": 8.767633646862651e-05, "loss": 0.0044, "step": 11557 }, { "epoch": 5.39463243873979, "grad_norm": 0.052978515625, "learning_rate": 8.766177496997812e-05, "loss": 0.0007, "step": 11558 }, { "epoch": 5.3950991831972, "grad_norm": 0.22265625, "learning_rate": 8.764721373698545e-05, "loss": 0.0018, "step": 11559 }, { "epoch": 5.3955659276546095, "grad_norm": 0.2412109375, "learning_rate": 8.763265276996196e-05, "loss": 0.004, "step": 11560 }, { "epoch": 5.396032672112018, "grad_norm": 0.115234375, "learning_rate": 8.761809206922116e-05, "loss": 0.0008, "step": 11561 }, { "epoch": 5.396499416569428, "grad_norm": 0.208984375, "learning_rate": 8.76035316350766e-05, "loss": 0.0009, "step": 11562 }, { "epoch": 5.396966161026838, "grad_norm": 0.2734375, "learning_rate": 8.758897146784175e-05, "loss": 0.0025, "step": 11563 }, { "epoch": 5.397432905484248, "grad_norm": 0.287109375, "learning_rate": 8.757441156783012e-05, "loss": 0.0018, "step": 11564 }, { "epoch": 5.397899649941657, "grad_norm": 0.05859375, "learning_rate": 8.755985193535518e-05, "loss": 0.0006, "step": 11565 }, { "epoch": 5.3983663943990665, "grad_norm": 0.099609375, "learning_rate": 8.754529257073044e-05, "loss": 0.001, "step": 11566 }, { "epoch": 5.398833138856476, "grad_norm": 0.06982421875, "learning_rate": 8.753073347426937e-05, "loss": 0.0007, "step": 11567 }, { "epoch": 5.399299883313885, "grad_norm": 0.193359375, "learning_rate": 8.751617464628544e-05, "loss": 0.0015, "step": 11568 }, { "epoch": 5.399766627771295, "grad_norm": 0.2578125, "learning_rate": 8.750161608709212e-05, "loss": 0.0011, "step": 11569 }, { "epoch": 5.400233372228705, "grad_norm": 0.1640625, "learning_rate": 8.748705779700289e-05, "loss": 0.001, "step": 11570 }, { "epoch": 5.400700116686115, "grad_norm": 0.061767578125, "learning_rate": 8.747249977633114e-05, "loss": 0.0005, "step": 11571 }, { "epoch": 5.4011668611435235, "grad_norm": 0.0849609375, "learning_rate": 8.745794202539042e-05, "loss": 0.0006, "step": 11572 }, { "epoch": 5.401633605600933, "grad_norm": 0.1298828125, "learning_rate": 8.744338454449412e-05, "loss": 0.0009, "step": 11573 }, { "epoch": 5.402100350058343, "grad_norm": 0.07080078125, "learning_rate": 8.742882733395566e-05, "loss": 0.001, "step": 11574 }, { "epoch": 5.402567094515753, "grad_norm": 0.37109375, "learning_rate": 8.74142703940885e-05, "loss": 0.0018, "step": 11575 }, { "epoch": 5.403033838973162, "grad_norm": 0.0703125, "learning_rate": 8.739971372520607e-05, "loss": 0.0006, "step": 11576 }, { "epoch": 5.403500583430572, "grad_norm": 0.0673828125, "learning_rate": 8.738515732762179e-05, "loss": 0.0008, "step": 11577 }, { "epoch": 5.403967327887981, "grad_norm": 0.234375, "learning_rate": 8.737060120164904e-05, "loss": 0.0014, "step": 11578 }, { "epoch": 5.404434072345391, "grad_norm": 0.07275390625, "learning_rate": 8.735604534760129e-05, "loss": 0.0009, "step": 11579 }, { "epoch": 5.4049008168028, "grad_norm": 0.053955078125, "learning_rate": 8.73414897657919e-05, "loss": 0.0005, "step": 11580 }, { "epoch": 5.40536756126021, "grad_norm": 0.2138671875, "learning_rate": 8.732693445653427e-05, "loss": 0.0032, "step": 11581 }, { "epoch": 5.40583430571762, "grad_norm": 0.06591796875, "learning_rate": 8.731237942014182e-05, "loss": 0.0024, "step": 11582 }, { "epoch": 5.4063010501750295, "grad_norm": 0.32421875, "learning_rate": 8.729782465692792e-05, "loss": 0.0053, "step": 11583 }, { "epoch": 5.406767794632438, "grad_norm": 0.244140625, "learning_rate": 8.728327016720595e-05, "loss": 0.0082, "step": 11584 }, { "epoch": 5.407234539089848, "grad_norm": 0.330078125, "learning_rate": 8.726871595128925e-05, "loss": 0.0055, "step": 11585 }, { "epoch": 5.407701283547258, "grad_norm": 0.0908203125, "learning_rate": 8.725416200949126e-05, "loss": 0.0008, "step": 11586 }, { "epoch": 5.408168028004668, "grad_norm": 0.263671875, "learning_rate": 8.723960834212529e-05, "loss": 0.0015, "step": 11587 }, { "epoch": 5.408634772462077, "grad_norm": 0.134765625, "learning_rate": 8.72250549495047e-05, "loss": 0.0012, "step": 11588 }, { "epoch": 5.4091015169194865, "grad_norm": 0.12353515625, "learning_rate": 8.721050183194286e-05, "loss": 0.0008, "step": 11589 }, { "epoch": 5.409568261376896, "grad_norm": 0.1357421875, "learning_rate": 8.719594898975311e-05, "loss": 0.0014, "step": 11590 }, { "epoch": 5.410035005834306, "grad_norm": 0.1787109375, "learning_rate": 8.718139642324876e-05, "loss": 0.0008, "step": 11591 }, { "epoch": 5.410501750291715, "grad_norm": 0.251953125, "learning_rate": 8.71668441327432e-05, "loss": 0.0014, "step": 11592 }, { "epoch": 5.410968494749125, "grad_norm": 0.322265625, "learning_rate": 8.715229211854972e-05, "loss": 0.0032, "step": 11593 }, { "epoch": 5.411435239206535, "grad_norm": 0.275390625, "learning_rate": 8.713774038098164e-05, "loss": 0.004, "step": 11594 }, { "epoch": 5.411901983663944, "grad_norm": 0.33984375, "learning_rate": 8.712318892035226e-05, "loss": 0.0021, "step": 11595 }, { "epoch": 5.412368728121353, "grad_norm": 0.90234375, "learning_rate": 8.710863773697493e-05, "loss": 0.0032, "step": 11596 }, { "epoch": 5.412835472578763, "grad_norm": 0.54296875, "learning_rate": 8.709408683116294e-05, "loss": 0.0068, "step": 11597 }, { "epoch": 5.413302217036173, "grad_norm": 0.11767578125, "learning_rate": 8.707953620322955e-05, "loss": 0.0008, "step": 11598 }, { "epoch": 5.413768961493583, "grad_norm": 0.1572265625, "learning_rate": 8.706498585348811e-05, "loss": 0.0013, "step": 11599 }, { "epoch": 5.414235705950992, "grad_norm": 0.435546875, "learning_rate": 8.705043578225189e-05, "loss": 0.0036, "step": 11600 }, { "epoch": 5.414702450408401, "grad_norm": 0.2578125, "learning_rate": 8.703588598983415e-05, "loss": 0.0033, "step": 11601 }, { "epoch": 5.415169194865811, "grad_norm": 0.13671875, "learning_rate": 8.702133647654815e-05, "loss": 0.0011, "step": 11602 }, { "epoch": 5.41563593932322, "grad_norm": 0.11279296875, "learning_rate": 8.700678724270717e-05, "loss": 0.0009, "step": 11603 }, { "epoch": 5.41610268378063, "grad_norm": 0.482421875, "learning_rate": 8.699223828862448e-05, "loss": 0.0056, "step": 11604 }, { "epoch": 5.41656942823804, "grad_norm": 0.224609375, "learning_rate": 8.697768961461334e-05, "loss": 0.0019, "step": 11605 }, { "epoch": 5.4170361726954495, "grad_norm": 0.2294921875, "learning_rate": 8.6963141220987e-05, "loss": 0.0033, "step": 11606 }, { "epoch": 5.417502917152859, "grad_norm": 0.04150390625, "learning_rate": 8.694859310805867e-05, "loss": 0.0004, "step": 11607 }, { "epoch": 5.417969661610268, "grad_norm": 0.1591796875, "learning_rate": 8.693404527614163e-05, "loss": 0.001, "step": 11608 }, { "epoch": 5.418436406067678, "grad_norm": 0.10595703125, "learning_rate": 8.691949772554908e-05, "loss": 0.0009, "step": 11609 }, { "epoch": 5.418903150525088, "grad_norm": 0.09521484375, "learning_rate": 8.690495045659425e-05, "loss": 0.0007, "step": 11610 }, { "epoch": 5.419369894982497, "grad_norm": 0.1767578125, "learning_rate": 8.689040346959038e-05, "loss": 0.0018, "step": 11611 }, { "epoch": 5.4198366394399065, "grad_norm": 0.1904296875, "learning_rate": 8.687585676485069e-05, "loss": 0.0019, "step": 11612 }, { "epoch": 5.420303383897316, "grad_norm": 0.46875, "learning_rate": 8.686131034268834e-05, "loss": 0.0049, "step": 11613 }, { "epoch": 5.420770128354726, "grad_norm": 0.2392578125, "learning_rate": 8.684676420341653e-05, "loss": 0.0012, "step": 11614 }, { "epoch": 5.421236872812135, "grad_norm": 0.25, "learning_rate": 8.683221834734851e-05, "loss": 0.0075, "step": 11615 }, { "epoch": 5.421703617269545, "grad_norm": 0.291015625, "learning_rate": 8.681767277479745e-05, "loss": 0.0034, "step": 11616 }, { "epoch": 5.4221703617269545, "grad_norm": 0.19140625, "learning_rate": 8.68031274860765e-05, "loss": 0.0013, "step": 11617 }, { "epoch": 5.422637106184364, "grad_norm": 0.056640625, "learning_rate": 8.678858248149888e-05, "loss": 0.0008, "step": 11618 }, { "epoch": 5.423103850641773, "grad_norm": 0.0615234375, "learning_rate": 8.677403776137772e-05, "loss": 0.0006, "step": 11619 }, { "epoch": 5.423570595099183, "grad_norm": 0.16015625, "learning_rate": 8.675949332602619e-05, "loss": 0.0014, "step": 11620 }, { "epoch": 5.424037339556593, "grad_norm": 0.357421875, "learning_rate": 8.674494917575748e-05, "loss": 0.0103, "step": 11621 }, { "epoch": 5.424504084014003, "grad_norm": 0.1474609375, "learning_rate": 8.673040531088473e-05, "loss": 0.0012, "step": 11622 }, { "epoch": 5.4249708284714115, "grad_norm": 0.140625, "learning_rate": 8.671586173172105e-05, "loss": 0.001, "step": 11623 }, { "epoch": 5.425437572928821, "grad_norm": 0.06005859375, "learning_rate": 8.67013184385796e-05, "loss": 0.0007, "step": 11624 }, { "epoch": 5.425904317386231, "grad_norm": 0.1796875, "learning_rate": 8.668677543177355e-05, "loss": 0.0013, "step": 11625 }, { "epoch": 5.426371061843641, "grad_norm": 0.306640625, "learning_rate": 8.667223271161598e-05, "loss": 0.0022, "step": 11626 }, { "epoch": 5.42683780630105, "grad_norm": 0.28125, "learning_rate": 8.665769027842003e-05, "loss": 0.0021, "step": 11627 }, { "epoch": 5.42730455075846, "grad_norm": 0.466796875, "learning_rate": 8.664314813249881e-05, "loss": 0.0053, "step": 11628 }, { "epoch": 5.427771295215869, "grad_norm": 0.1591796875, "learning_rate": 8.662860627416543e-05, "loss": 0.0007, "step": 11629 }, { "epoch": 5.428238039673279, "grad_norm": 0.400390625, "learning_rate": 8.6614064703733e-05, "loss": 0.0024, "step": 11630 }, { "epoch": 5.428704784130688, "grad_norm": 0.037109375, "learning_rate": 8.659952342151459e-05, "loss": 0.0005, "step": 11631 }, { "epoch": 5.429171528588098, "grad_norm": 0.1123046875, "learning_rate": 8.658498242782333e-05, "loss": 0.0011, "step": 11632 }, { "epoch": 5.429638273045508, "grad_norm": 0.2578125, "learning_rate": 8.657044172297228e-05, "loss": 0.0009, "step": 11633 }, { "epoch": 5.4301050175029175, "grad_norm": 0.0859375, "learning_rate": 8.65559013072745e-05, "loss": 0.0009, "step": 11634 }, { "epoch": 5.430571761960326, "grad_norm": 0.455078125, "learning_rate": 8.65413611810431e-05, "loss": 0.0082, "step": 11635 }, { "epoch": 5.431038506417736, "grad_norm": 0.279296875, "learning_rate": 8.652682134459114e-05, "loss": 0.0016, "step": 11636 }, { "epoch": 5.431505250875146, "grad_norm": 0.2109375, "learning_rate": 8.651228179823163e-05, "loss": 0.0048, "step": 11637 }, { "epoch": 5.431971995332556, "grad_norm": 0.2353515625, "learning_rate": 8.649774254227767e-05, "loss": 0.0032, "step": 11638 }, { "epoch": 5.432438739789965, "grad_norm": 0.3359375, "learning_rate": 8.648320357704231e-05, "loss": 0.0022, "step": 11639 }, { "epoch": 5.4329054842473745, "grad_norm": 0.458984375, "learning_rate": 8.646866490283854e-05, "loss": 0.0022, "step": 11640 }, { "epoch": 5.433372228704784, "grad_norm": 0.08154296875, "learning_rate": 8.645412651997947e-05, "loss": 0.0009, "step": 11641 }, { "epoch": 5.433838973162194, "grad_norm": 0.1357421875, "learning_rate": 8.643958842877808e-05, "loss": 0.001, "step": 11642 }, { "epoch": 5.434305717619603, "grad_norm": 0.220703125, "learning_rate": 8.642505062954738e-05, "loss": 0.0016, "step": 11643 }, { "epoch": 5.434772462077013, "grad_norm": 0.1669921875, "learning_rate": 8.64105131226004e-05, "loss": 0.0041, "step": 11644 }, { "epoch": 5.435239206534423, "grad_norm": 0.279296875, "learning_rate": 8.639597590825014e-05, "loss": 0.0035, "step": 11645 }, { "epoch": 5.4357059509918315, "grad_norm": 0.2255859375, "learning_rate": 8.638143898680964e-05, "loss": 0.0018, "step": 11646 }, { "epoch": 5.436172695449241, "grad_norm": 0.0634765625, "learning_rate": 8.636690235859189e-05, "loss": 0.0005, "step": 11647 }, { "epoch": 5.436639439906651, "grad_norm": 0.142578125, "learning_rate": 8.635236602390983e-05, "loss": 0.0011, "step": 11648 }, { "epoch": 5.437106184364061, "grad_norm": 0.333984375, "learning_rate": 8.633782998307646e-05, "loss": 0.0018, "step": 11649 }, { "epoch": 5.437572928821471, "grad_norm": 0.2177734375, "learning_rate": 8.632329423640478e-05, "loss": 0.0016, "step": 11650 }, { "epoch": 5.43803967327888, "grad_norm": 0.33203125, "learning_rate": 8.630875878420775e-05, "loss": 0.0027, "step": 11651 }, { "epoch": 5.438506417736289, "grad_norm": 0.27734375, "learning_rate": 8.629422362679831e-05, "loss": 0.011, "step": 11652 }, { "epoch": 5.438973162193699, "grad_norm": 0.25390625, "learning_rate": 8.627968876448947e-05, "loss": 0.0036, "step": 11653 }, { "epoch": 5.439439906651108, "grad_norm": 0.1474609375, "learning_rate": 8.626515419759416e-05, "loss": 0.0009, "step": 11654 }, { "epoch": 5.439906651108518, "grad_norm": 0.4921875, "learning_rate": 8.625061992642531e-05, "loss": 0.0028, "step": 11655 }, { "epoch": 5.440373395565928, "grad_norm": 0.34375, "learning_rate": 8.623608595129584e-05, "loss": 0.0027, "step": 11656 }, { "epoch": 5.4408401400233375, "grad_norm": 0.16015625, "learning_rate": 8.622155227251875e-05, "loss": 0.0026, "step": 11657 }, { "epoch": 5.441306884480746, "grad_norm": 0.1689453125, "learning_rate": 8.620701889040691e-05, "loss": 0.0012, "step": 11658 }, { "epoch": 5.441773628938156, "grad_norm": 0.271484375, "learning_rate": 8.619248580527324e-05, "loss": 0.0026, "step": 11659 }, { "epoch": 5.442240373395566, "grad_norm": 0.26953125, "learning_rate": 8.61779530174307e-05, "loss": 0.0032, "step": 11660 }, { "epoch": 5.442707117852976, "grad_norm": 0.11083984375, "learning_rate": 8.616342052719217e-05, "loss": 0.0011, "step": 11661 }, { "epoch": 5.443173862310385, "grad_norm": 0.09228515625, "learning_rate": 8.614888833487051e-05, "loss": 0.0007, "step": 11662 }, { "epoch": 5.4436406067677945, "grad_norm": 0.396484375, "learning_rate": 8.613435644077868e-05, "loss": 0.0038, "step": 11663 }, { "epoch": 5.444107351225204, "grad_norm": 0.10302734375, "learning_rate": 8.611982484522953e-05, "loss": 0.0028, "step": 11664 }, { "epoch": 5.444574095682614, "grad_norm": 0.16015625, "learning_rate": 8.610529354853599e-05, "loss": 0.0015, "step": 11665 }, { "epoch": 5.445040840140023, "grad_norm": 0.283203125, "learning_rate": 8.609076255101083e-05, "loss": 0.0015, "step": 11666 }, { "epoch": 5.445507584597433, "grad_norm": 0.216796875, "learning_rate": 8.607623185296702e-05, "loss": 0.0027, "step": 11667 }, { "epoch": 5.445974329054843, "grad_norm": 0.12060546875, "learning_rate": 8.606170145471739e-05, "loss": 0.001, "step": 11668 }, { "epoch": 5.446441073512252, "grad_norm": 0.169921875, "learning_rate": 8.604717135657479e-05, "loss": 0.0007, "step": 11669 }, { "epoch": 5.446907817969661, "grad_norm": 0.302734375, "learning_rate": 8.603264155885207e-05, "loss": 0.0017, "step": 11670 }, { "epoch": 5.447374562427071, "grad_norm": 0.361328125, "learning_rate": 8.601811206186207e-05, "loss": 0.0032, "step": 11671 }, { "epoch": 5.447841306884481, "grad_norm": 0.30859375, "learning_rate": 8.600358286591764e-05, "loss": 0.0031, "step": 11672 }, { "epoch": 5.448308051341891, "grad_norm": 0.048828125, "learning_rate": 8.598905397133159e-05, "loss": 0.0019, "step": 11673 }, { "epoch": 5.4487747957993, "grad_norm": 0.2890625, "learning_rate": 8.597452537841676e-05, "loss": 0.0024, "step": 11674 }, { "epoch": 5.449241540256709, "grad_norm": 0.1923828125, "learning_rate": 8.595999708748596e-05, "loss": 0.0014, "step": 11675 }, { "epoch": 5.449708284714119, "grad_norm": 0.154296875, "learning_rate": 8.594546909885198e-05, "loss": 0.0007, "step": 11676 }, { "epoch": 5.450175029171529, "grad_norm": 0.41796875, "learning_rate": 8.593094141282767e-05, "loss": 0.0059, "step": 11677 }, { "epoch": 5.450641773628938, "grad_norm": 0.044677734375, "learning_rate": 8.59164140297258e-05, "loss": 0.0006, "step": 11678 }, { "epoch": 5.451108518086348, "grad_norm": 0.2177734375, "learning_rate": 8.590188694985914e-05, "loss": 0.0016, "step": 11679 }, { "epoch": 5.4515752625437575, "grad_norm": 0.1943359375, "learning_rate": 8.588736017354051e-05, "loss": 0.0013, "step": 11680 }, { "epoch": 5.452042007001167, "grad_norm": 0.26171875, "learning_rate": 8.587283370108269e-05, "loss": 0.0011, "step": 11681 }, { "epoch": 5.452508751458576, "grad_norm": 0.07861328125, "learning_rate": 8.585830753279844e-05, "loss": 0.0009, "step": 11682 }, { "epoch": 5.452975495915986, "grad_norm": 0.07958984375, "learning_rate": 8.584378166900047e-05, "loss": 0.0006, "step": 11683 }, { "epoch": 5.453442240373396, "grad_norm": 0.3046875, "learning_rate": 8.582925611000164e-05, "loss": 0.0032, "step": 11684 }, { "epoch": 5.4539089848308056, "grad_norm": 0.0595703125, "learning_rate": 8.581473085611462e-05, "loss": 0.0005, "step": 11685 }, { "epoch": 5.4543757292882145, "grad_norm": 0.150390625, "learning_rate": 8.580020590765216e-05, "loss": 0.0008, "step": 11686 }, { "epoch": 5.454842473745624, "grad_norm": 0.1962890625, "learning_rate": 8.578568126492706e-05, "loss": 0.0053, "step": 11687 }, { "epoch": 5.455309218203034, "grad_norm": 0.28125, "learning_rate": 8.577115692825201e-05, "loss": 0.002, "step": 11688 }, { "epoch": 5.455775962660443, "grad_norm": 0.169921875, "learning_rate": 8.575663289793973e-05, "loss": 0.0012, "step": 11689 }, { "epoch": 5.456242707117853, "grad_norm": 0.1953125, "learning_rate": 8.574210917430296e-05, "loss": 0.0037, "step": 11690 }, { "epoch": 5.4567094515752625, "grad_norm": 0.0703125, "learning_rate": 8.572758575765438e-05, "loss": 0.0006, "step": 11691 }, { "epoch": 5.457176196032672, "grad_norm": 0.1962890625, "learning_rate": 8.571306264830673e-05, "loss": 0.0014, "step": 11692 }, { "epoch": 5.457642940490082, "grad_norm": 0.1943359375, "learning_rate": 8.569853984657268e-05, "loss": 0.0014, "step": 11693 }, { "epoch": 5.458109684947491, "grad_norm": 0.1435546875, "learning_rate": 8.568401735276495e-05, "loss": 0.0013, "step": 11694 }, { "epoch": 5.458576429404901, "grad_norm": 0.0830078125, "learning_rate": 8.566949516719622e-05, "loss": 0.0009, "step": 11695 }, { "epoch": 5.459043173862311, "grad_norm": 0.08740234375, "learning_rate": 8.565497329017914e-05, "loss": 0.0009, "step": 11696 }, { "epoch": 5.4595099183197195, "grad_norm": 0.3359375, "learning_rate": 8.564045172202642e-05, "loss": 0.0018, "step": 11697 }, { "epoch": 5.459976662777129, "grad_norm": 0.3046875, "learning_rate": 8.562593046305066e-05, "loss": 0.003, "step": 11698 }, { "epoch": 5.460443407234539, "grad_norm": 0.2119140625, "learning_rate": 8.56114095135646e-05, "loss": 0.0067, "step": 11699 }, { "epoch": 5.460910151691949, "grad_norm": 0.10400390625, "learning_rate": 8.559688887388086e-05, "loss": 0.0007, "step": 11700 }, { "epoch": 5.461376896149358, "grad_norm": 0.28515625, "learning_rate": 8.558236854431205e-05, "loss": 0.0025, "step": 11701 }, { "epoch": 5.461843640606768, "grad_norm": 0.03125, "learning_rate": 8.556784852517088e-05, "loss": 0.0004, "step": 11702 }, { "epoch": 5.462310385064177, "grad_norm": 0.2158203125, "learning_rate": 8.555332881676993e-05, "loss": 0.0038, "step": 11703 }, { "epoch": 5.462777129521587, "grad_norm": 0.248046875, "learning_rate": 8.553880941942185e-05, "loss": 0.0015, "step": 11704 }, { "epoch": 5.463243873978996, "grad_norm": 0.1767578125, "learning_rate": 8.552429033343922e-05, "loss": 0.0008, "step": 11705 }, { "epoch": 5.463710618436406, "grad_norm": 0.1552734375, "learning_rate": 8.55097715591347e-05, "loss": 0.0011, "step": 11706 }, { "epoch": 5.464177362893816, "grad_norm": 0.2041015625, "learning_rate": 8.549525309682088e-05, "loss": 0.0087, "step": 11707 }, { "epoch": 5.4646441073512255, "grad_norm": 0.2578125, "learning_rate": 8.548073494681032e-05, "loss": 0.0018, "step": 11708 }, { "epoch": 5.465110851808634, "grad_norm": 0.236328125, "learning_rate": 8.546621710941569e-05, "loss": 0.0017, "step": 11709 }, { "epoch": 5.465577596266044, "grad_norm": 0.08740234375, "learning_rate": 8.545169958494952e-05, "loss": 0.0008, "step": 11710 }, { "epoch": 5.466044340723454, "grad_norm": 0.2392578125, "learning_rate": 8.543718237372437e-05, "loss": 0.003, "step": 11711 }, { "epoch": 5.466511085180864, "grad_norm": 0.474609375, "learning_rate": 8.542266547605288e-05, "loss": 0.0046, "step": 11712 }, { "epoch": 5.466977829638273, "grad_norm": 0.17578125, "learning_rate": 8.540814889224756e-05, "loss": 0.001, "step": 11713 }, { "epoch": 5.4674445740956825, "grad_norm": 0.050537109375, "learning_rate": 8.539363262262098e-05, "loss": 0.0023, "step": 11714 }, { "epoch": 5.467911318553092, "grad_norm": 0.10791015625, "learning_rate": 8.53791166674857e-05, "loss": 0.0008, "step": 11715 }, { "epoch": 5.468378063010502, "grad_norm": 0.1455078125, "learning_rate": 8.536460102715427e-05, "loss": 0.0026, "step": 11716 }, { "epoch": 5.468844807467911, "grad_norm": 0.212890625, "learning_rate": 8.535008570193921e-05, "loss": 0.0048, "step": 11717 }, { "epoch": 5.469311551925321, "grad_norm": 0.31640625, "learning_rate": 8.533557069215305e-05, "loss": 0.005, "step": 11718 }, { "epoch": 5.469778296382731, "grad_norm": 0.10693359375, "learning_rate": 8.532105599810834e-05, "loss": 0.0007, "step": 11719 }, { "epoch": 5.47024504084014, "grad_norm": 0.103515625, "learning_rate": 8.530654162011757e-05, "loss": 0.0009, "step": 11720 }, { "epoch": 5.470711785297549, "grad_norm": 0.1943359375, "learning_rate": 8.529202755849327e-05, "loss": 0.0015, "step": 11721 }, { "epoch": 5.471178529754959, "grad_norm": 0.0257568359375, "learning_rate": 8.527751381354792e-05, "loss": 0.0005, "step": 11722 }, { "epoch": 5.471645274212369, "grad_norm": 0.12255859375, "learning_rate": 8.526300038559405e-05, "loss": 0.0009, "step": 11723 }, { "epoch": 5.472112018669779, "grad_norm": 0.259765625, "learning_rate": 8.524848727494412e-05, "loss": 0.0015, "step": 11724 }, { "epoch": 5.472578763127188, "grad_norm": 0.1484375, "learning_rate": 8.523397448191061e-05, "loss": 0.0008, "step": 11725 }, { "epoch": 5.473045507584597, "grad_norm": 0.388671875, "learning_rate": 8.521946200680605e-05, "loss": 0.0077, "step": 11726 }, { "epoch": 5.473512252042007, "grad_norm": 0.037353515625, "learning_rate": 8.520494984994286e-05, "loss": 0.0005, "step": 11727 }, { "epoch": 5.473978996499417, "grad_norm": 0.02392578125, "learning_rate": 8.51904380116335e-05, "loss": 0.0005, "step": 11728 }, { "epoch": 5.474445740956826, "grad_norm": 0.0908203125, "learning_rate": 8.517592649219046e-05, "loss": 0.0006, "step": 11729 }, { "epoch": 5.474912485414236, "grad_norm": 0.12158203125, "learning_rate": 8.516141529192617e-05, "loss": 0.0036, "step": 11730 }, { "epoch": 5.4753792298716455, "grad_norm": 0.046630859375, "learning_rate": 8.514690441115307e-05, "loss": 0.0005, "step": 11731 }, { "epoch": 5.475845974329054, "grad_norm": 0.1044921875, "learning_rate": 8.513239385018357e-05, "loss": 0.0005, "step": 11732 }, { "epoch": 5.476312718786464, "grad_norm": 0.193359375, "learning_rate": 8.511788360933016e-05, "loss": 0.0058, "step": 11733 }, { "epoch": 5.476779463243874, "grad_norm": 0.11376953125, "learning_rate": 8.510337368890522e-05, "loss": 0.0009, "step": 11734 }, { "epoch": 5.477246207701284, "grad_norm": 0.043212890625, "learning_rate": 8.508886408922115e-05, "loss": 0.0005, "step": 11735 }, { "epoch": 5.477712952158694, "grad_norm": 0.0869140625, "learning_rate": 8.50743548105904e-05, "loss": 0.0008, "step": 11736 }, { "epoch": 5.4781796966161025, "grad_norm": 0.193359375, "learning_rate": 8.505984585332537e-05, "loss": 0.0018, "step": 11737 }, { "epoch": 5.478646441073512, "grad_norm": 0.07666015625, "learning_rate": 8.50453372177384e-05, "loss": 0.0006, "step": 11738 }, { "epoch": 5.479113185530922, "grad_norm": 0.056640625, "learning_rate": 8.503082890414193e-05, "loss": 0.0006, "step": 11739 }, { "epoch": 5.479579929988331, "grad_norm": 0.248046875, "learning_rate": 8.501632091284832e-05, "loss": 0.0012, "step": 11740 }, { "epoch": 5.480046674445741, "grad_norm": 0.06689453125, "learning_rate": 8.500181324416996e-05, "loss": 0.0008, "step": 11741 }, { "epoch": 5.480513418903151, "grad_norm": 0.038330078125, "learning_rate": 8.498730589841916e-05, "loss": 0.0004, "step": 11742 }, { "epoch": 5.48098016336056, "grad_norm": 0.21484375, "learning_rate": 8.49727988759084e-05, "loss": 0.0007, "step": 11743 }, { "epoch": 5.481446907817969, "grad_norm": 0.1298828125, "learning_rate": 8.495829217694989e-05, "loss": 0.0042, "step": 11744 }, { "epoch": 5.481913652275379, "grad_norm": 0.2353515625, "learning_rate": 8.494378580185607e-05, "loss": 0.0041, "step": 11745 }, { "epoch": 5.482380396732789, "grad_norm": 0.1611328125, "learning_rate": 8.492927975093922e-05, "loss": 0.001, "step": 11746 }, { "epoch": 5.482847141190199, "grad_norm": 0.58984375, "learning_rate": 8.491477402451169e-05, "loss": 0.0023, "step": 11747 }, { "epoch": 5.483313885647608, "grad_norm": 0.16796875, "learning_rate": 8.490026862288583e-05, "loss": 0.0018, "step": 11748 }, { "epoch": 5.483780630105017, "grad_norm": 0.1337890625, "learning_rate": 8.488576354637395e-05, "loss": 0.0008, "step": 11749 }, { "epoch": 5.484247374562427, "grad_norm": 0.140625, "learning_rate": 8.487125879528832e-05, "loss": 0.0036, "step": 11750 }, { "epoch": 5.484714119019837, "grad_norm": 0.1396484375, "learning_rate": 8.48567543699413e-05, "loss": 0.0008, "step": 11751 }, { "epoch": 5.485180863477246, "grad_norm": 0.0289306640625, "learning_rate": 8.484225027064516e-05, "loss": 0.0004, "step": 11752 }, { "epoch": 5.485647607934656, "grad_norm": 0.51171875, "learning_rate": 8.482774649771219e-05, "loss": 0.002, "step": 11753 }, { "epoch": 5.4861143523920655, "grad_norm": 0.09814453125, "learning_rate": 8.481324305145464e-05, "loss": 0.0034, "step": 11754 }, { "epoch": 5.486581096849475, "grad_norm": 0.08447265625, "learning_rate": 8.479873993218486e-05, "loss": 0.0008, "step": 11755 }, { "epoch": 5.487047841306884, "grad_norm": 0.166015625, "learning_rate": 8.478423714021506e-05, "loss": 0.001, "step": 11756 }, { "epoch": 5.487514585764294, "grad_norm": 0.059814453125, "learning_rate": 8.476973467585749e-05, "loss": 0.0007, "step": 11757 }, { "epoch": 5.487981330221704, "grad_norm": 0.376953125, "learning_rate": 8.475523253942445e-05, "loss": 0.0034, "step": 11758 }, { "epoch": 5.4884480746791136, "grad_norm": 0.10498046875, "learning_rate": 8.474073073122819e-05, "loss": 0.0011, "step": 11759 }, { "epoch": 5.4889148191365225, "grad_norm": 0.05029296875, "learning_rate": 8.47262292515809e-05, "loss": 0.0004, "step": 11760 }, { "epoch": 5.489381563593932, "grad_norm": 0.046875, "learning_rate": 8.471172810079485e-05, "loss": 0.0005, "step": 11761 }, { "epoch": 5.489848308051342, "grad_norm": 0.1416015625, "learning_rate": 8.469722727918226e-05, "loss": 0.0017, "step": 11762 }, { "epoch": 5.490315052508752, "grad_norm": 0.46484375, "learning_rate": 8.468272678705534e-05, "loss": 0.002, "step": 11763 }, { "epoch": 5.490781796966161, "grad_norm": 0.12353515625, "learning_rate": 8.46682266247263e-05, "loss": 0.0016, "step": 11764 }, { "epoch": 5.4912485414235706, "grad_norm": 0.10498046875, "learning_rate": 8.465372679250737e-05, "loss": 0.0008, "step": 11765 }, { "epoch": 5.49171528588098, "grad_norm": 0.1220703125, "learning_rate": 8.463922729071072e-05, "loss": 0.0049, "step": 11766 }, { "epoch": 5.49218203033839, "grad_norm": 0.287109375, "learning_rate": 8.462472811964854e-05, "loss": 0.0034, "step": 11767 }, { "epoch": 5.492648774795799, "grad_norm": 0.201171875, "learning_rate": 8.461022927963303e-05, "loss": 0.002, "step": 11768 }, { "epoch": 5.493115519253209, "grad_norm": 0.2890625, "learning_rate": 8.459573077097636e-05, "loss": 0.0018, "step": 11769 }, { "epoch": 5.493582263710619, "grad_norm": 0.053955078125, "learning_rate": 8.45812325939907e-05, "loss": 0.0006, "step": 11770 }, { "epoch": 5.494049008168028, "grad_norm": 0.26953125, "learning_rate": 8.45667347489882e-05, "loss": 0.001, "step": 11771 }, { "epoch": 5.494515752625437, "grad_norm": 0.10205078125, "learning_rate": 8.455223723628105e-05, "loss": 0.0009, "step": 11772 }, { "epoch": 5.494982497082847, "grad_norm": 0.283203125, "learning_rate": 8.453774005618136e-05, "loss": 0.0064, "step": 11773 }, { "epoch": 5.495449241540257, "grad_norm": 0.1650390625, "learning_rate": 8.452324320900125e-05, "loss": 0.0062, "step": 11774 }, { "epoch": 5.495915985997666, "grad_norm": 0.04541015625, "learning_rate": 8.450874669505291e-05, "loss": 0.0004, "step": 11775 }, { "epoch": 5.496382730455076, "grad_norm": 0.11669921875, "learning_rate": 8.449425051464846e-05, "loss": 0.0007, "step": 11776 }, { "epoch": 5.496849474912485, "grad_norm": 0.1748046875, "learning_rate": 8.447975466809997e-05, "loss": 0.0017, "step": 11777 }, { "epoch": 5.497316219369895, "grad_norm": 0.0478515625, "learning_rate": 8.44652591557196e-05, "loss": 0.0027, "step": 11778 }, { "epoch": 5.497782963827304, "grad_norm": 0.189453125, "learning_rate": 8.445076397781944e-05, "loss": 0.0025, "step": 11779 }, { "epoch": 5.498249708284714, "grad_norm": 0.28125, "learning_rate": 8.443626913471159e-05, "loss": 0.0025, "step": 11780 }, { "epoch": 5.498716452742124, "grad_norm": 0.20703125, "learning_rate": 8.44217746267081e-05, "loss": 0.0104, "step": 11781 }, { "epoch": 5.4991831971995335, "grad_norm": 0.3359375, "learning_rate": 8.44072804541211e-05, "loss": 0.0013, "step": 11782 }, { "epoch": 5.499649941656942, "grad_norm": 0.09228515625, "learning_rate": 8.439278661726267e-05, "loss": 0.0024, "step": 11783 }, { "epoch": 5.500116686114352, "grad_norm": 0.1767578125, "learning_rate": 8.437829311644484e-05, "loss": 0.0023, "step": 11784 }, { "epoch": 5.500583430571762, "grad_norm": 0.1474609375, "learning_rate": 8.43637999519797e-05, "loss": 0.001, "step": 11785 }, { "epoch": 5.501050175029172, "grad_norm": 0.0517578125, "learning_rate": 8.434930712417931e-05, "loss": 0.0006, "step": 11786 }, { "epoch": 5.501516919486581, "grad_norm": 0.041748046875, "learning_rate": 8.433481463335567e-05, "loss": 0.0006, "step": 11787 }, { "epoch": 5.5019836639439905, "grad_norm": 0.064453125, "learning_rate": 8.432032247982087e-05, "loss": 0.0006, "step": 11788 }, { "epoch": 5.5024504084014, "grad_norm": 0.21875, "learning_rate": 8.430583066388695e-05, "loss": 0.0024, "step": 11789 }, { "epoch": 5.50291715285881, "grad_norm": 1.1015625, "learning_rate": 8.429133918586589e-05, "loss": 0.0047, "step": 11790 }, { "epoch": 5.503383897316219, "grad_norm": 0.333984375, "learning_rate": 8.427684804606975e-05, "loss": 0.0025, "step": 11791 }, { "epoch": 5.503850641773629, "grad_norm": 0.1357421875, "learning_rate": 8.426235724481049e-05, "loss": 0.0053, "step": 11792 }, { "epoch": 5.503850641773629, "eval_loss": 2.22942852973938, "eval_runtime": 57.9891, "eval_samples_per_second": 31.109, "eval_steps_per_second": 3.897, "step": 11792 }, { "epoch": 5.504317386231039, "grad_norm": 0.058837890625, "learning_rate": 8.424786678240011e-05, "loss": 0.0006, "step": 11793 }, { "epoch": 5.504784130688448, "grad_norm": 0.328125, "learning_rate": 8.423337665915068e-05, "loss": 0.0018, "step": 11794 }, { "epoch": 5.505250875145857, "grad_norm": 0.3671875, "learning_rate": 8.421888687537412e-05, "loss": 0.0015, "step": 11795 }, { "epoch": 5.505717619603267, "grad_norm": 0.03369140625, "learning_rate": 8.420439743138242e-05, "loss": 0.0005, "step": 11796 }, { "epoch": 5.506184364060677, "grad_norm": 0.072265625, "learning_rate": 8.41899083274876e-05, "loss": 0.0008, "step": 11797 }, { "epoch": 5.506651108518087, "grad_norm": 0.1484375, "learning_rate": 8.417541956400158e-05, "loss": 0.0013, "step": 11798 }, { "epoch": 5.507117852975496, "grad_norm": 0.07861328125, "learning_rate": 8.416093114123633e-05, "loss": 0.0006, "step": 11799 }, { "epoch": 5.507584597432905, "grad_norm": 0.169921875, "learning_rate": 8.414644305950383e-05, "loss": 0.0078, "step": 11800 }, { "epoch": 5.508051341890315, "grad_norm": 0.16015625, "learning_rate": 8.4131955319116e-05, "loss": 0.0009, "step": 11801 }, { "epoch": 5.508518086347725, "grad_norm": 0.2470703125, "learning_rate": 8.411746792038477e-05, "loss": 0.0015, "step": 11802 }, { "epoch": 5.508984830805134, "grad_norm": 0.087890625, "learning_rate": 8.410298086362204e-05, "loss": 0.0007, "step": 11803 }, { "epoch": 5.509451575262544, "grad_norm": 0.2265625, "learning_rate": 8.408849414913982e-05, "loss": 0.0011, "step": 11804 }, { "epoch": 5.5099183197199535, "grad_norm": 0.2041015625, "learning_rate": 8.407400777724996e-05, "loss": 0.0013, "step": 11805 }, { "epoch": 5.510385064177363, "grad_norm": 0.11376953125, "learning_rate": 8.405952174826437e-05, "loss": 0.0008, "step": 11806 }, { "epoch": 5.510851808634772, "grad_norm": 0.31640625, "learning_rate": 8.404503606249498e-05, "loss": 0.0019, "step": 11807 }, { "epoch": 5.511318553092182, "grad_norm": 0.115234375, "learning_rate": 8.403055072025365e-05, "loss": 0.0007, "step": 11808 }, { "epoch": 5.511785297549592, "grad_norm": 0.302734375, "learning_rate": 8.401606572185228e-05, "loss": 0.0016, "step": 11809 }, { "epoch": 5.512252042007001, "grad_norm": 0.01953125, "learning_rate": 8.400158106760276e-05, "loss": 0.0003, "step": 11810 }, { "epoch": 5.5127187864644105, "grad_norm": 0.1455078125, "learning_rate": 8.398709675781696e-05, "loss": 0.0013, "step": 11811 }, { "epoch": 5.51318553092182, "grad_norm": 0.1318359375, "learning_rate": 8.397261279280673e-05, "loss": 0.0061, "step": 11812 }, { "epoch": 5.51365227537923, "grad_norm": 0.103515625, "learning_rate": 8.395812917288391e-05, "loss": 0.0018, "step": 11813 }, { "epoch": 5.51411901983664, "grad_norm": 0.12890625, "learning_rate": 8.39436458983604e-05, "loss": 0.001, "step": 11814 }, { "epoch": 5.514585764294049, "grad_norm": 0.0810546875, "learning_rate": 8.392916296954797e-05, "loss": 0.0008, "step": 11815 }, { "epoch": 5.515052508751459, "grad_norm": 0.1611328125, "learning_rate": 8.39146803867585e-05, "loss": 0.0011, "step": 11816 }, { "epoch": 5.515519253208868, "grad_norm": 0.1220703125, "learning_rate": 8.390019815030383e-05, "loss": 0.0008, "step": 11817 }, { "epoch": 5.515985997666277, "grad_norm": 0.1494140625, "learning_rate": 8.388571626049575e-05, "loss": 0.0031, "step": 11818 }, { "epoch": 5.516452742123687, "grad_norm": 0.322265625, "learning_rate": 8.387123471764606e-05, "loss": 0.0041, "step": 11819 }, { "epoch": 5.516919486581097, "grad_norm": 0.2119140625, "learning_rate": 8.385675352206661e-05, "loss": 0.0011, "step": 11820 }, { "epoch": 5.517386231038507, "grad_norm": 0.384765625, "learning_rate": 8.384227267406916e-05, "loss": 0.0067, "step": 11821 }, { "epoch": 5.5178529754959165, "grad_norm": 0.259765625, "learning_rate": 8.38277921739655e-05, "loss": 0.0011, "step": 11822 }, { "epoch": 5.518319719953325, "grad_norm": 0.1123046875, "learning_rate": 8.381331202206741e-05, "loss": 0.0008, "step": 11823 }, { "epoch": 5.518786464410735, "grad_norm": 0.28125, "learning_rate": 8.379883221868668e-05, "loss": 0.002, "step": 11824 }, { "epoch": 5.519253208868145, "grad_norm": 0.13671875, "learning_rate": 8.378435276413508e-05, "loss": 0.0007, "step": 11825 }, { "epoch": 5.519719953325554, "grad_norm": 0.035400390625, "learning_rate": 8.376987365872432e-05, "loss": 0.0003, "step": 11826 }, { "epoch": 5.520186697782964, "grad_norm": 0.043701171875, "learning_rate": 8.375539490276622e-05, "loss": 0.0004, "step": 11827 }, { "epoch": 5.5206534422403735, "grad_norm": 0.71484375, "learning_rate": 8.37409164965725e-05, "loss": 0.0022, "step": 11828 }, { "epoch": 5.521120186697783, "grad_norm": 0.3515625, "learning_rate": 8.372643844045484e-05, "loss": 0.0063, "step": 11829 }, { "epoch": 5.521586931155192, "grad_norm": 0.1708984375, "learning_rate": 8.371196073472506e-05, "loss": 0.0011, "step": 11830 }, { "epoch": 5.522053675612602, "grad_norm": 0.69921875, "learning_rate": 8.369748337969483e-05, "loss": 0.0081, "step": 11831 }, { "epoch": 5.522520420070012, "grad_norm": 0.12890625, "learning_rate": 8.368300637567587e-05, "loss": 0.0008, "step": 11832 }, { "epoch": 5.5229871645274216, "grad_norm": 0.08642578125, "learning_rate": 8.366852972297988e-05, "loss": 0.0046, "step": 11833 }, { "epoch": 5.5234539089848305, "grad_norm": 0.31640625, "learning_rate": 8.365405342191856e-05, "loss": 0.0022, "step": 11834 }, { "epoch": 5.52392065344224, "grad_norm": 0.1904296875, "learning_rate": 8.363957747280361e-05, "loss": 0.0012, "step": 11835 }, { "epoch": 5.52438739789965, "grad_norm": 0.06689453125, "learning_rate": 8.362510187594671e-05, "loss": 0.0006, "step": 11836 }, { "epoch": 5.52485414235706, "grad_norm": 0.0546875, "learning_rate": 8.361062663165953e-05, "loss": 0.0021, "step": 11837 }, { "epoch": 5.525320886814469, "grad_norm": 0.4921875, "learning_rate": 8.359615174025378e-05, "loss": 0.005, "step": 11838 }, { "epoch": 5.5257876312718786, "grad_norm": 0.2294921875, "learning_rate": 8.358167720204106e-05, "loss": 0.0015, "step": 11839 }, { "epoch": 5.526254375729288, "grad_norm": 0.12451171875, "learning_rate": 8.356720301733306e-05, "loss": 0.001, "step": 11840 }, { "epoch": 5.526721120186698, "grad_norm": 0.31640625, "learning_rate": 8.355272918644137e-05, "loss": 0.0029, "step": 11841 }, { "epoch": 5.527187864644107, "grad_norm": 0.3046875, "learning_rate": 8.353825570967769e-05, "loss": 0.0019, "step": 11842 }, { "epoch": 5.527654609101517, "grad_norm": 0.1533203125, "learning_rate": 8.352378258735364e-05, "loss": 0.0014, "step": 11843 }, { "epoch": 5.528121353558927, "grad_norm": 0.26953125, "learning_rate": 8.350930981978082e-05, "loss": 0.0019, "step": 11844 }, { "epoch": 5.528588098016336, "grad_norm": 0.1357421875, "learning_rate": 8.349483740727086e-05, "loss": 0.0009, "step": 11845 }, { "epoch": 5.529054842473745, "grad_norm": 0.0712890625, "learning_rate": 8.348036535013537e-05, "loss": 0.0005, "step": 11846 }, { "epoch": 5.529521586931155, "grad_norm": 0.1318359375, "learning_rate": 8.346589364868594e-05, "loss": 0.0009, "step": 11847 }, { "epoch": 5.529988331388565, "grad_norm": 0.1279296875, "learning_rate": 8.345142230323413e-05, "loss": 0.0011, "step": 11848 }, { "epoch": 5.530455075845975, "grad_norm": 0.048095703125, "learning_rate": 8.34369513140916e-05, "loss": 0.0005, "step": 11849 }, { "epoch": 5.530921820303384, "grad_norm": 0.1416015625, "learning_rate": 8.342248068156988e-05, "loss": 0.0042, "step": 11850 }, { "epoch": 5.531388564760793, "grad_norm": 0.22265625, "learning_rate": 8.340801040598055e-05, "loss": 0.0026, "step": 11851 }, { "epoch": 5.531855309218203, "grad_norm": 0.2275390625, "learning_rate": 8.339354048763512e-05, "loss": 0.0015, "step": 11852 }, { "epoch": 5.532322053675612, "grad_norm": 0.1513671875, "learning_rate": 8.337907092684522e-05, "loss": 0.0008, "step": 11853 }, { "epoch": 5.532788798133022, "grad_norm": 0.0810546875, "learning_rate": 8.336460172392238e-05, "loss": 0.0007, "step": 11854 }, { "epoch": 5.533255542590432, "grad_norm": 0.07421875, "learning_rate": 8.335013287917809e-05, "loss": 0.0005, "step": 11855 }, { "epoch": 5.5337222870478415, "grad_norm": 0.154296875, "learning_rate": 8.333566439292394e-05, "loss": 0.0007, "step": 11856 }, { "epoch": 5.534189031505251, "grad_norm": 0.06640625, "learning_rate": 8.332119626547142e-05, "loss": 0.0006, "step": 11857 }, { "epoch": 5.53465577596266, "grad_norm": 0.130859375, "learning_rate": 8.330672849713202e-05, "loss": 0.0021, "step": 11858 }, { "epoch": 5.53512252042007, "grad_norm": 0.1572265625, "learning_rate": 8.329226108821732e-05, "loss": 0.0035, "step": 11859 }, { "epoch": 5.53558926487748, "grad_norm": 1.421875, "learning_rate": 8.327779403903875e-05, "loss": 0.0139, "step": 11860 }, { "epoch": 5.536056009334889, "grad_norm": 0.0673828125, "learning_rate": 8.326332734990785e-05, "loss": 0.0024, "step": 11861 }, { "epoch": 5.5365227537922985, "grad_norm": 0.1044921875, "learning_rate": 8.324886102113605e-05, "loss": 0.0007, "step": 11862 }, { "epoch": 5.536989498249708, "grad_norm": 0.06982421875, "learning_rate": 8.323439505303488e-05, "loss": 0.0006, "step": 11863 }, { "epoch": 5.537456242707118, "grad_norm": 0.10302734375, "learning_rate": 8.321992944591578e-05, "loss": 0.0009, "step": 11864 }, { "epoch": 5.537922987164528, "grad_norm": 0.053466796875, "learning_rate": 8.32054642000902e-05, "loss": 0.0005, "step": 11865 }, { "epoch": 5.538389731621937, "grad_norm": 0.1806640625, "learning_rate": 8.319099931586965e-05, "loss": 0.0008, "step": 11866 }, { "epoch": 5.538856476079347, "grad_norm": 0.053466796875, "learning_rate": 8.31765347935655e-05, "loss": 0.0004, "step": 11867 }, { "epoch": 5.539323220536756, "grad_norm": 0.0390625, "learning_rate": 8.316207063348923e-05, "loss": 0.0005, "step": 11868 }, { "epoch": 5.539789964994165, "grad_norm": 0.0830078125, "learning_rate": 8.314760683595228e-05, "loss": 0.0006, "step": 11869 }, { "epoch": 5.540256709451575, "grad_norm": 0.1484375, "learning_rate": 8.313314340126603e-05, "loss": 0.0015, "step": 11870 }, { "epoch": 5.540723453908985, "grad_norm": 0.154296875, "learning_rate": 8.311868032974194e-05, "loss": 0.001, "step": 11871 }, { "epoch": 5.541190198366395, "grad_norm": 0.06787109375, "learning_rate": 8.310421762169136e-05, "loss": 0.0006, "step": 11872 }, { "epoch": 5.541656942823804, "grad_norm": 0.1220703125, "learning_rate": 8.308975527742574e-05, "loss": 0.0006, "step": 11873 }, { "epoch": 5.542123687281213, "grad_norm": 0.7890625, "learning_rate": 8.307529329725645e-05, "loss": 0.008, "step": 11874 }, { "epoch": 5.542590431738623, "grad_norm": 0.15625, "learning_rate": 8.306083168149487e-05, "loss": 0.0035, "step": 11875 }, { "epoch": 5.543057176196033, "grad_norm": 0.21484375, "learning_rate": 8.304637043045238e-05, "loss": 0.0017, "step": 11876 }, { "epoch": 5.543523920653442, "grad_norm": 0.14453125, "learning_rate": 8.303190954444036e-05, "loss": 0.0013, "step": 11877 }, { "epoch": 5.543990665110852, "grad_norm": 0.28515625, "learning_rate": 8.301744902377012e-05, "loss": 0.0011, "step": 11878 }, { "epoch": 5.5444574095682615, "grad_norm": 0.21875, "learning_rate": 8.300298886875308e-05, "loss": 0.0059, "step": 11879 }, { "epoch": 5.544924154025671, "grad_norm": 0.1982421875, "learning_rate": 8.298852907970053e-05, "loss": 0.0016, "step": 11880 }, { "epoch": 5.54539089848308, "grad_norm": 0.1962890625, "learning_rate": 8.297406965692383e-05, "loss": 0.0013, "step": 11881 }, { "epoch": 5.54585764294049, "grad_norm": 0.05126953125, "learning_rate": 8.295961060073428e-05, "loss": 0.0005, "step": 11882 }, { "epoch": 5.5463243873979, "grad_norm": 0.36328125, "learning_rate": 8.294515191144325e-05, "loss": 0.0032, "step": 11883 }, { "epoch": 5.54679113185531, "grad_norm": 0.07763671875, "learning_rate": 8.293069358936202e-05, "loss": 0.0008, "step": 11884 }, { "epoch": 5.5472578763127185, "grad_norm": 0.2080078125, "learning_rate": 8.291623563480186e-05, "loss": 0.0008, "step": 11885 }, { "epoch": 5.547724620770128, "grad_norm": 0.1513671875, "learning_rate": 8.290177804807418e-05, "loss": 0.0013, "step": 11886 }, { "epoch": 5.548191365227538, "grad_norm": 0.055419921875, "learning_rate": 8.288732082949013e-05, "loss": 0.0006, "step": 11887 }, { "epoch": 5.548658109684947, "grad_norm": 0.140625, "learning_rate": 8.287286397936107e-05, "loss": 0.0031, "step": 11888 }, { "epoch": 5.549124854142357, "grad_norm": 0.06396484375, "learning_rate": 8.285840749799822e-05, "loss": 0.0006, "step": 11889 }, { "epoch": 5.549591598599767, "grad_norm": 0.16015625, "learning_rate": 8.284395138571289e-05, "loss": 0.0009, "step": 11890 }, { "epoch": 5.550058343057176, "grad_norm": 0.2265625, "learning_rate": 8.282949564281633e-05, "loss": 0.0021, "step": 11891 }, { "epoch": 5.550525087514586, "grad_norm": 0.10546875, "learning_rate": 8.281504026961978e-05, "loss": 0.001, "step": 11892 }, { "epoch": 5.550991831971995, "grad_norm": 0.341796875, "learning_rate": 8.280058526643448e-05, "loss": 0.002, "step": 11893 }, { "epoch": 5.551458576429405, "grad_norm": 0.349609375, "learning_rate": 8.278613063357164e-05, "loss": 0.0026, "step": 11894 }, { "epoch": 5.551925320886815, "grad_norm": 0.36328125, "learning_rate": 8.277167637134253e-05, "loss": 0.0082, "step": 11895 }, { "epoch": 5.552392065344224, "grad_norm": 0.27734375, "learning_rate": 8.275722248005835e-05, "loss": 0.0052, "step": 11896 }, { "epoch": 5.552858809801633, "grad_norm": 0.326171875, "learning_rate": 8.274276896003027e-05, "loss": 0.0014, "step": 11897 }, { "epoch": 5.553325554259043, "grad_norm": 0.08837890625, "learning_rate": 8.272831581156955e-05, "loss": 0.0033, "step": 11898 }, { "epoch": 5.553792298716453, "grad_norm": 0.298828125, "learning_rate": 8.271386303498734e-05, "loss": 0.0011, "step": 11899 }, { "epoch": 5.554259043173863, "grad_norm": 0.1767578125, "learning_rate": 8.269941063059484e-05, "loss": 0.0013, "step": 11900 }, { "epoch": 5.554725787631272, "grad_norm": 0.0419921875, "learning_rate": 8.268495859870322e-05, "loss": 0.0004, "step": 11901 }, { "epoch": 5.5551925320886815, "grad_norm": 0.058837890625, "learning_rate": 8.267050693962365e-05, "loss": 0.0005, "step": 11902 }, { "epoch": 5.555659276546091, "grad_norm": 0.15625, "learning_rate": 8.265605565366732e-05, "loss": 0.0022, "step": 11903 }, { "epoch": 5.5561260210035, "grad_norm": 0.236328125, "learning_rate": 8.264160474114531e-05, "loss": 0.0043, "step": 11904 }, { "epoch": 5.55659276546091, "grad_norm": 0.255859375, "learning_rate": 8.262715420236884e-05, "loss": 0.0015, "step": 11905 }, { "epoch": 5.55705950991832, "grad_norm": 0.04150390625, "learning_rate": 8.261270403764902e-05, "loss": 0.0025, "step": 11906 }, { "epoch": 5.55752625437573, "grad_norm": 0.205078125, "learning_rate": 8.259825424729696e-05, "loss": 0.0061, "step": 11907 }, { "epoch": 5.557992998833139, "grad_norm": 0.36328125, "learning_rate": 8.25838048316238e-05, "loss": 0.002, "step": 11908 }, { "epoch": 5.558459743290548, "grad_norm": 0.09326171875, "learning_rate": 8.256935579094066e-05, "loss": 0.0008, "step": 11909 }, { "epoch": 5.558926487747958, "grad_norm": 0.07421875, "learning_rate": 8.255490712555862e-05, "loss": 0.0008, "step": 11910 }, { "epoch": 5.559393232205368, "grad_norm": 0.150390625, "learning_rate": 8.254045883578876e-05, "loss": 0.0012, "step": 11911 }, { "epoch": 5.559859976662777, "grad_norm": 0.33203125, "learning_rate": 8.252601092194223e-05, "loss": 0.0015, "step": 11912 }, { "epoch": 5.5603267211201866, "grad_norm": 0.060546875, "learning_rate": 8.251156338433004e-05, "loss": 0.0006, "step": 11913 }, { "epoch": 5.560793465577596, "grad_norm": 0.10205078125, "learning_rate": 8.24971162232633e-05, "loss": 0.0009, "step": 11914 }, { "epoch": 5.561260210035006, "grad_norm": 0.111328125, "learning_rate": 8.248266943905309e-05, "loss": 0.003, "step": 11915 }, { "epoch": 5.561726954492415, "grad_norm": 0.08154296875, "learning_rate": 8.246822303201043e-05, "loss": 0.0024, "step": 11916 }, { "epoch": 5.562193698949825, "grad_norm": 0.12353515625, "learning_rate": 8.245377700244637e-05, "loss": 0.0015, "step": 11917 }, { "epoch": 5.562660443407235, "grad_norm": 0.171875, "learning_rate": 8.243933135067197e-05, "loss": 0.0009, "step": 11918 }, { "epoch": 5.563127187864644, "grad_norm": 0.0791015625, "learning_rate": 8.242488607699826e-05, "loss": 0.0008, "step": 11919 }, { "epoch": 5.563593932322053, "grad_norm": 0.1064453125, "learning_rate": 8.241044118173624e-05, "loss": 0.0008, "step": 11920 }, { "epoch": 5.564060676779463, "grad_norm": 0.060302734375, "learning_rate": 8.239599666519692e-05, "loss": 0.0005, "step": 11921 }, { "epoch": 5.564527421236873, "grad_norm": 0.039306640625, "learning_rate": 8.238155252769135e-05, "loss": 0.0004, "step": 11922 }, { "epoch": 5.564994165694283, "grad_norm": 0.076171875, "learning_rate": 8.23671087695305e-05, "loss": 0.0032, "step": 11923 }, { "epoch": 5.565460910151692, "grad_norm": 0.224609375, "learning_rate": 8.235266539102533e-05, "loss": 0.0012, "step": 11924 }, { "epoch": 5.565927654609101, "grad_norm": 0.10546875, "learning_rate": 8.23382223924869e-05, "loss": 0.0031, "step": 11925 }, { "epoch": 5.566394399066511, "grad_norm": 0.0478515625, "learning_rate": 8.232377977422612e-05, "loss": 0.0005, "step": 11926 }, { "epoch": 5.566861143523921, "grad_norm": 0.27734375, "learning_rate": 8.230933753655395e-05, "loss": 0.0021, "step": 11927 }, { "epoch": 5.56732788798133, "grad_norm": 0.0810546875, "learning_rate": 8.229489567978141e-05, "loss": 0.0006, "step": 11928 }, { "epoch": 5.56779463243874, "grad_norm": 0.05712890625, "learning_rate": 8.228045420421939e-05, "loss": 0.0006, "step": 11929 }, { "epoch": 5.5682613768961495, "grad_norm": 0.169921875, "learning_rate": 8.226601311017886e-05, "loss": 0.002, "step": 11930 }, { "epoch": 5.568728121353558, "grad_norm": 0.14453125, "learning_rate": 8.225157239797072e-05, "loss": 0.0011, "step": 11931 }, { "epoch": 5.569194865810968, "grad_norm": 0.1748046875, "learning_rate": 8.223713206790595e-05, "loss": 0.0009, "step": 11932 }, { "epoch": 5.569661610268378, "grad_norm": 0.150390625, "learning_rate": 8.222269212029542e-05, "loss": 0.0032, "step": 11933 }, { "epoch": 5.570128354725788, "grad_norm": 0.166015625, "learning_rate": 8.220825255545009e-05, "loss": 0.0048, "step": 11934 }, { "epoch": 5.570595099183198, "grad_norm": 0.12109375, "learning_rate": 8.219381337368079e-05, "loss": 0.0007, "step": 11935 }, { "epoch": 5.5710618436406065, "grad_norm": 0.048828125, "learning_rate": 8.217937457529843e-05, "loss": 0.0036, "step": 11936 }, { "epoch": 5.571528588098016, "grad_norm": 0.333984375, "learning_rate": 8.216493616061391e-05, "loss": 0.0024, "step": 11937 }, { "epoch": 5.571995332555426, "grad_norm": 0.16796875, "learning_rate": 8.215049812993813e-05, "loss": 0.0008, "step": 11938 }, { "epoch": 5.572462077012835, "grad_norm": 0.05029296875, "learning_rate": 8.213606048358189e-05, "loss": 0.0004, "step": 11939 }, { "epoch": 5.572928821470245, "grad_norm": 0.1875, "learning_rate": 8.212162322185612e-05, "loss": 0.0009, "step": 11940 }, { "epoch": 5.573395565927655, "grad_norm": 0.033935546875, "learning_rate": 8.210718634507163e-05, "loss": 0.0004, "step": 11941 }, { "epoch": 5.573862310385064, "grad_norm": 0.0284423828125, "learning_rate": 8.209274985353927e-05, "loss": 0.0004, "step": 11942 }, { "epoch": 5.574329054842474, "grad_norm": 0.07275390625, "learning_rate": 8.207831374756987e-05, "loss": 0.0009, "step": 11943 }, { "epoch": 5.574795799299883, "grad_norm": 0.08056640625, "learning_rate": 8.206387802747426e-05, "loss": 0.0037, "step": 11944 }, { "epoch": 5.575262543757293, "grad_norm": 0.2158203125, "learning_rate": 8.204944269356326e-05, "loss": 0.0008, "step": 11945 }, { "epoch": 5.575729288214703, "grad_norm": 0.09765625, "learning_rate": 8.203500774614766e-05, "loss": 0.0039, "step": 11946 }, { "epoch": 5.576196032672112, "grad_norm": 0.28125, "learning_rate": 8.20205731855383e-05, "loss": 0.0014, "step": 11947 }, { "epoch": 5.576662777129521, "grad_norm": 0.1337890625, "learning_rate": 8.200613901204595e-05, "loss": 0.0008, "step": 11948 }, { "epoch": 5.577129521586931, "grad_norm": 0.1376953125, "learning_rate": 8.199170522598135e-05, "loss": 0.0012, "step": 11949 }, { "epoch": 5.577596266044341, "grad_norm": 0.23046875, "learning_rate": 8.197727182765536e-05, "loss": 0.0012, "step": 11950 }, { "epoch": 5.57806301050175, "grad_norm": 0.052001953125, "learning_rate": 8.19628388173787e-05, "loss": 0.0007, "step": 11951 }, { "epoch": 5.57852975495916, "grad_norm": 0.08740234375, "learning_rate": 8.194840619546212e-05, "loss": 0.0007, "step": 11952 }, { "epoch": 5.5789964994165695, "grad_norm": 0.047119140625, "learning_rate": 8.193397396221637e-05, "loss": 0.0004, "step": 11953 }, { "epoch": 5.579463243873979, "grad_norm": 0.095703125, "learning_rate": 8.191954211795223e-05, "loss": 0.0009, "step": 11954 }, { "epoch": 5.579929988331388, "grad_norm": 0.283203125, "learning_rate": 8.190511066298042e-05, "loss": 0.0029, "step": 11955 }, { "epoch": 5.580396732788798, "grad_norm": 0.1376953125, "learning_rate": 8.18906795976116e-05, "loss": 0.001, "step": 11956 }, { "epoch": 5.580863477246208, "grad_norm": 0.06640625, "learning_rate": 8.187624892215658e-05, "loss": 0.0006, "step": 11957 }, { "epoch": 5.581330221703618, "grad_norm": 0.1943359375, "learning_rate": 8.186181863692604e-05, "loss": 0.0007, "step": 11958 }, { "epoch": 5.5817969661610265, "grad_norm": 0.23828125, "learning_rate": 8.184738874223066e-05, "loss": 0.0013, "step": 11959 }, { "epoch": 5.582263710618436, "grad_norm": 0.181640625, "learning_rate": 8.183295923838111e-05, "loss": 0.0056, "step": 11960 }, { "epoch": 5.582730455075846, "grad_norm": 0.0771484375, "learning_rate": 8.181853012568815e-05, "loss": 0.0008, "step": 11961 }, { "epoch": 5.583197199533256, "grad_norm": 0.1923828125, "learning_rate": 8.180410140446239e-05, "loss": 0.0012, "step": 11962 }, { "epoch": 5.583663943990665, "grad_norm": 0.056884765625, "learning_rate": 8.178967307501449e-05, "loss": 0.0027, "step": 11963 }, { "epoch": 5.584130688448075, "grad_norm": 0.1220703125, "learning_rate": 8.177524513765517e-05, "loss": 0.0034, "step": 11964 }, { "epoch": 5.584597432905484, "grad_norm": 0.1943359375, "learning_rate": 8.176081759269503e-05, "loss": 0.0008, "step": 11965 }, { "epoch": 5.585064177362894, "grad_norm": 0.298828125, "learning_rate": 8.17463904404447e-05, "loss": 0.0051, "step": 11966 }, { "epoch": 5.585530921820303, "grad_norm": 0.06201171875, "learning_rate": 8.173196368121487e-05, "loss": 0.0006, "step": 11967 }, { "epoch": 5.585997666277713, "grad_norm": 0.12158203125, "learning_rate": 8.171753731531615e-05, "loss": 0.0007, "step": 11968 }, { "epoch": 5.586464410735123, "grad_norm": 0.09130859375, "learning_rate": 8.170311134305911e-05, "loss": 0.0007, "step": 11969 }, { "epoch": 5.5869311551925325, "grad_norm": 0.0556640625, "learning_rate": 8.168868576475436e-05, "loss": 0.0039, "step": 11970 }, { "epoch": 5.587397899649941, "grad_norm": 0.10546875, "learning_rate": 8.167426058071254e-05, "loss": 0.0011, "step": 11971 }, { "epoch": 5.587864644107351, "grad_norm": 0.1298828125, "learning_rate": 8.165983579124424e-05, "loss": 0.0012, "step": 11972 }, { "epoch": 5.588331388564761, "grad_norm": 0.038330078125, "learning_rate": 8.164541139665999e-05, "loss": 0.0004, "step": 11973 }, { "epoch": 5.58879813302217, "grad_norm": 0.158203125, "learning_rate": 8.163098739727042e-05, "loss": 0.0008, "step": 11974 }, { "epoch": 5.58926487747958, "grad_norm": 0.1806640625, "learning_rate": 8.161656379338608e-05, "loss": 0.0012, "step": 11975 }, { "epoch": 5.5897316219369895, "grad_norm": 0.20703125, "learning_rate": 8.160214058531749e-05, "loss": 0.001, "step": 11976 }, { "epoch": 5.590198366394399, "grad_norm": 0.1396484375, "learning_rate": 8.158771777337524e-05, "loss": 0.0009, "step": 11977 }, { "epoch": 5.590665110851809, "grad_norm": 0.2158203125, "learning_rate": 8.157329535786988e-05, "loss": 0.0081, "step": 11978 }, { "epoch": 5.591131855309218, "grad_norm": 0.1298828125, "learning_rate": 8.155887333911189e-05, "loss": 0.0007, "step": 11979 }, { "epoch": 5.591598599766628, "grad_norm": 0.05322265625, "learning_rate": 8.154445171741181e-05, "loss": 0.0007, "step": 11980 }, { "epoch": 5.592065344224038, "grad_norm": 0.04541015625, "learning_rate": 8.153003049308019e-05, "loss": 0.0004, "step": 11981 }, { "epoch": 5.5925320886814465, "grad_norm": 0.30859375, "learning_rate": 8.151560966642751e-05, "loss": 0.002, "step": 11982 }, { "epoch": 5.592998833138856, "grad_norm": 0.09130859375, "learning_rate": 8.150118923776426e-05, "loss": 0.0027, "step": 11983 }, { "epoch": 5.593465577596266, "grad_norm": 0.031982421875, "learning_rate": 8.14867692074009e-05, "loss": 0.0024, "step": 11984 }, { "epoch": 5.593932322053676, "grad_norm": 0.07275390625, "learning_rate": 8.147234957564794e-05, "loss": 0.0035, "step": 11985 }, { "epoch": 5.594399066511086, "grad_norm": 0.0556640625, "learning_rate": 8.145793034281588e-05, "loss": 0.0006, "step": 11986 }, { "epoch": 5.594865810968495, "grad_norm": 0.1416015625, "learning_rate": 8.144351150921514e-05, "loss": 0.0009, "step": 11987 }, { "epoch": 5.595332555425904, "grad_norm": 0.044921875, "learning_rate": 8.142909307515617e-05, "loss": 0.0027, "step": 11988 }, { "epoch": 5.595799299883314, "grad_norm": 0.052490234375, "learning_rate": 8.141467504094944e-05, "loss": 0.0006, "step": 11989 }, { "epoch": 5.596266044340723, "grad_norm": 0.11181640625, "learning_rate": 8.140025740690539e-05, "loss": 0.001, "step": 11990 }, { "epoch": 5.596732788798133, "grad_norm": 0.1025390625, "learning_rate": 8.138584017333442e-05, "loss": 0.0007, "step": 11991 }, { "epoch": 5.597199533255543, "grad_norm": 0.014892578125, "learning_rate": 8.137142334054696e-05, "loss": 0.0002, "step": 11992 }, { "epoch": 5.597666277712952, "grad_norm": 0.12890625, "learning_rate": 8.135700690885344e-05, "loss": 0.0007, "step": 11993 }, { "epoch": 5.598133022170361, "grad_norm": 0.10498046875, "learning_rate": 8.134259087856424e-05, "loss": 0.0008, "step": 11994 }, { "epoch": 5.598599766627771, "grad_norm": 0.04541015625, "learning_rate": 8.132817524998975e-05, "loss": 0.0025, "step": 11995 }, { "epoch": 5.599066511085181, "grad_norm": 0.30078125, "learning_rate": 8.131376002344038e-05, "loss": 0.0031, "step": 11996 }, { "epoch": 5.599533255542591, "grad_norm": 0.060546875, "learning_rate": 8.129934519922649e-05, "loss": 0.0006, "step": 11997 }, { "epoch": 5.6, "grad_norm": 0.044677734375, "learning_rate": 8.128493077765841e-05, "loss": 0.0005, "step": 11998 }, { "epoch": 5.600466744457409, "grad_norm": 0.251953125, "learning_rate": 8.127051675904658e-05, "loss": 0.0017, "step": 11999 }, { "epoch": 5.600933488914819, "grad_norm": 0.326171875, "learning_rate": 8.125610314370129e-05, "loss": 0.0062, "step": 12000 }, { "epoch": 5.601400233372229, "grad_norm": 0.166015625, "learning_rate": 8.124168993193289e-05, "loss": 0.0021, "step": 12001 }, { "epoch": 5.601866977829638, "grad_norm": 0.21875, "learning_rate": 8.122727712405172e-05, "loss": 0.0011, "step": 12002 }, { "epoch": 5.602333722287048, "grad_norm": 0.1806640625, "learning_rate": 8.12128647203681e-05, "loss": 0.0008, "step": 12003 }, { "epoch": 5.6028004667444575, "grad_norm": 0.3984375, "learning_rate": 8.119845272119235e-05, "loss": 0.0013, "step": 12004 }, { "epoch": 5.603267211201867, "grad_norm": 0.205078125, "learning_rate": 8.118404112683478e-05, "loss": 0.0009, "step": 12005 }, { "epoch": 5.603733955659276, "grad_norm": 0.05810546875, "learning_rate": 8.116962993760567e-05, "loss": 0.0005, "step": 12006 }, { "epoch": 5.604200700116686, "grad_norm": 0.267578125, "learning_rate": 8.115521915381533e-05, "loss": 0.0013, "step": 12007 }, { "epoch": 5.604667444574096, "grad_norm": 0.4765625, "learning_rate": 8.114080877577401e-05, "loss": 0.0024, "step": 12008 }, { "epoch": 5.605134189031506, "grad_norm": 0.10888671875, "learning_rate": 8.112639880379203e-05, "loss": 0.0008, "step": 12009 }, { "epoch": 5.6056009334889145, "grad_norm": 0.1240234375, "learning_rate": 8.111198923817962e-05, "loss": 0.0009, "step": 12010 }, { "epoch": 5.606067677946324, "grad_norm": 0.0673828125, "learning_rate": 8.109758007924703e-05, "loss": 0.0005, "step": 12011 }, { "epoch": 5.606534422403734, "grad_norm": 0.07958984375, "learning_rate": 8.10831713273045e-05, "loss": 0.004, "step": 12012 }, { "epoch": 5.607001166861144, "grad_norm": 0.45703125, "learning_rate": 8.106876298266231e-05, "loss": 0.0023, "step": 12013 }, { "epoch": 5.607467911318553, "grad_norm": 0.08544921875, "learning_rate": 8.105435504563063e-05, "loss": 0.0007, "step": 12014 }, { "epoch": 5.607934655775963, "grad_norm": 0.040771484375, "learning_rate": 8.103994751651971e-05, "loss": 0.0004, "step": 12015 }, { "epoch": 5.608401400233372, "grad_norm": 0.061279296875, "learning_rate": 8.102554039563978e-05, "loss": 0.0006, "step": 12016 }, { "epoch": 5.608868144690781, "grad_norm": 0.08154296875, "learning_rate": 8.1011133683301e-05, "loss": 0.0007, "step": 12017 }, { "epoch": 5.609334889148191, "grad_norm": 0.061767578125, "learning_rate": 8.09967273798136e-05, "loss": 0.0005, "step": 12018 }, { "epoch": 5.609801633605601, "grad_norm": 0.146484375, "learning_rate": 8.09823214854877e-05, "loss": 0.001, "step": 12019 }, { "epoch": 5.610268378063011, "grad_norm": 0.1318359375, "learning_rate": 8.096791600063357e-05, "loss": 0.0011, "step": 12020 }, { "epoch": 5.6107351225204205, "grad_norm": 0.259765625, "learning_rate": 8.09535109255613e-05, "loss": 0.0014, "step": 12021 }, { "epoch": 5.611201866977829, "grad_norm": 0.055908203125, "learning_rate": 8.093910626058106e-05, "loss": 0.0005, "step": 12022 }, { "epoch": 5.611668611435239, "grad_norm": 0.33203125, "learning_rate": 8.092470200600301e-05, "loss": 0.0038, "step": 12023 }, { "epoch": 5.612135355892649, "grad_norm": 0.1337890625, "learning_rate": 8.091029816213732e-05, "loss": 0.0029, "step": 12024 }, { "epoch": 5.612602100350058, "grad_norm": 0.51171875, "learning_rate": 8.089589472929406e-05, "loss": 0.0021, "step": 12025 }, { "epoch": 5.613068844807468, "grad_norm": 0.03173828125, "learning_rate": 8.08814917077834e-05, "loss": 0.0004, "step": 12026 }, { "epoch": 5.6135355892648775, "grad_norm": 0.0810546875, "learning_rate": 8.086708909791544e-05, "loss": 0.0007, "step": 12027 }, { "epoch": 5.614002333722287, "grad_norm": 0.1640625, "learning_rate": 8.085268690000029e-05, "loss": 0.0014, "step": 12028 }, { "epoch": 5.614469078179697, "grad_norm": 0.06396484375, "learning_rate": 8.0838285114348e-05, "loss": 0.0006, "step": 12029 }, { "epoch": 5.614935822637106, "grad_norm": 0.1259765625, "learning_rate": 8.082388374126876e-05, "loss": 0.0011, "step": 12030 }, { "epoch": 5.615402567094516, "grad_norm": 0.279296875, "learning_rate": 8.08094827810725e-05, "loss": 0.0009, "step": 12031 }, { "epoch": 5.615869311551926, "grad_norm": 0.04150390625, "learning_rate": 8.079508223406942e-05, "loss": 0.0005, "step": 12032 }, { "epoch": 5.6163360560093345, "grad_norm": 0.049072265625, "learning_rate": 8.07806821005695e-05, "loss": 0.0005, "step": 12033 }, { "epoch": 5.616802800466744, "grad_norm": 0.341796875, "learning_rate": 8.076628238088282e-05, "loss": 0.0024, "step": 12034 }, { "epoch": 5.617269544924154, "grad_norm": 0.026123046875, "learning_rate": 8.075188307531943e-05, "loss": 0.0003, "step": 12035 }, { "epoch": 5.617736289381564, "grad_norm": 0.1181640625, "learning_rate": 8.073748418418935e-05, "loss": 0.0006, "step": 12036 }, { "epoch": 5.618203033838973, "grad_norm": 0.30078125, "learning_rate": 8.07230857078026e-05, "loss": 0.0021, "step": 12037 }, { "epoch": 5.618669778296383, "grad_norm": 0.29296875, "learning_rate": 8.07086876464692e-05, "loss": 0.0036, "step": 12038 }, { "epoch": 5.619136522753792, "grad_norm": 0.1767578125, "learning_rate": 8.069429000049918e-05, "loss": 0.0012, "step": 12039 }, { "epoch": 5.619603267211202, "grad_norm": 0.03759765625, "learning_rate": 8.067989277020251e-05, "loss": 0.0004, "step": 12040 }, { "epoch": 5.620070011668611, "grad_norm": 0.0771484375, "learning_rate": 8.066549595588916e-05, "loss": 0.0015, "step": 12041 }, { "epoch": 5.620536756126021, "grad_norm": 0.28125, "learning_rate": 8.065109955786916e-05, "loss": 0.0033, "step": 12042 }, { "epoch": 5.621003500583431, "grad_norm": 0.2431640625, "learning_rate": 8.063670357645246e-05, "loss": 0.0012, "step": 12043 }, { "epoch": 5.6214702450408405, "grad_norm": 0.056640625, "learning_rate": 8.062230801194898e-05, "loss": 0.0006, "step": 12044 }, { "epoch": 5.621936989498249, "grad_norm": 0.083984375, "learning_rate": 8.060791286466874e-05, "loss": 0.0005, "step": 12045 }, { "epoch": 5.622403733955659, "grad_norm": 0.1806640625, "learning_rate": 8.059351813492165e-05, "loss": 0.0008, "step": 12046 }, { "epoch": 5.622870478413069, "grad_norm": 0.33984375, "learning_rate": 8.057912382301762e-05, "loss": 0.001, "step": 12047 }, { "epoch": 5.623337222870479, "grad_norm": 0.0703125, "learning_rate": 8.056472992926663e-05, "loss": 0.0008, "step": 12048 }, { "epoch": 5.623803967327888, "grad_norm": 0.10302734375, "learning_rate": 8.055033645397858e-05, "loss": 0.0006, "step": 12049 }, { "epoch": 5.6242707117852975, "grad_norm": 0.0223388671875, "learning_rate": 8.053594339746334e-05, "loss": 0.0004, "step": 12050 }, { "epoch": 5.624737456242707, "grad_norm": 0.236328125, "learning_rate": 8.052155076003083e-05, "loss": 0.0042, "step": 12051 }, { "epoch": 5.625204200700117, "grad_norm": 0.203125, "learning_rate": 8.050715854199096e-05, "loss": 0.0025, "step": 12052 }, { "epoch": 5.625670945157526, "grad_norm": 0.02734375, "learning_rate": 8.049276674365361e-05, "loss": 0.0004, "step": 12053 }, { "epoch": 5.626137689614936, "grad_norm": 0.283203125, "learning_rate": 8.047837536532858e-05, "loss": 0.003, "step": 12054 }, { "epoch": 5.626604434072346, "grad_norm": 0.0517578125, "learning_rate": 8.046398440732583e-05, "loss": 0.0005, "step": 12055 }, { "epoch": 5.627071178529755, "grad_norm": 0.0279541015625, "learning_rate": 8.044959386995518e-05, "loss": 0.0004, "step": 12056 }, { "epoch": 5.627537922987164, "grad_norm": 0.07568359375, "learning_rate": 8.043520375352642e-05, "loss": 0.0005, "step": 12057 }, { "epoch": 5.628004667444574, "grad_norm": 0.212890625, "learning_rate": 8.042081405834946e-05, "loss": 0.0008, "step": 12058 }, { "epoch": 5.628471411901984, "grad_norm": 0.06298828125, "learning_rate": 8.040642478473412e-05, "loss": 0.0059, "step": 12059 }, { "epoch": 5.628938156359393, "grad_norm": 0.056640625, "learning_rate": 8.039203593299017e-05, "loss": 0.0006, "step": 12060 }, { "epoch": 5.629404900816803, "grad_norm": 0.10498046875, "learning_rate": 8.037764750342743e-05, "loss": 0.0006, "step": 12061 }, { "epoch": 5.629871645274212, "grad_norm": 0.04296875, "learning_rate": 8.036325949635572e-05, "loss": 0.0004, "step": 12062 }, { "epoch": 5.630338389731622, "grad_norm": 0.059326171875, "learning_rate": 8.034887191208485e-05, "loss": 0.0005, "step": 12063 }, { "epoch": 5.630805134189032, "grad_norm": 0.04150390625, "learning_rate": 8.033448475092452e-05, "loss": 0.0004, "step": 12064 }, { "epoch": 5.631271878646441, "grad_norm": 0.034912109375, "learning_rate": 8.032009801318458e-05, "loss": 0.0004, "step": 12065 }, { "epoch": 5.631738623103851, "grad_norm": 0.2060546875, "learning_rate": 8.030571169917477e-05, "loss": 0.001, "step": 12066 }, { "epoch": 5.6322053675612604, "grad_norm": 0.130859375, "learning_rate": 8.029132580920482e-05, "loss": 0.0005, "step": 12067 }, { "epoch": 5.632672112018669, "grad_norm": 0.1875, "learning_rate": 8.027694034358451e-05, "loss": 0.0055, "step": 12068 }, { "epoch": 5.633138856476079, "grad_norm": 0.0289306640625, "learning_rate": 8.026255530262356e-05, "loss": 0.0004, "step": 12069 }, { "epoch": 5.633605600933489, "grad_norm": 0.068359375, "learning_rate": 8.024817068663168e-05, "loss": 0.0005, "step": 12070 }, { "epoch": 5.634072345390899, "grad_norm": 0.091796875, "learning_rate": 8.023378649591859e-05, "loss": 0.0006, "step": 12071 }, { "epoch": 5.6345390898483085, "grad_norm": 0.10009765625, "learning_rate": 8.021940273079402e-05, "loss": 0.0006, "step": 12072 }, { "epoch": 5.635005834305717, "grad_norm": 0.09228515625, "learning_rate": 8.020501939156767e-05, "loss": 0.0009, "step": 12073 }, { "epoch": 5.635472578763127, "grad_norm": 0.115234375, "learning_rate": 8.019063647854919e-05, "loss": 0.0007, "step": 12074 }, { "epoch": 5.635939323220537, "grad_norm": 0.1708984375, "learning_rate": 8.017625399204831e-05, "loss": 0.0009, "step": 12075 }, { "epoch": 5.636406067677946, "grad_norm": 0.0927734375, "learning_rate": 8.016187193237467e-05, "loss": 0.0006, "step": 12076 }, { "epoch": 5.636872812135356, "grad_norm": 0.158203125, "learning_rate": 8.014749029983794e-05, "loss": 0.0008, "step": 12077 }, { "epoch": 5.6373395565927655, "grad_norm": 0.21875, "learning_rate": 8.013310909474779e-05, "loss": 0.0008, "step": 12078 }, { "epoch": 5.637806301050175, "grad_norm": 0.07861328125, "learning_rate": 8.01187283174138e-05, "loss": 0.0008, "step": 12079 }, { "epoch": 5.638273045507584, "grad_norm": 0.08154296875, "learning_rate": 8.010434796814568e-05, "loss": 0.0006, "step": 12080 }, { "epoch": 5.638739789964994, "grad_norm": 0.08203125, "learning_rate": 8.0089968047253e-05, "loss": 0.0004, "step": 12081 }, { "epoch": 5.639206534422404, "grad_norm": 0.034912109375, "learning_rate": 8.007558855504542e-05, "loss": 0.0004, "step": 12082 }, { "epoch": 5.639673278879814, "grad_norm": 0.30859375, "learning_rate": 8.00612094918325e-05, "loss": 0.0082, "step": 12083 }, { "epoch": 5.6401400233372225, "grad_norm": 0.0869140625, "learning_rate": 8.004683085792388e-05, "loss": 0.0007, "step": 12084 }, { "epoch": 5.640606767794632, "grad_norm": 0.04931640625, "learning_rate": 8.003245265362913e-05, "loss": 0.0005, "step": 12085 }, { "epoch": 5.641073512252042, "grad_norm": 0.0205078125, "learning_rate": 8.001807487925782e-05, "loss": 0.0003, "step": 12086 }, { "epoch": 5.641540256709452, "grad_norm": 0.07958984375, "learning_rate": 8.000369753511952e-05, "loss": 0.0005, "step": 12087 }, { "epoch": 5.642007001166861, "grad_norm": 0.2265625, "learning_rate": 7.998932062152382e-05, "loss": 0.0011, "step": 12088 }, { "epoch": 5.642473745624271, "grad_norm": 0.06298828125, "learning_rate": 7.997494413878024e-05, "loss": 0.0004, "step": 12089 }, { "epoch": 5.64294049008168, "grad_norm": 0.11328125, "learning_rate": 7.996056808719832e-05, "loss": 0.0011, "step": 12090 }, { "epoch": 5.64340723453909, "grad_norm": 0.048828125, "learning_rate": 7.99461924670876e-05, "loss": 0.0004, "step": 12091 }, { "epoch": 5.643873978996499, "grad_norm": 0.201171875, "learning_rate": 7.993181727875765e-05, "loss": 0.0016, "step": 12092 }, { "epoch": 5.644340723453909, "grad_norm": 0.318359375, "learning_rate": 7.991744252251788e-05, "loss": 0.0054, "step": 12093 }, { "epoch": 5.644807467911319, "grad_norm": 0.1796875, "learning_rate": 7.99030681986779e-05, "loss": 0.001, "step": 12094 }, { "epoch": 5.6452742123687285, "grad_norm": 0.053466796875, "learning_rate": 7.988869430754714e-05, "loss": 0.0027, "step": 12095 }, { "epoch": 5.645740956826137, "grad_norm": 0.1357421875, "learning_rate": 7.987432084943509e-05, "loss": 0.001, "step": 12096 }, { "epoch": 5.646207701283547, "grad_norm": 0.08837890625, "learning_rate": 7.985994782465127e-05, "loss": 0.0068, "step": 12097 }, { "epoch": 5.646674445740957, "grad_norm": 0.05712890625, "learning_rate": 7.984557523350512e-05, "loss": 0.0005, "step": 12098 }, { "epoch": 5.647141190198367, "grad_norm": 0.0810546875, "learning_rate": 7.98312030763061e-05, "loss": 0.0007, "step": 12099 }, { "epoch": 5.647607934655776, "grad_norm": 0.08837890625, "learning_rate": 7.981683135336362e-05, "loss": 0.0007, "step": 12100 }, { "epoch": 5.6480746791131855, "grad_norm": 0.1298828125, "learning_rate": 7.980246006498718e-05, "loss": 0.0007, "step": 12101 }, { "epoch": 5.648541423570595, "grad_norm": 0.25390625, "learning_rate": 7.97880892114862e-05, "loss": 0.0011, "step": 12102 }, { "epoch": 5.649008168028004, "grad_norm": 0.2314453125, "learning_rate": 7.977371879317004e-05, "loss": 0.001, "step": 12103 }, { "epoch": 5.649474912485414, "grad_norm": 0.1337890625, "learning_rate": 7.975934881034819e-05, "loss": 0.0008, "step": 12104 }, { "epoch": 5.649941656942824, "grad_norm": 0.05322265625, "learning_rate": 7.974497926333001e-05, "loss": 0.002, "step": 12105 }, { "epoch": 5.650408401400234, "grad_norm": 0.052734375, "learning_rate": 7.973061015242489e-05, "loss": 0.002, "step": 12106 }, { "epoch": 5.650875145857643, "grad_norm": 0.1201171875, "learning_rate": 7.971624147794224e-05, "loss": 0.0007, "step": 12107 }, { "epoch": 5.651341890315052, "grad_norm": 0.06884765625, "learning_rate": 7.970187324019141e-05, "loss": 0.0005, "step": 12108 }, { "epoch": 5.651808634772462, "grad_norm": 0.1806640625, "learning_rate": 7.968750543948177e-05, "loss": 0.0036, "step": 12109 }, { "epoch": 5.652275379229872, "grad_norm": 0.033447265625, "learning_rate": 7.967313807612266e-05, "loss": 0.0003, "step": 12110 }, { "epoch": 5.652742123687281, "grad_norm": 0.1884765625, "learning_rate": 7.965877115042347e-05, "loss": 0.0049, "step": 12111 }, { "epoch": 5.653208868144691, "grad_norm": 0.103515625, "learning_rate": 7.964440466269349e-05, "loss": 0.004, "step": 12112 }, { "epoch": 5.6536756126021, "grad_norm": 0.15625, "learning_rate": 7.963003861324205e-05, "loss": 0.001, "step": 12113 }, { "epoch": 5.65414235705951, "grad_norm": 0.017578125, "learning_rate": 7.961567300237849e-05, "loss": 0.0003, "step": 12114 }, { "epoch": 5.65460910151692, "grad_norm": 0.064453125, "learning_rate": 7.96013078304121e-05, "loss": 0.0026, "step": 12115 }, { "epoch": 5.655075845974329, "grad_norm": 0.08349609375, "learning_rate": 7.958694309765219e-05, "loss": 0.0022, "step": 12116 }, { "epoch": 5.655542590431739, "grad_norm": 0.04248046875, "learning_rate": 7.957257880440805e-05, "loss": 0.0004, "step": 12117 }, { "epoch": 5.6560093348891485, "grad_norm": 0.038818359375, "learning_rate": 7.955821495098896e-05, "loss": 0.0004, "step": 12118 }, { "epoch": 5.656476079346557, "grad_norm": 0.228515625, "learning_rate": 7.954385153770418e-05, "loss": 0.0011, "step": 12119 }, { "epoch": 5.656942823803967, "grad_norm": 0.57421875, "learning_rate": 7.952948856486295e-05, "loss": 0.0068, "step": 12120 }, { "epoch": 5.657409568261377, "grad_norm": 0.06396484375, "learning_rate": 7.951512603277456e-05, "loss": 0.0005, "step": 12121 }, { "epoch": 5.657876312718787, "grad_norm": 0.033203125, "learning_rate": 7.950076394174825e-05, "loss": 0.0004, "step": 12122 }, { "epoch": 5.658343057176196, "grad_norm": 0.1435546875, "learning_rate": 7.948640229209321e-05, "loss": 0.0056, "step": 12123 }, { "epoch": 5.6588098016336055, "grad_norm": 0.1474609375, "learning_rate": 7.947204108411869e-05, "loss": 0.0022, "step": 12124 }, { "epoch": 5.659276546091015, "grad_norm": 0.2177734375, "learning_rate": 7.945768031813395e-05, "loss": 0.0008, "step": 12125 }, { "epoch": 5.659743290548425, "grad_norm": 0.099609375, "learning_rate": 7.944331999444811e-05, "loss": 0.0006, "step": 12126 }, { "epoch": 5.660210035005834, "grad_norm": 0.279296875, "learning_rate": 7.94289601133704e-05, "loss": 0.0016, "step": 12127 }, { "epoch": 5.660676779463244, "grad_norm": 0.0888671875, "learning_rate": 7.941460067520998e-05, "loss": 0.0007, "step": 12128 }, { "epoch": 5.661143523920654, "grad_norm": 0.10400390625, "learning_rate": 7.940024168027607e-05, "loss": 0.0005, "step": 12129 }, { "epoch": 5.661610268378063, "grad_norm": 0.1796875, "learning_rate": 7.938588312887782e-05, "loss": 0.0031, "step": 12130 }, { "epoch": 5.662077012835472, "grad_norm": 0.0478515625, "learning_rate": 7.937152502132437e-05, "loss": 0.0006, "step": 12131 }, { "epoch": 5.662543757292882, "grad_norm": 0.0299072265625, "learning_rate": 7.935716735792486e-05, "loss": 0.0004, "step": 12132 }, { "epoch": 5.663010501750292, "grad_norm": 0.1494140625, "learning_rate": 7.934281013898846e-05, "loss": 0.0026, "step": 12133 }, { "epoch": 5.663477246207702, "grad_norm": 0.060791015625, "learning_rate": 7.932845336482426e-05, "loss": 0.0004, "step": 12134 }, { "epoch": 5.663943990665111, "grad_norm": 0.4375, "learning_rate": 7.93140970357414e-05, "loss": 0.0027, "step": 12135 }, { "epoch": 5.66441073512252, "grad_norm": 0.203125, "learning_rate": 7.929974115204898e-05, "loss": 0.0008, "step": 12136 }, { "epoch": 5.66487747957993, "grad_norm": 0.0400390625, "learning_rate": 7.928538571405611e-05, "loss": 0.0004, "step": 12137 }, { "epoch": 5.665344224037339, "grad_norm": 0.51171875, "learning_rate": 7.927103072207185e-05, "loss": 0.0022, "step": 12138 }, { "epoch": 5.665810968494749, "grad_norm": 0.1435546875, "learning_rate": 7.925667617640532e-05, "loss": 0.0046, "step": 12139 }, { "epoch": 5.666277712952159, "grad_norm": 0.2041015625, "learning_rate": 7.924232207736556e-05, "loss": 0.0084, "step": 12140 }, { "epoch": 5.6667444574095684, "grad_norm": 0.103515625, "learning_rate": 7.922796842526165e-05, "loss": 0.0042, "step": 12141 }, { "epoch": 5.667211201866978, "grad_norm": 0.1826171875, "learning_rate": 7.921361522040258e-05, "loss": 0.0007, "step": 12142 }, { "epoch": 5.667677946324387, "grad_norm": 0.11181640625, "learning_rate": 7.919926246309748e-05, "loss": 0.0007, "step": 12143 }, { "epoch": 5.668144690781797, "grad_norm": 0.1845703125, "learning_rate": 7.918491015365533e-05, "loss": 0.0009, "step": 12144 }, { "epoch": 5.668611435239207, "grad_norm": 0.1123046875, "learning_rate": 7.917055829238514e-05, "loss": 0.0007, "step": 12145 }, { "epoch": 5.669078179696616, "grad_norm": 0.1796875, "learning_rate": 7.915620687959597e-05, "loss": 0.0025, "step": 12146 }, { "epoch": 5.6695449241540254, "grad_norm": 0.05908203125, "learning_rate": 7.914185591559679e-05, "loss": 0.0039, "step": 12147 }, { "epoch": 5.670011668611435, "grad_norm": 0.047119140625, "learning_rate": 7.912750540069658e-05, "loss": 0.0024, "step": 12148 }, { "epoch": 5.670478413068845, "grad_norm": 0.1591796875, "learning_rate": 7.911315533520432e-05, "loss": 0.001, "step": 12149 }, { "epoch": 5.670945157526255, "grad_norm": 0.244140625, "learning_rate": 7.909880571942904e-05, "loss": 0.0042, "step": 12150 }, { "epoch": 5.671411901983664, "grad_norm": 0.01226806640625, "learning_rate": 7.908445655367964e-05, "loss": 0.0002, "step": 12151 }, { "epoch": 5.6718786464410735, "grad_norm": 0.109375, "learning_rate": 7.907010783826509e-05, "loss": 0.0005, "step": 12152 }, { "epoch": 5.672345390898483, "grad_norm": 0.07568359375, "learning_rate": 7.905575957349436e-05, "loss": 0.0007, "step": 12153 }, { "epoch": 5.672812135355892, "grad_norm": 0.06005859375, "learning_rate": 7.904141175967635e-05, "loss": 0.0006, "step": 12154 }, { "epoch": 5.673278879813302, "grad_norm": 0.357421875, "learning_rate": 7.902706439712e-05, "loss": 0.0019, "step": 12155 }, { "epoch": 5.673745624270712, "grad_norm": 0.267578125, "learning_rate": 7.901271748613422e-05, "loss": 0.0038, "step": 12156 }, { "epoch": 5.674212368728122, "grad_norm": 0.0849609375, "learning_rate": 7.899837102702794e-05, "loss": 0.0006, "step": 12157 }, { "epoch": 5.674679113185531, "grad_norm": 0.07666015625, "learning_rate": 7.898402502011002e-05, "loss": 0.0007, "step": 12158 }, { "epoch": 5.67514585764294, "grad_norm": 0.330078125, "learning_rate": 7.896967946568936e-05, "loss": 0.0019, "step": 12159 }, { "epoch": 5.67561260210035, "grad_norm": 0.080078125, "learning_rate": 7.895533436407483e-05, "loss": 0.0053, "step": 12160 }, { "epoch": 5.67607934655776, "grad_norm": 0.173828125, "learning_rate": 7.894098971557531e-05, "loss": 0.0007, "step": 12161 }, { "epoch": 5.676546091015169, "grad_norm": 0.08935546875, "learning_rate": 7.892664552049963e-05, "loss": 0.0006, "step": 12162 }, { "epoch": 5.677012835472579, "grad_norm": 0.12353515625, "learning_rate": 7.891230177915666e-05, "loss": 0.0032, "step": 12163 }, { "epoch": 5.677479579929988, "grad_norm": 0.1357421875, "learning_rate": 7.889795849185524e-05, "loss": 0.0031, "step": 12164 }, { "epoch": 5.677946324387398, "grad_norm": 0.04833984375, "learning_rate": 7.888361565890417e-05, "loss": 0.0005, "step": 12165 }, { "epoch": 5.678413068844807, "grad_norm": 0.123046875, "learning_rate": 7.886927328061228e-05, "loss": 0.0005, "step": 12166 }, { "epoch": 5.678879813302217, "grad_norm": 0.0289306640625, "learning_rate": 7.885493135728841e-05, "loss": 0.0004, "step": 12167 }, { "epoch": 5.679346557759627, "grad_norm": 0.1826171875, "learning_rate": 7.884058988924132e-05, "loss": 0.0011, "step": 12168 }, { "epoch": 5.6798133022170365, "grad_norm": 0.25, "learning_rate": 7.882624887677979e-05, "loss": 0.0012, "step": 12169 }, { "epoch": 5.680280046674445, "grad_norm": 0.119140625, "learning_rate": 7.881190832021264e-05, "loss": 0.0029, "step": 12170 }, { "epoch": 5.680746791131855, "grad_norm": 0.02734375, "learning_rate": 7.87975682198486e-05, "loss": 0.0004, "step": 12171 }, { "epoch": 5.681213535589265, "grad_norm": 0.0267333984375, "learning_rate": 7.878322857599641e-05, "loss": 0.0003, "step": 12172 }, { "epoch": 5.681680280046675, "grad_norm": 0.0234375, "learning_rate": 7.876888938896494e-05, "loss": 0.0003, "step": 12173 }, { "epoch": 5.682147024504084, "grad_norm": 0.27734375, "learning_rate": 7.875455065906275e-05, "loss": 0.0011, "step": 12174 }, { "epoch": 5.6826137689614935, "grad_norm": 0.02294921875, "learning_rate": 7.874021238659869e-05, "loss": 0.0004, "step": 12175 }, { "epoch": 5.683080513418903, "grad_norm": 0.10302734375, "learning_rate": 7.872587457188144e-05, "loss": 0.0004, "step": 12176 }, { "epoch": 5.683547257876313, "grad_norm": 0.193359375, "learning_rate": 7.87115372152197e-05, "loss": 0.0041, "step": 12177 }, { "epoch": 5.684014002333722, "grad_norm": 0.10888671875, "learning_rate": 7.86972003169222e-05, "loss": 0.0005, "step": 12178 }, { "epoch": 5.684480746791132, "grad_norm": 0.11083984375, "learning_rate": 7.868286387729762e-05, "loss": 0.0009, "step": 12179 }, { "epoch": 5.684947491248542, "grad_norm": 0.09765625, "learning_rate": 7.866852789665463e-05, "loss": 0.0009, "step": 12180 }, { "epoch": 5.6854142357059505, "grad_norm": 0.1328125, "learning_rate": 7.865419237530187e-05, "loss": 0.0006, "step": 12181 }, { "epoch": 5.68588098016336, "grad_norm": 0.10400390625, "learning_rate": 7.863985731354806e-05, "loss": 0.0006, "step": 12182 }, { "epoch": 5.68634772462077, "grad_norm": 0.1484375, "learning_rate": 7.862552271170183e-05, "loss": 0.0023, "step": 12183 }, { "epoch": 5.68681446907818, "grad_norm": 0.1103515625, "learning_rate": 7.861118857007177e-05, "loss": 0.0005, "step": 12184 }, { "epoch": 5.68728121353559, "grad_norm": 0.07958984375, "learning_rate": 7.859685488896659e-05, "loss": 0.0007, "step": 12185 }, { "epoch": 5.687747957992999, "grad_norm": 0.09375, "learning_rate": 7.858252166869487e-05, "loss": 0.0005, "step": 12186 }, { "epoch": 5.688214702450408, "grad_norm": 0.04443359375, "learning_rate": 7.85681889095652e-05, "loss": 0.0004, "step": 12187 }, { "epoch": 5.688681446907818, "grad_norm": 0.08154296875, "learning_rate": 7.85538566118862e-05, "loss": 0.0006, "step": 12188 }, { "epoch": 5.689148191365227, "grad_norm": 0.359375, "learning_rate": 7.853952477596648e-05, "loss": 0.0012, "step": 12189 }, { "epoch": 5.689614935822637, "grad_norm": 0.130859375, "learning_rate": 7.852519340211462e-05, "loss": 0.0033, "step": 12190 }, { "epoch": 5.690081680280047, "grad_norm": 0.056396484375, "learning_rate": 7.851086249063912e-05, "loss": 0.0029, "step": 12191 }, { "epoch": 5.6905484247374565, "grad_norm": 0.07177734375, "learning_rate": 7.849653204184864e-05, "loss": 0.0007, "step": 12192 }, { "epoch": 5.691015169194866, "grad_norm": 0.09375, "learning_rate": 7.848220205605168e-05, "loss": 0.0008, "step": 12193 }, { "epoch": 5.691481913652275, "grad_norm": 0.1025390625, "learning_rate": 7.846787253355675e-05, "loss": 0.0005, "step": 12194 }, { "epoch": 5.691948658109685, "grad_norm": 0.1826171875, "learning_rate": 7.845354347467244e-05, "loss": 0.004, "step": 12195 }, { "epoch": 5.692415402567095, "grad_norm": 0.1953125, "learning_rate": 7.843921487970725e-05, "loss": 0.0035, "step": 12196 }, { "epoch": 5.692882147024504, "grad_norm": 0.2109375, "learning_rate": 7.842488674896968e-05, "loss": 0.0012, "step": 12197 }, { "epoch": 5.6933488914819135, "grad_norm": 0.30078125, "learning_rate": 7.841055908276822e-05, "loss": 0.0015, "step": 12198 }, { "epoch": 5.693815635939323, "grad_norm": 0.060302734375, "learning_rate": 7.83962318814114e-05, "loss": 0.0004, "step": 12199 }, { "epoch": 5.694282380396733, "grad_norm": 0.0194091796875, "learning_rate": 7.838190514520767e-05, "loss": 0.0002, "step": 12200 }, { "epoch": 5.694749124854142, "grad_norm": 0.05712890625, "learning_rate": 7.836757887446548e-05, "loss": 0.0005, "step": 12201 }, { "epoch": 5.695215869311552, "grad_norm": 0.158203125, "learning_rate": 7.835325306949335e-05, "loss": 0.0008, "step": 12202 }, { "epoch": 5.695682613768962, "grad_norm": 0.0615234375, "learning_rate": 7.833892773059971e-05, "loss": 0.0005, "step": 12203 }, { "epoch": 5.696149358226371, "grad_norm": 0.1220703125, "learning_rate": 7.832460285809296e-05, "loss": 0.0013, "step": 12204 }, { "epoch": 5.69661610268378, "grad_norm": 0.130859375, "learning_rate": 7.831027845228158e-05, "loss": 0.0009, "step": 12205 }, { "epoch": 5.69708284714119, "grad_norm": 0.052001953125, "learning_rate": 7.829595451347397e-05, "loss": 0.0004, "step": 12206 }, { "epoch": 5.6975495915986, "grad_norm": 0.0595703125, "learning_rate": 7.828163104197854e-05, "loss": 0.0004, "step": 12207 }, { "epoch": 5.69801633605601, "grad_norm": 0.040283203125, "learning_rate": 7.826730803810367e-05, "loss": 0.0004, "step": 12208 }, { "epoch": 5.698483080513419, "grad_norm": 0.08642578125, "learning_rate": 7.825298550215778e-05, "loss": 0.0007, "step": 12209 }, { "epoch": 5.698949824970828, "grad_norm": 0.07568359375, "learning_rate": 7.823866343444926e-05, "loss": 0.0033, "step": 12210 }, { "epoch": 5.699416569428238, "grad_norm": 0.037841796875, "learning_rate": 7.822434183528642e-05, "loss": 0.0004, "step": 12211 }, { "epoch": 5.699883313885648, "grad_norm": 0.09130859375, "learning_rate": 7.821002070497769e-05, "loss": 0.0006, "step": 12212 }, { "epoch": 5.700350058343057, "grad_norm": 0.142578125, "learning_rate": 7.81957000438314e-05, "loss": 0.001, "step": 12213 }, { "epoch": 5.700816802800467, "grad_norm": 0.12255859375, "learning_rate": 7.818137985215586e-05, "loss": 0.001, "step": 12214 }, { "epoch": 5.7012835472578764, "grad_norm": 0.134765625, "learning_rate": 7.816706013025943e-05, "loss": 0.0006, "step": 12215 }, { "epoch": 5.701750291715286, "grad_norm": 0.068359375, "learning_rate": 7.815274087845042e-05, "loss": 0.0006, "step": 12216 }, { "epoch": 5.702217036172695, "grad_norm": 0.10302734375, "learning_rate": 7.813842209703715e-05, "loss": 0.0008, "step": 12217 }, { "epoch": 5.702683780630105, "grad_norm": 0.42578125, "learning_rate": 7.812410378632787e-05, "loss": 0.0034, "step": 12218 }, { "epoch": 5.703150525087515, "grad_norm": 0.0888671875, "learning_rate": 7.810978594663093e-05, "loss": 0.0004, "step": 12219 }, { "epoch": 5.7036172695449245, "grad_norm": 0.21875, "learning_rate": 7.80954685782546e-05, "loss": 0.0016, "step": 12220 }, { "epoch": 5.7040840140023334, "grad_norm": 0.359375, "learning_rate": 7.808115168150714e-05, "loss": 0.002, "step": 12221 }, { "epoch": 5.704550758459743, "grad_norm": 0.1494140625, "learning_rate": 7.80668352566968e-05, "loss": 0.001, "step": 12222 }, { "epoch": 5.705017502917153, "grad_norm": 0.22265625, "learning_rate": 7.80525193041318e-05, "loss": 0.0022, "step": 12223 }, { "epoch": 5.705484247374562, "grad_norm": 0.04296875, "learning_rate": 7.803820382412044e-05, "loss": 0.0004, "step": 12224 }, { "epoch": 5.705950991831972, "grad_norm": 0.0291748046875, "learning_rate": 7.802388881697092e-05, "loss": 0.0003, "step": 12225 }, { "epoch": 5.7064177362893815, "grad_norm": 0.31640625, "learning_rate": 7.800957428299142e-05, "loss": 0.0019, "step": 12226 }, { "epoch": 5.706884480746791, "grad_norm": 0.040771484375, "learning_rate": 7.799526022249022e-05, "loss": 0.0003, "step": 12227 }, { "epoch": 5.707351225204201, "grad_norm": 0.181640625, "learning_rate": 7.798094663577546e-05, "loss": 0.001, "step": 12228 }, { "epoch": 5.70781796966161, "grad_norm": 0.1494140625, "learning_rate": 7.796663352315537e-05, "loss": 0.0007, "step": 12229 }, { "epoch": 5.70828471411902, "grad_norm": 0.0185546875, "learning_rate": 7.795232088493809e-05, "loss": 0.0002, "step": 12230 }, { "epoch": 5.70875145857643, "grad_norm": 0.1025390625, "learning_rate": 7.793800872143182e-05, "loss": 0.0009, "step": 12231 }, { "epoch": 5.7092182030338385, "grad_norm": 0.049072265625, "learning_rate": 7.792369703294469e-05, "loss": 0.0005, "step": 12232 }, { "epoch": 5.709684947491248, "grad_norm": 0.0927734375, "learning_rate": 7.790938581978484e-05, "loss": 0.0004, "step": 12233 }, { "epoch": 5.710151691948658, "grad_norm": 0.126953125, "learning_rate": 7.789507508226045e-05, "loss": 0.0006, "step": 12234 }, { "epoch": 5.710618436406068, "grad_norm": 0.09326171875, "learning_rate": 7.788076482067961e-05, "loss": 0.0005, "step": 12235 }, { "epoch": 5.711085180863478, "grad_norm": 0.1748046875, "learning_rate": 7.786645503535044e-05, "loss": 0.0037, "step": 12236 }, { "epoch": 5.711551925320887, "grad_norm": 0.0595703125, "learning_rate": 7.785214572658106e-05, "loss": 0.0004, "step": 12237 }, { "epoch": 5.712018669778296, "grad_norm": 0.205078125, "learning_rate": 7.783783689467957e-05, "loss": 0.0025, "step": 12238 }, { "epoch": 5.712485414235706, "grad_norm": 0.033447265625, "learning_rate": 7.782352853995402e-05, "loss": 0.0003, "step": 12239 }, { "epoch": 5.712952158693115, "grad_norm": 0.1669921875, "learning_rate": 7.78092206627125e-05, "loss": 0.001, "step": 12240 }, { "epoch": 5.713418903150525, "grad_norm": 0.16796875, "learning_rate": 7.779491326326309e-05, "loss": 0.001, "step": 12241 }, { "epoch": 5.713885647607935, "grad_norm": 0.040283203125, "learning_rate": 7.778060634191384e-05, "loss": 0.0004, "step": 12242 }, { "epoch": 5.7143523920653445, "grad_norm": 0.1240234375, "learning_rate": 7.776629989897276e-05, "loss": 0.0006, "step": 12243 }, { "epoch": 5.714819136522753, "grad_norm": 0.05712890625, "learning_rate": 7.775199393474795e-05, "loss": 0.0004, "step": 12244 }, { "epoch": 5.715285880980163, "grad_norm": 0.037353515625, "learning_rate": 7.773768844954737e-05, "loss": 0.0003, "step": 12245 }, { "epoch": 5.715752625437573, "grad_norm": 0.07080078125, "learning_rate": 7.772338344367907e-05, "loss": 0.0004, "step": 12246 }, { "epoch": 5.716219369894983, "grad_norm": 0.0693359375, "learning_rate": 7.770907891745102e-05, "loss": 0.0005, "step": 12247 }, { "epoch": 5.716686114352392, "grad_norm": 0.0262451171875, "learning_rate": 7.769477487117127e-05, "loss": 0.0003, "step": 12248 }, { "epoch": 5.7171528588098015, "grad_norm": 0.17578125, "learning_rate": 7.768047130514776e-05, "loss": 0.0006, "step": 12249 }, { "epoch": 5.717619603267211, "grad_norm": 0.1328125, "learning_rate": 7.766616821968843e-05, "loss": 0.0028, "step": 12250 }, { "epoch": 5.718086347724621, "grad_norm": 0.083984375, "learning_rate": 7.76518656151013e-05, "loss": 0.0006, "step": 12251 }, { "epoch": 5.71855309218203, "grad_norm": 0.12451171875, "learning_rate": 7.763756349169431e-05, "loss": 0.0007, "step": 12252 }, { "epoch": 5.71901983663944, "grad_norm": 0.07080078125, "learning_rate": 7.762326184977537e-05, "loss": 0.0004, "step": 12253 }, { "epoch": 5.71948658109685, "grad_norm": 0.052001953125, "learning_rate": 7.760896068965244e-05, "loss": 0.0005, "step": 12254 }, { "epoch": 5.719953325554259, "grad_norm": 0.171875, "learning_rate": 7.759466001163343e-05, "loss": 0.0008, "step": 12255 }, { "epoch": 5.720420070011668, "grad_norm": 0.0634765625, "learning_rate": 7.758035981602626e-05, "loss": 0.0031, "step": 12256 }, { "epoch": 5.720886814469078, "grad_norm": 0.154296875, "learning_rate": 7.75660601031388e-05, "loss": 0.0009, "step": 12257 }, { "epoch": 5.721353558926488, "grad_norm": 0.056884765625, "learning_rate": 7.755176087327895e-05, "loss": 0.0005, "step": 12258 }, { "epoch": 5.721820303383898, "grad_norm": 0.263671875, "learning_rate": 7.753746212675463e-05, "loss": 0.001, "step": 12259 }, { "epoch": 5.722287047841307, "grad_norm": 0.083984375, "learning_rate": 7.752316386387362e-05, "loss": 0.0007, "step": 12260 }, { "epoch": 5.722753792298716, "grad_norm": 0.0179443359375, "learning_rate": 7.750886608494388e-05, "loss": 0.0003, "step": 12261 }, { "epoch": 5.723220536756126, "grad_norm": 0.0947265625, "learning_rate": 7.74945687902732e-05, "loss": 0.0005, "step": 12262 }, { "epoch": 5.723687281213536, "grad_norm": 0.044189453125, "learning_rate": 7.74802719801694e-05, "loss": 0.0005, "step": 12263 }, { "epoch": 5.724154025670945, "grad_norm": 0.0478515625, "learning_rate": 7.746597565494033e-05, "loss": 0.0004, "step": 12264 }, { "epoch": 5.724620770128355, "grad_norm": 0.053955078125, "learning_rate": 7.745167981489385e-05, "loss": 0.0005, "step": 12265 }, { "epoch": 5.7250875145857645, "grad_norm": 0.267578125, "learning_rate": 7.743738446033768e-05, "loss": 0.0016, "step": 12266 }, { "epoch": 5.725554259043173, "grad_norm": 0.1513671875, "learning_rate": 7.742308959157966e-05, "loss": 0.0005, "step": 12267 }, { "epoch": 5.726021003500583, "grad_norm": 0.1103515625, "learning_rate": 7.740879520892758e-05, "loss": 0.0049, "step": 12268 }, { "epoch": 5.726487747957993, "grad_norm": 0.2294921875, "learning_rate": 7.739450131268922e-05, "loss": 0.0035, "step": 12269 }, { "epoch": 5.726954492415403, "grad_norm": 0.236328125, "learning_rate": 7.738020790317233e-05, "loss": 0.0014, "step": 12270 }, { "epoch": 5.727421236872813, "grad_norm": 0.1865234375, "learning_rate": 7.736591498068465e-05, "loss": 0.0007, "step": 12271 }, { "epoch": 5.7278879813302215, "grad_norm": 0.08447265625, "learning_rate": 7.735162254553391e-05, "loss": 0.0084, "step": 12272 }, { "epoch": 5.728354725787631, "grad_norm": 0.08154296875, "learning_rate": 7.733733059802788e-05, "loss": 0.0032, "step": 12273 }, { "epoch": 5.728821470245041, "grad_norm": 0.0791015625, "learning_rate": 7.732303913847427e-05, "loss": 0.0004, "step": 12274 }, { "epoch": 5.72928821470245, "grad_norm": 0.03466796875, "learning_rate": 7.730874816718078e-05, "loss": 0.0004, "step": 12275 }, { "epoch": 5.72975495915986, "grad_norm": 0.212890625, "learning_rate": 7.729445768445512e-05, "loss": 0.0008, "step": 12276 }, { "epoch": 5.73022170361727, "grad_norm": 0.375, "learning_rate": 7.728016769060499e-05, "loss": 0.0014, "step": 12277 }, { "epoch": 5.730688448074679, "grad_norm": 0.09423828125, "learning_rate": 7.726587818593806e-05, "loss": 0.0006, "step": 12278 }, { "epoch": 5.731155192532089, "grad_norm": 0.0206298828125, "learning_rate": 7.725158917076196e-05, "loss": 0.0005, "step": 12279 }, { "epoch": 5.731621936989498, "grad_norm": 0.171875, "learning_rate": 7.723730064538442e-05, "loss": 0.0012, "step": 12280 }, { "epoch": 5.732088681446908, "grad_norm": 0.1748046875, "learning_rate": 7.722301261011306e-05, "loss": 0.001, "step": 12281 }, { "epoch": 5.732555425904318, "grad_norm": 0.020751953125, "learning_rate": 7.720872506525547e-05, "loss": 0.0003, "step": 12282 }, { "epoch": 5.733022170361727, "grad_norm": 0.00836181640625, "learning_rate": 7.719443801111935e-05, "loss": 0.0002, "step": 12283 }, { "epoch": 5.733488914819136, "grad_norm": 0.0242919921875, "learning_rate": 7.718015144801227e-05, "loss": 0.0003, "step": 12284 }, { "epoch": 5.733955659276546, "grad_norm": 0.05322265625, "learning_rate": 7.716586537624184e-05, "loss": 0.0005, "step": 12285 }, { "epoch": 5.734422403733956, "grad_norm": 0.07275390625, "learning_rate": 7.71515797961157e-05, "loss": 0.0006, "step": 12286 }, { "epoch": 5.734889148191365, "grad_norm": 0.0517578125, "learning_rate": 7.713729470794136e-05, "loss": 0.0003, "step": 12287 }, { "epoch": 5.735355892648775, "grad_norm": 0.205078125, "learning_rate": 7.712301011202645e-05, "loss": 0.0052, "step": 12288 }, { "epoch": 5.7358226371061845, "grad_norm": 0.034912109375, "learning_rate": 7.71087260086785e-05, "loss": 0.0004, "step": 12289 }, { "epoch": 5.736289381563594, "grad_norm": 0.064453125, "learning_rate": 7.70944423982051e-05, "loss": 0.0006, "step": 12290 }, { "epoch": 5.736756126021003, "grad_norm": 0.1396484375, "learning_rate": 7.708015928091375e-05, "loss": 0.0011, "step": 12291 }, { "epoch": 5.737222870478413, "grad_norm": 0.0233154296875, "learning_rate": 7.7065876657112e-05, "loss": 0.0003, "step": 12292 }, { "epoch": 5.737689614935823, "grad_norm": 0.03369140625, "learning_rate": 7.705159452710737e-05, "loss": 0.0004, "step": 12293 }, { "epoch": 5.7381563593932325, "grad_norm": 0.05615234375, "learning_rate": 7.70373128912074e-05, "loss": 0.0005, "step": 12294 }, { "epoch": 5.7386231038506414, "grad_norm": 0.083984375, "learning_rate": 7.702303174971952e-05, "loss": 0.0006, "step": 12295 }, { "epoch": 5.739089848308051, "grad_norm": 0.05029296875, "learning_rate": 7.700875110295128e-05, "loss": 0.0003, "step": 12296 }, { "epoch": 5.739556592765461, "grad_norm": 0.1357421875, "learning_rate": 7.699447095121015e-05, "loss": 0.0026, "step": 12297 }, { "epoch": 5.740023337222871, "grad_norm": 0.07568359375, "learning_rate": 7.698019129480358e-05, "loss": 0.0005, "step": 12298 }, { "epoch": 5.74049008168028, "grad_norm": 0.031494140625, "learning_rate": 7.696591213403901e-05, "loss": 0.0004, "step": 12299 }, { "epoch": 5.7409568261376895, "grad_norm": 0.1396484375, "learning_rate": 7.695163346922392e-05, "loss": 0.0027, "step": 12300 }, { "epoch": 5.741423570595099, "grad_norm": 0.1220703125, "learning_rate": 7.693735530066575e-05, "loss": 0.0007, "step": 12301 }, { "epoch": 5.741890315052509, "grad_norm": 0.32421875, "learning_rate": 7.692307762867188e-05, "loss": 0.0032, "step": 12302 }, { "epoch": 5.742357059509918, "grad_norm": 0.1259765625, "learning_rate": 7.690880045354977e-05, "loss": 0.0005, "step": 12303 }, { "epoch": 5.742823803967328, "grad_norm": 0.10888671875, "learning_rate": 7.689452377560682e-05, "loss": 0.0006, "step": 12304 }, { "epoch": 5.743290548424738, "grad_norm": 0.0712890625, "learning_rate": 7.688024759515038e-05, "loss": 0.0029, "step": 12305 }, { "epoch": 5.743757292882147, "grad_norm": 0.032958984375, "learning_rate": 7.686597191248789e-05, "loss": 0.0004, "step": 12306 }, { "epoch": 5.744224037339556, "grad_norm": 0.037841796875, "learning_rate": 7.685169672792668e-05, "loss": 0.0004, "step": 12307 }, { "epoch": 5.744690781796966, "grad_norm": 0.1748046875, "learning_rate": 7.683742204177414e-05, "loss": 0.0032, "step": 12308 }, { "epoch": 5.745157526254376, "grad_norm": 0.0498046875, "learning_rate": 7.682314785433756e-05, "loss": 0.0003, "step": 12309 }, { "epoch": 5.745624270711785, "grad_norm": 0.0162353515625, "learning_rate": 7.680887416592435e-05, "loss": 0.0003, "step": 12310 }, { "epoch": 5.746091015169195, "grad_norm": 0.130859375, "learning_rate": 7.679460097684182e-05, "loss": 0.0012, "step": 12311 }, { "epoch": 5.746557759626604, "grad_norm": 0.0390625, "learning_rate": 7.678032828739725e-05, "loss": 0.0023, "step": 12312 }, { "epoch": 5.747024504084014, "grad_norm": 0.040771484375, "learning_rate": 7.676605609789799e-05, "loss": 0.0003, "step": 12313 }, { "epoch": 5.747491248541424, "grad_norm": 0.056640625, "learning_rate": 7.675178440865133e-05, "loss": 0.0032, "step": 12314 }, { "epoch": 5.747957992998833, "grad_norm": 0.06396484375, "learning_rate": 7.673751321996454e-05, "loss": 0.0035, "step": 12315 }, { "epoch": 5.748424737456243, "grad_norm": 0.1689453125, "learning_rate": 7.672324253214492e-05, "loss": 0.0006, "step": 12316 }, { "epoch": 5.7488914819136525, "grad_norm": 0.03955078125, "learning_rate": 7.670897234549967e-05, "loss": 0.0004, "step": 12317 }, { "epoch": 5.749358226371061, "grad_norm": 0.09521484375, "learning_rate": 7.669470266033612e-05, "loss": 0.003, "step": 12318 }, { "epoch": 5.749824970828471, "grad_norm": 0.10986328125, "learning_rate": 7.668043347696148e-05, "loss": 0.0007, "step": 12319 }, { "epoch": 5.750291715285881, "grad_norm": 0.2177734375, "learning_rate": 7.666616479568297e-05, "loss": 0.0021, "step": 12320 }, { "epoch": 5.750758459743291, "grad_norm": 0.0311279296875, "learning_rate": 7.66518966168078e-05, "loss": 0.0003, "step": 12321 }, { "epoch": 5.751225204200701, "grad_norm": 0.0289306640625, "learning_rate": 7.663762894064324e-05, "loss": 0.0004, "step": 12322 }, { "epoch": 5.7516919486581095, "grad_norm": 0.06005859375, "learning_rate": 7.662336176749643e-05, "loss": 0.003, "step": 12323 }, { "epoch": 5.752158693115519, "grad_norm": 0.018310546875, "learning_rate": 7.660909509767455e-05, "loss": 0.0003, "step": 12324 }, { "epoch": 5.752625437572929, "grad_norm": 0.0247802734375, "learning_rate": 7.659482893148485e-05, "loss": 0.0004, "step": 12325 }, { "epoch": 5.753092182030338, "grad_norm": 0.020263671875, "learning_rate": 7.658056326923444e-05, "loss": 0.0003, "step": 12326 }, { "epoch": 5.753558926487748, "grad_norm": 0.07568359375, "learning_rate": 7.65662981112305e-05, "loss": 0.0004, "step": 12327 }, { "epoch": 5.754025670945158, "grad_norm": 0.318359375, "learning_rate": 7.655203345778012e-05, "loss": 0.0055, "step": 12328 }, { "epoch": 5.754025670945158, "eval_loss": 2.309670925140381, "eval_runtime": 55.4413, "eval_samples_per_second": 32.539, "eval_steps_per_second": 4.076, "step": 12328 }, { "epoch": 5.754492415402567, "grad_norm": 0.439453125, "learning_rate": 7.65377693091905e-05, "loss": 0.0036, "step": 12329 }, { "epoch": 5.754959159859976, "grad_norm": 0.018310546875, "learning_rate": 7.652350566576875e-05, "loss": 0.0003, "step": 12330 }, { "epoch": 5.755425904317386, "grad_norm": 0.0250244140625, "learning_rate": 7.650924252782194e-05, "loss": 0.0003, "step": 12331 }, { "epoch": 5.755892648774796, "grad_norm": 0.059814453125, "learning_rate": 7.649497989565723e-05, "loss": 0.0005, "step": 12332 }, { "epoch": 5.756359393232206, "grad_norm": 0.208984375, "learning_rate": 7.648071776958167e-05, "loss": 0.0006, "step": 12333 }, { "epoch": 5.756826137689615, "grad_norm": 0.11865234375, "learning_rate": 7.646645614990235e-05, "loss": 0.0006, "step": 12334 }, { "epoch": 5.757292882147024, "grad_norm": 0.031982421875, "learning_rate": 7.645219503692635e-05, "loss": 0.0003, "step": 12335 }, { "epoch": 5.757759626604434, "grad_norm": 0.057373046875, "learning_rate": 7.643793443096072e-05, "loss": 0.0004, "step": 12336 }, { "epoch": 5.758226371061844, "grad_norm": 0.046875, "learning_rate": 7.64236743323125e-05, "loss": 0.0005, "step": 12337 }, { "epoch": 5.758693115519253, "grad_norm": 0.049560546875, "learning_rate": 7.640941474128871e-05, "loss": 0.0004, "step": 12338 }, { "epoch": 5.759159859976663, "grad_norm": 0.07080078125, "learning_rate": 7.639515565819642e-05, "loss": 0.0007, "step": 12339 }, { "epoch": 5.7596266044340725, "grad_norm": 0.08349609375, "learning_rate": 7.638089708334261e-05, "loss": 0.0007, "step": 12340 }, { "epoch": 5.760093348891482, "grad_norm": 0.1494140625, "learning_rate": 7.636663901703428e-05, "loss": 0.0006, "step": 12341 }, { "epoch": 5.760560093348891, "grad_norm": 0.037109375, "learning_rate": 7.635238145957845e-05, "loss": 0.0003, "step": 12342 }, { "epoch": 5.761026837806301, "grad_norm": 0.10546875, "learning_rate": 7.633812441128208e-05, "loss": 0.0005, "step": 12343 }, { "epoch": 5.761493582263711, "grad_norm": 0.06982421875, "learning_rate": 7.632386787245213e-05, "loss": 0.0006, "step": 12344 }, { "epoch": 5.761960326721121, "grad_norm": 0.17578125, "learning_rate": 7.63096118433956e-05, "loss": 0.001, "step": 12345 }, { "epoch": 5.7624270711785295, "grad_norm": 0.027099609375, "learning_rate": 7.62953563244194e-05, "loss": 0.0003, "step": 12346 }, { "epoch": 5.762893815635939, "grad_norm": 0.05712890625, "learning_rate": 7.628110131583049e-05, "loss": 0.0005, "step": 12347 }, { "epoch": 5.763360560093349, "grad_norm": 0.03515625, "learning_rate": 7.626684681793577e-05, "loss": 0.0004, "step": 12348 }, { "epoch": 5.763827304550759, "grad_norm": 0.140625, "learning_rate": 7.625259283104218e-05, "loss": 0.0006, "step": 12349 }, { "epoch": 5.764294049008168, "grad_norm": 0.08251953125, "learning_rate": 7.623833935545662e-05, "loss": 0.0031, "step": 12350 }, { "epoch": 5.764760793465578, "grad_norm": 0.0361328125, "learning_rate": 7.622408639148593e-05, "loss": 0.0005, "step": 12351 }, { "epoch": 5.765227537922987, "grad_norm": 0.1357421875, "learning_rate": 7.620983393943709e-05, "loss": 0.0033, "step": 12352 }, { "epoch": 5.765694282380396, "grad_norm": 0.2177734375, "learning_rate": 7.619558199961691e-05, "loss": 0.005, "step": 12353 }, { "epoch": 5.766161026837806, "grad_norm": 0.0849609375, "learning_rate": 7.618133057233225e-05, "loss": 0.0006, "step": 12354 }, { "epoch": 5.766627771295216, "grad_norm": 0.09130859375, "learning_rate": 7.616707965788999e-05, "loss": 0.0005, "step": 12355 }, { "epoch": 5.767094515752626, "grad_norm": 0.0194091796875, "learning_rate": 7.615282925659694e-05, "loss": 0.0003, "step": 12356 }, { "epoch": 5.7675612602100355, "grad_norm": 0.09033203125, "learning_rate": 7.613857936875994e-05, "loss": 0.0039, "step": 12357 }, { "epoch": 5.768028004667444, "grad_norm": 0.0223388671875, "learning_rate": 7.612432999468577e-05, "loss": 0.0002, "step": 12358 }, { "epoch": 5.768494749124854, "grad_norm": 0.08837890625, "learning_rate": 7.61100811346813e-05, "loss": 0.0006, "step": 12359 }, { "epoch": 5.768961493582264, "grad_norm": 0.0250244140625, "learning_rate": 7.609583278905328e-05, "loss": 0.0003, "step": 12360 }, { "epoch": 5.769428238039673, "grad_norm": 0.037109375, "learning_rate": 7.608158495810849e-05, "loss": 0.0003, "step": 12361 }, { "epoch": 5.769894982497083, "grad_norm": 0.03466796875, "learning_rate": 7.606733764215373e-05, "loss": 0.0004, "step": 12362 }, { "epoch": 5.7703617269544925, "grad_norm": 0.0277099609375, "learning_rate": 7.605309084149574e-05, "loss": 0.0003, "step": 12363 }, { "epoch": 5.770828471411902, "grad_norm": 0.08203125, "learning_rate": 7.603884455644131e-05, "loss": 0.0005, "step": 12364 }, { "epoch": 5.771295215869312, "grad_norm": 0.2353515625, "learning_rate": 7.60245987872971e-05, "loss": 0.0011, "step": 12365 }, { "epoch": 5.771761960326721, "grad_norm": 0.03515625, "learning_rate": 7.601035353436987e-05, "loss": 0.0003, "step": 12366 }, { "epoch": 5.772228704784131, "grad_norm": 0.091796875, "learning_rate": 7.599610879796637e-05, "loss": 0.0026, "step": 12367 }, { "epoch": 5.7726954492415405, "grad_norm": 0.15625, "learning_rate": 7.598186457839327e-05, "loss": 0.0007, "step": 12368 }, { "epoch": 5.7731621936989495, "grad_norm": 0.06689453125, "learning_rate": 7.596762087595728e-05, "loss": 0.0004, "step": 12369 }, { "epoch": 5.773628938156359, "grad_norm": 0.021728515625, "learning_rate": 7.595337769096506e-05, "loss": 0.0003, "step": 12370 }, { "epoch": 5.774095682613769, "grad_norm": 0.0517578125, "learning_rate": 7.593913502372331e-05, "loss": 0.0004, "step": 12371 }, { "epoch": 5.774562427071179, "grad_norm": 0.1552734375, "learning_rate": 7.592489287453868e-05, "loss": 0.0008, "step": 12372 }, { "epoch": 5.775029171528588, "grad_norm": 0.357421875, "learning_rate": 7.591065124371782e-05, "loss": 0.0021, "step": 12373 }, { "epoch": 5.7754959159859975, "grad_norm": 0.03759765625, "learning_rate": 7.589641013156736e-05, "loss": 0.0004, "step": 12374 }, { "epoch": 5.775962660443407, "grad_norm": 0.09375, "learning_rate": 7.588216953839395e-05, "loss": 0.0005, "step": 12375 }, { "epoch": 5.776429404900817, "grad_norm": 0.053955078125, "learning_rate": 7.586792946450417e-05, "loss": 0.0005, "step": 12376 }, { "epoch": 5.776896149358226, "grad_norm": 0.1865234375, "learning_rate": 7.585368991020463e-05, "loss": 0.0008, "step": 12377 }, { "epoch": 5.777362893815636, "grad_norm": 0.01336669921875, "learning_rate": 7.583945087580196e-05, "loss": 0.0003, "step": 12378 }, { "epoch": 5.777829638273046, "grad_norm": 0.0458984375, "learning_rate": 7.582521236160272e-05, "loss": 0.0022, "step": 12379 }, { "epoch": 5.778296382730455, "grad_norm": 0.0213623046875, "learning_rate": 7.581097436791347e-05, "loss": 0.0002, "step": 12380 }, { "epoch": 5.778763127187864, "grad_norm": 0.035400390625, "learning_rate": 7.579673689504079e-05, "loss": 0.0012, "step": 12381 }, { "epoch": 5.779229871645274, "grad_norm": 0.216796875, "learning_rate": 7.578249994329122e-05, "loss": 0.0006, "step": 12382 }, { "epoch": 5.779696616102684, "grad_norm": 0.06591796875, "learning_rate": 7.576826351297127e-05, "loss": 0.0004, "step": 12383 }, { "epoch": 5.780163360560094, "grad_norm": 0.1865234375, "learning_rate": 7.575402760438752e-05, "loss": 0.0005, "step": 12384 }, { "epoch": 5.780630105017503, "grad_norm": 0.07861328125, "learning_rate": 7.573979221784645e-05, "loss": 0.0007, "step": 12385 }, { "epoch": 5.781096849474912, "grad_norm": 0.0400390625, "learning_rate": 7.572555735365457e-05, "loss": 0.0003, "step": 12386 }, { "epoch": 5.781563593932322, "grad_norm": 0.09228515625, "learning_rate": 7.571132301211836e-05, "loss": 0.0006, "step": 12387 }, { "epoch": 5.782030338389731, "grad_norm": 0.0113525390625, "learning_rate": 7.569708919354433e-05, "loss": 0.0002, "step": 12388 }, { "epoch": 5.782497082847141, "grad_norm": 0.041259765625, "learning_rate": 7.568285589823895e-05, "loss": 0.0003, "step": 12389 }, { "epoch": 5.782963827304551, "grad_norm": 0.0294189453125, "learning_rate": 7.566862312650862e-05, "loss": 0.0021, "step": 12390 }, { "epoch": 5.7834305717619605, "grad_norm": 0.1884765625, "learning_rate": 7.565439087865986e-05, "loss": 0.0006, "step": 12391 }, { "epoch": 5.78389731621937, "grad_norm": 0.16015625, "learning_rate": 7.564015915499908e-05, "loss": 0.0034, "step": 12392 }, { "epoch": 5.784364060676779, "grad_norm": 0.032470703125, "learning_rate": 7.562592795583268e-05, "loss": 0.0004, "step": 12393 }, { "epoch": 5.784830805134189, "grad_norm": 0.0478515625, "learning_rate": 7.561169728146711e-05, "loss": 0.0004, "step": 12394 }, { "epoch": 5.785297549591599, "grad_norm": 0.021484375, "learning_rate": 7.559746713220877e-05, "loss": 0.0004, "step": 12395 }, { "epoch": 5.785764294049008, "grad_norm": 0.037353515625, "learning_rate": 7.558323750836401e-05, "loss": 0.0003, "step": 12396 }, { "epoch": 5.7862310385064175, "grad_norm": 0.0390625, "learning_rate": 7.556900841023924e-05, "loss": 0.0004, "step": 12397 }, { "epoch": 5.786697782963827, "grad_norm": 0.123046875, "learning_rate": 7.555477983814083e-05, "loss": 0.0006, "step": 12398 }, { "epoch": 5.787164527421237, "grad_norm": 0.09521484375, "learning_rate": 7.554055179237515e-05, "loss": 0.0024, "step": 12399 }, { "epoch": 5.787631271878647, "grad_norm": 0.03662109375, "learning_rate": 7.55263242732485e-05, "loss": 0.0003, "step": 12400 }, { "epoch": 5.788098016336056, "grad_norm": 0.0172119140625, "learning_rate": 7.551209728106727e-05, "loss": 0.0002, "step": 12401 }, { "epoch": 5.788564760793466, "grad_norm": 0.0703125, "learning_rate": 7.549787081613774e-05, "loss": 0.0005, "step": 12402 }, { "epoch": 5.789031505250875, "grad_norm": 0.15625, "learning_rate": 7.548364487876623e-05, "loss": 0.0008, "step": 12403 }, { "epoch": 5.789498249708284, "grad_norm": 0.03564453125, "learning_rate": 7.546941946925906e-05, "loss": 0.0003, "step": 12404 }, { "epoch": 5.789964994165694, "grad_norm": 0.0303955078125, "learning_rate": 7.54551945879225e-05, "loss": 0.0023, "step": 12405 }, { "epoch": 5.790431738623104, "grad_norm": 0.06396484375, "learning_rate": 7.544097023506285e-05, "loss": 0.0004, "step": 12406 }, { "epoch": 5.790898483080514, "grad_norm": 0.072265625, "learning_rate": 7.542674641098633e-05, "loss": 0.0006, "step": 12407 }, { "epoch": 5.791365227537923, "grad_norm": 0.921875, "learning_rate": 7.541252311599925e-05, "loss": 0.0024, "step": 12408 }, { "epoch": 5.791831971995332, "grad_norm": 0.1474609375, "learning_rate": 7.539830035040782e-05, "loss": 0.0006, "step": 12409 }, { "epoch": 5.792298716452742, "grad_norm": 0.0263671875, "learning_rate": 7.538407811451825e-05, "loss": 0.0003, "step": 12410 }, { "epoch": 5.792765460910152, "grad_norm": 0.11474609375, "learning_rate": 7.536985640863683e-05, "loss": 0.0004, "step": 12411 }, { "epoch": 5.793232205367561, "grad_norm": 0.04833984375, "learning_rate": 7.535563523306974e-05, "loss": 0.0004, "step": 12412 }, { "epoch": 5.793698949824971, "grad_norm": 0.27734375, "learning_rate": 7.534141458812315e-05, "loss": 0.0011, "step": 12413 }, { "epoch": 5.7941656942823805, "grad_norm": 0.166015625, "learning_rate": 7.532719447410326e-05, "loss": 0.0013, "step": 12414 }, { "epoch": 5.79463243873979, "grad_norm": 0.046875, "learning_rate": 7.531297489131623e-05, "loss": 0.0003, "step": 12415 }, { "epoch": 5.795099183197199, "grad_norm": 0.035400390625, "learning_rate": 7.529875584006826e-05, "loss": 0.0003, "step": 12416 }, { "epoch": 5.795565927654609, "grad_norm": 0.07421875, "learning_rate": 7.528453732066548e-05, "loss": 0.0005, "step": 12417 }, { "epoch": 5.796032672112019, "grad_norm": 0.181640625, "learning_rate": 7.527031933341404e-05, "loss": 0.0006, "step": 12418 }, { "epoch": 5.796499416569429, "grad_norm": 0.0859375, "learning_rate": 7.525610187862004e-05, "loss": 0.0005, "step": 12419 }, { "epoch": 5.7969661610268375, "grad_norm": 0.0390625, "learning_rate": 7.524188495658964e-05, "loss": 0.0005, "step": 12420 }, { "epoch": 5.797432905484247, "grad_norm": 0.3515625, "learning_rate": 7.522766856762893e-05, "loss": 0.0011, "step": 12421 }, { "epoch": 5.797899649941657, "grad_norm": 0.072265625, "learning_rate": 7.521345271204397e-05, "loss": 0.0033, "step": 12422 }, { "epoch": 5.798366394399067, "grad_norm": 0.0498046875, "learning_rate": 7.51992373901409e-05, "loss": 0.0027, "step": 12423 }, { "epoch": 5.798833138856476, "grad_norm": 0.0242919921875, "learning_rate": 7.518502260222577e-05, "loss": 0.0003, "step": 12424 }, { "epoch": 5.799299883313886, "grad_norm": 0.0284423828125, "learning_rate": 7.517080834860462e-05, "loss": 0.0003, "step": 12425 }, { "epoch": 5.799766627771295, "grad_norm": 0.1708984375, "learning_rate": 7.515659462958354e-05, "loss": 0.0007, "step": 12426 }, { "epoch": 5.800233372228705, "grad_norm": 0.049072265625, "learning_rate": 7.514238144546852e-05, "loss": 0.0004, "step": 12427 }, { "epoch": 5.800700116686114, "grad_norm": 0.1865234375, "learning_rate": 7.512816879656562e-05, "loss": 0.0039, "step": 12428 }, { "epoch": 5.801166861143524, "grad_norm": 0.109375, "learning_rate": 7.511395668318082e-05, "loss": 0.0005, "step": 12429 }, { "epoch": 5.801633605600934, "grad_norm": 0.09619140625, "learning_rate": 7.509974510562017e-05, "loss": 0.0007, "step": 12430 }, { "epoch": 5.802100350058343, "grad_norm": 0.087890625, "learning_rate": 7.508553406418962e-05, "loss": 0.0007, "step": 12431 }, { "epoch": 5.802567094515752, "grad_norm": 0.0615234375, "learning_rate": 7.507132355919516e-05, "loss": 0.0043, "step": 12432 }, { "epoch": 5.803033838973162, "grad_norm": 0.041748046875, "learning_rate": 7.505711359094277e-05, "loss": 0.0003, "step": 12433 }, { "epoch": 5.803500583430572, "grad_norm": 0.0771484375, "learning_rate": 7.504290415973842e-05, "loss": 0.0005, "step": 12434 }, { "epoch": 5.803967327887982, "grad_norm": 0.068359375, "learning_rate": 7.502869526588802e-05, "loss": 0.0023, "step": 12435 }, { "epoch": 5.804434072345391, "grad_norm": 0.035888671875, "learning_rate": 7.50144869096975e-05, "loss": 0.0003, "step": 12436 }, { "epoch": 5.8049008168028005, "grad_norm": 0.111328125, "learning_rate": 7.50002790914728e-05, "loss": 0.0005, "step": 12437 }, { "epoch": 5.80536756126021, "grad_norm": 0.1513671875, "learning_rate": 7.498607181151984e-05, "loss": 0.0007, "step": 12438 }, { "epoch": 5.805834305717619, "grad_norm": 0.134765625, "learning_rate": 7.49718650701445e-05, "loss": 0.0029, "step": 12439 }, { "epoch": 5.806301050175029, "grad_norm": 0.032958984375, "learning_rate": 7.495765886765269e-05, "loss": 0.0004, "step": 12440 }, { "epoch": 5.806767794632439, "grad_norm": 0.66015625, "learning_rate": 7.494345320435026e-05, "loss": 0.0147, "step": 12441 }, { "epoch": 5.8072345390898485, "grad_norm": 0.03857421875, "learning_rate": 7.492924808054307e-05, "loss": 0.0006, "step": 12442 }, { "epoch": 5.807701283547258, "grad_norm": 0.125, "learning_rate": 7.491504349653699e-05, "loss": 0.0007, "step": 12443 }, { "epoch": 5.808168028004667, "grad_norm": 0.283203125, "learning_rate": 7.490083945263789e-05, "loss": 0.0013, "step": 12444 }, { "epoch": 5.808634772462077, "grad_norm": 0.1259765625, "learning_rate": 7.488663594915154e-05, "loss": 0.0062, "step": 12445 }, { "epoch": 5.809101516919487, "grad_norm": 0.193359375, "learning_rate": 7.487243298638375e-05, "loss": 0.0031, "step": 12446 }, { "epoch": 5.809568261376896, "grad_norm": 0.1669921875, "learning_rate": 7.485823056464041e-05, "loss": 0.0008, "step": 12447 }, { "epoch": 5.8100350058343055, "grad_norm": 0.1591796875, "learning_rate": 7.484402868422724e-05, "loss": 0.0008, "step": 12448 }, { "epoch": 5.810501750291715, "grad_norm": 0.04296875, "learning_rate": 7.482982734545003e-05, "loss": 0.0029, "step": 12449 }, { "epoch": 5.810968494749125, "grad_norm": 0.1298828125, "learning_rate": 7.481562654861458e-05, "loss": 0.0008, "step": 12450 }, { "epoch": 5.811435239206534, "grad_norm": 0.111328125, "learning_rate": 7.480142629402663e-05, "loss": 0.0006, "step": 12451 }, { "epoch": 5.811901983663944, "grad_norm": 0.10302734375, "learning_rate": 7.478722658199193e-05, "loss": 0.0009, "step": 12452 }, { "epoch": 5.812368728121354, "grad_norm": 0.271484375, "learning_rate": 7.477302741281621e-05, "loss": 0.0056, "step": 12453 }, { "epoch": 5.812835472578763, "grad_norm": 0.27734375, "learning_rate": 7.475882878680521e-05, "loss": 0.0016, "step": 12454 }, { "epoch": 5.813302217036172, "grad_norm": 0.0498046875, "learning_rate": 7.474463070426463e-05, "loss": 0.0004, "step": 12455 }, { "epoch": 5.813768961493582, "grad_norm": 0.06640625, "learning_rate": 7.473043316550016e-05, "loss": 0.0031, "step": 12456 }, { "epoch": 5.814235705950992, "grad_norm": 0.0218505859375, "learning_rate": 7.471623617081752e-05, "loss": 0.0003, "step": 12457 }, { "epoch": 5.814702450408402, "grad_norm": 0.2734375, "learning_rate": 7.470203972052237e-05, "loss": 0.0008, "step": 12458 }, { "epoch": 5.815169194865811, "grad_norm": 0.0712890625, "learning_rate": 7.468784381492033e-05, "loss": 0.0006, "step": 12459 }, { "epoch": 5.81563593932322, "grad_norm": 0.1708984375, "learning_rate": 7.467364845431717e-05, "loss": 0.0009, "step": 12460 }, { "epoch": 5.81610268378063, "grad_norm": 0.01611328125, "learning_rate": 7.46594536390184e-05, "loss": 0.0003, "step": 12461 }, { "epoch": 5.81656942823804, "grad_norm": 0.13671875, "learning_rate": 7.464525936932974e-05, "loss": 0.0007, "step": 12462 }, { "epoch": 5.817036172695449, "grad_norm": 0.05224609375, "learning_rate": 7.463106564555678e-05, "loss": 0.0006, "step": 12463 }, { "epoch": 5.817502917152859, "grad_norm": 0.0556640625, "learning_rate": 7.461687246800509e-05, "loss": 0.0004, "step": 12464 }, { "epoch": 5.8179696616102685, "grad_norm": 0.16015625, "learning_rate": 7.460267983698033e-05, "loss": 0.0008, "step": 12465 }, { "epoch": 5.818436406067678, "grad_norm": 0.1484375, "learning_rate": 7.458848775278804e-05, "loss": 0.0007, "step": 12466 }, { "epoch": 5.818903150525087, "grad_norm": 0.0196533203125, "learning_rate": 7.45742962157338e-05, "loss": 0.0003, "step": 12467 }, { "epoch": 5.819369894982497, "grad_norm": 0.0859375, "learning_rate": 7.456010522612316e-05, "loss": 0.0008, "step": 12468 }, { "epoch": 5.819836639439907, "grad_norm": 0.0400390625, "learning_rate": 7.45459147842617e-05, "loss": 0.002, "step": 12469 }, { "epoch": 5.820303383897317, "grad_norm": 0.166015625, "learning_rate": 7.453172489045495e-05, "loss": 0.0007, "step": 12470 }, { "epoch": 5.8207701283547255, "grad_norm": 0.0299072265625, "learning_rate": 7.451753554500838e-05, "loss": 0.0004, "step": 12471 }, { "epoch": 5.821236872812135, "grad_norm": 0.03662109375, "learning_rate": 7.450334674822757e-05, "loss": 0.0003, "step": 12472 }, { "epoch": 5.821703617269545, "grad_norm": 0.0908203125, "learning_rate": 7.448915850041799e-05, "loss": 0.0008, "step": 12473 }, { "epoch": 5.822170361726954, "grad_norm": 0.0262451171875, "learning_rate": 7.447497080188511e-05, "loss": 0.0003, "step": 12474 }, { "epoch": 5.822637106184364, "grad_norm": 0.11376953125, "learning_rate": 7.446078365293446e-05, "loss": 0.0005, "step": 12475 }, { "epoch": 5.823103850641774, "grad_norm": 0.037353515625, "learning_rate": 7.444659705387147e-05, "loss": 0.0004, "step": 12476 }, { "epoch": 5.823570595099183, "grad_norm": 0.30859375, "learning_rate": 7.44324110050016e-05, "loss": 0.0036, "step": 12477 }, { "epoch": 5.824037339556593, "grad_norm": 0.07080078125, "learning_rate": 7.441822550663027e-05, "loss": 0.0005, "step": 12478 }, { "epoch": 5.824504084014002, "grad_norm": 0.357421875, "learning_rate": 7.440404055906293e-05, "loss": 0.005, "step": 12479 }, { "epoch": 5.824970828471412, "grad_norm": 0.1494140625, "learning_rate": 7.438985616260503e-05, "loss": 0.0018, "step": 12480 }, { "epoch": 5.825437572928822, "grad_norm": 0.04296875, "learning_rate": 7.437567231756188e-05, "loss": 0.0004, "step": 12481 }, { "epoch": 5.825904317386231, "grad_norm": 0.01953125, "learning_rate": 7.436148902423898e-05, "loss": 0.0003, "step": 12482 }, { "epoch": 5.82637106184364, "grad_norm": 0.044189453125, "learning_rate": 7.434730628294167e-05, "loss": 0.0003, "step": 12483 }, { "epoch": 5.82683780630105, "grad_norm": 0.068359375, "learning_rate": 7.43331240939753e-05, "loss": 0.0006, "step": 12484 }, { "epoch": 5.82730455075846, "grad_norm": 0.0703125, "learning_rate": 7.431894245764525e-05, "loss": 0.0005, "step": 12485 }, { "epoch": 5.82777129521587, "grad_norm": 0.05810546875, "learning_rate": 7.430476137425688e-05, "loss": 0.0036, "step": 12486 }, { "epoch": 5.828238039673279, "grad_norm": 0.2470703125, "learning_rate": 7.42905808441155e-05, "loss": 0.0015, "step": 12487 }, { "epoch": 5.8287047841306885, "grad_norm": 0.043212890625, "learning_rate": 7.427640086752641e-05, "loss": 0.0004, "step": 12488 }, { "epoch": 5.829171528588098, "grad_norm": 0.1875, "learning_rate": 7.426222144479497e-05, "loss": 0.0009, "step": 12489 }, { "epoch": 5.829638273045507, "grad_norm": 0.10302734375, "learning_rate": 7.424804257622648e-05, "loss": 0.0005, "step": 12490 }, { "epoch": 5.830105017502917, "grad_norm": 0.0291748046875, "learning_rate": 7.423386426212617e-05, "loss": 0.0003, "step": 12491 }, { "epoch": 5.830571761960327, "grad_norm": 0.0172119140625, "learning_rate": 7.421968650279938e-05, "loss": 0.0003, "step": 12492 }, { "epoch": 5.831038506417737, "grad_norm": 0.0240478515625, "learning_rate": 7.420550929855133e-05, "loss": 0.0003, "step": 12493 }, { "epoch": 5.8315052508751455, "grad_norm": 0.07568359375, "learning_rate": 7.419133264968729e-05, "loss": 0.0004, "step": 12494 }, { "epoch": 5.831971995332555, "grad_norm": 0.0322265625, "learning_rate": 7.417715655651247e-05, "loss": 0.0004, "step": 12495 }, { "epoch": 5.832438739789965, "grad_norm": 0.443359375, "learning_rate": 7.416298101933216e-05, "loss": 0.0014, "step": 12496 }, { "epoch": 5.832905484247375, "grad_norm": 0.10693359375, "learning_rate": 7.414880603845152e-05, "loss": 0.0005, "step": 12497 }, { "epoch": 5.833372228704784, "grad_norm": 0.052490234375, "learning_rate": 7.413463161417575e-05, "loss": 0.0004, "step": 12498 }, { "epoch": 5.833838973162194, "grad_norm": 0.09619140625, "learning_rate": 7.412045774681006e-05, "loss": 0.0006, "step": 12499 }, { "epoch": 5.834305717619603, "grad_norm": 0.03173828125, "learning_rate": 7.410628443665966e-05, "loss": 0.0003, "step": 12500 }, { "epoch": 5.834772462077013, "grad_norm": 0.013916015625, "learning_rate": 7.409211168402965e-05, "loss": 0.0003, "step": 12501 }, { "epoch": 5.835239206534422, "grad_norm": 0.205078125, "learning_rate": 7.407793948922523e-05, "loss": 0.0006, "step": 12502 }, { "epoch": 5.835705950991832, "grad_norm": 0.1728515625, "learning_rate": 7.406376785255156e-05, "loss": 0.0007, "step": 12503 }, { "epoch": 5.836172695449242, "grad_norm": 0.031982421875, "learning_rate": 7.404959677431373e-05, "loss": 0.0003, "step": 12504 }, { "epoch": 5.8366394399066515, "grad_norm": 0.039794921875, "learning_rate": 7.403542625481685e-05, "loss": 0.0004, "step": 12505 }, { "epoch": 5.83710618436406, "grad_norm": 0.031982421875, "learning_rate": 7.402125629436608e-05, "loss": 0.0003, "step": 12506 }, { "epoch": 5.83757292882147, "grad_norm": 0.055908203125, "learning_rate": 7.400708689326649e-05, "loss": 0.0024, "step": 12507 }, { "epoch": 5.83803967327888, "grad_norm": 0.0546875, "learning_rate": 7.399291805182317e-05, "loss": 0.0004, "step": 12508 }, { "epoch": 5.83850641773629, "grad_norm": 0.1484375, "learning_rate": 7.397874977034117e-05, "loss": 0.0005, "step": 12509 }, { "epoch": 5.838973162193699, "grad_norm": 0.62109375, "learning_rate": 7.396458204912552e-05, "loss": 0.0038, "step": 12510 }, { "epoch": 5.8394399066511085, "grad_norm": 0.1025390625, "learning_rate": 7.395041488848134e-05, "loss": 0.0006, "step": 12511 }, { "epoch": 5.839906651108518, "grad_norm": 0.08203125, "learning_rate": 7.393624828871362e-05, "loss": 0.0004, "step": 12512 }, { "epoch": 5.840373395565928, "grad_norm": 0.042724609375, "learning_rate": 7.392208225012738e-05, "loss": 0.0003, "step": 12513 }, { "epoch": 5.840840140023337, "grad_norm": 0.1474609375, "learning_rate": 7.390791677302765e-05, "loss": 0.0029, "step": 12514 }, { "epoch": 5.841306884480747, "grad_norm": 0.28515625, "learning_rate": 7.389375185771943e-05, "loss": 0.0047, "step": 12515 }, { "epoch": 5.8417736289381565, "grad_norm": 0.06396484375, "learning_rate": 7.38795875045077e-05, "loss": 0.0005, "step": 12516 }, { "epoch": 5.8422403733955655, "grad_norm": 0.099609375, "learning_rate": 7.386542371369743e-05, "loss": 0.0005, "step": 12517 }, { "epoch": 5.842707117852975, "grad_norm": 0.03466796875, "learning_rate": 7.385126048559358e-05, "loss": 0.0003, "step": 12518 }, { "epoch": 5.843173862310385, "grad_norm": 0.2392578125, "learning_rate": 7.38370978205011e-05, "loss": 0.0019, "step": 12519 }, { "epoch": 5.843640606767795, "grad_norm": 0.1923828125, "learning_rate": 7.382293571872493e-05, "loss": 0.0011, "step": 12520 }, { "epoch": 5.844107351225205, "grad_norm": 0.0791015625, "learning_rate": 7.380877418057e-05, "loss": 0.0007, "step": 12521 }, { "epoch": 5.8445740956826135, "grad_norm": 0.0849609375, "learning_rate": 7.379461320634125e-05, "loss": 0.0034, "step": 12522 }, { "epoch": 5.845040840140023, "grad_norm": 0.056884765625, "learning_rate": 7.37804527963435e-05, "loss": 0.0007, "step": 12523 }, { "epoch": 5.845507584597433, "grad_norm": 0.318359375, "learning_rate": 7.376629295088173e-05, "loss": 0.0009, "step": 12524 }, { "epoch": 5.845974329054842, "grad_norm": 0.2265625, "learning_rate": 7.375213367026077e-05, "loss": 0.0029, "step": 12525 }, { "epoch": 5.846441073512252, "grad_norm": 0.283203125, "learning_rate": 7.373797495478551e-05, "loss": 0.0017, "step": 12526 }, { "epoch": 5.846907817969662, "grad_norm": 0.036865234375, "learning_rate": 7.372381680476076e-05, "loss": 0.0004, "step": 12527 }, { "epoch": 5.847374562427071, "grad_norm": 0.12353515625, "learning_rate": 7.37096592204914e-05, "loss": 0.0007, "step": 12528 }, { "epoch": 5.847841306884481, "grad_norm": 0.126953125, "learning_rate": 7.369550220228225e-05, "loss": 0.0076, "step": 12529 }, { "epoch": 5.84830805134189, "grad_norm": 0.15234375, "learning_rate": 7.368134575043811e-05, "loss": 0.0012, "step": 12530 }, { "epoch": 5.8487747957993, "grad_norm": 0.29296875, "learning_rate": 7.366718986526382e-05, "loss": 0.0043, "step": 12531 }, { "epoch": 5.84924154025671, "grad_norm": 0.0322265625, "learning_rate": 7.365303454706414e-05, "loss": 0.0022, "step": 12532 }, { "epoch": 5.849708284714119, "grad_norm": 0.04638671875, "learning_rate": 7.363887979614384e-05, "loss": 0.0003, "step": 12533 }, { "epoch": 5.850175029171528, "grad_norm": 0.248046875, "learning_rate": 7.362472561280773e-05, "loss": 0.0009, "step": 12534 }, { "epoch": 5.850641773628938, "grad_norm": 0.02392578125, "learning_rate": 7.361057199736055e-05, "loss": 0.0003, "step": 12535 }, { "epoch": 5.851108518086348, "grad_norm": 0.072265625, "learning_rate": 7.359641895010702e-05, "loss": 0.0032, "step": 12536 }, { "epoch": 5.851575262543757, "grad_norm": 0.09033203125, "learning_rate": 7.358226647135186e-05, "loss": 0.0005, "step": 12537 }, { "epoch": 5.852042007001167, "grad_norm": 0.0263671875, "learning_rate": 7.356811456139985e-05, "loss": 0.0004, "step": 12538 }, { "epoch": 5.8525087514585765, "grad_norm": 0.078125, "learning_rate": 7.355396322055565e-05, "loss": 0.0005, "step": 12539 }, { "epoch": 5.852975495915986, "grad_norm": 0.107421875, "learning_rate": 7.353981244912394e-05, "loss": 0.0007, "step": 12540 }, { "epoch": 5.853442240373395, "grad_norm": 0.27734375, "learning_rate": 7.352566224740946e-05, "loss": 0.002, "step": 12541 }, { "epoch": 5.853908984830805, "grad_norm": 0.049560546875, "learning_rate": 7.351151261571684e-05, "loss": 0.0004, "step": 12542 }, { "epoch": 5.854375729288215, "grad_norm": 0.1162109375, "learning_rate": 7.349736355435072e-05, "loss": 0.0006, "step": 12543 }, { "epoch": 5.854842473745625, "grad_norm": 0.0771484375, "learning_rate": 7.348321506361579e-05, "loss": 0.0005, "step": 12544 }, { "epoch": 5.8553092182030335, "grad_norm": 0.06787109375, "learning_rate": 7.346906714381665e-05, "loss": 0.0005, "step": 12545 }, { "epoch": 5.855775962660443, "grad_norm": 0.044677734375, "learning_rate": 7.345491979525795e-05, "loss": 0.0004, "step": 12546 }, { "epoch": 5.856242707117853, "grad_norm": 0.134765625, "learning_rate": 7.344077301824425e-05, "loss": 0.0005, "step": 12547 }, { "epoch": 5.856709451575263, "grad_norm": 0.1923828125, "learning_rate": 7.342662681308018e-05, "loss": 0.0008, "step": 12548 }, { "epoch": 5.857176196032672, "grad_norm": 0.038330078125, "learning_rate": 7.341248118007035e-05, "loss": 0.0004, "step": 12549 }, { "epoch": 5.857642940490082, "grad_norm": 0.12890625, "learning_rate": 7.339833611951925e-05, "loss": 0.0005, "step": 12550 }, { "epoch": 5.858109684947491, "grad_norm": 0.1748046875, "learning_rate": 7.338419163173153e-05, "loss": 0.0074, "step": 12551 }, { "epoch": 5.858576429404901, "grad_norm": 0.2197265625, "learning_rate": 7.337004771701168e-05, "loss": 0.0036, "step": 12552 }, { "epoch": 5.85904317386231, "grad_norm": 0.251953125, "learning_rate": 7.335590437566426e-05, "loss": 0.0028, "step": 12553 }, { "epoch": 5.85950991831972, "grad_norm": 0.1865234375, "learning_rate": 7.334176160799375e-05, "loss": 0.0011, "step": 12554 }, { "epoch": 5.85997666277713, "grad_norm": 0.115234375, "learning_rate": 7.332761941430474e-05, "loss": 0.0008, "step": 12555 }, { "epoch": 5.8604434072345395, "grad_norm": 0.0262451171875, "learning_rate": 7.331347779490165e-05, "loss": 0.0003, "step": 12556 }, { "epoch": 5.860910151691948, "grad_norm": 0.04248046875, "learning_rate": 7.3299336750089e-05, "loss": 0.0004, "step": 12557 }, { "epoch": 5.861376896149358, "grad_norm": 0.0625, "learning_rate": 7.328519628017125e-05, "loss": 0.0004, "step": 12558 }, { "epoch": 5.861843640606768, "grad_norm": 0.04443359375, "learning_rate": 7.327105638545285e-05, "loss": 0.0004, "step": 12559 }, { "epoch": 5.862310385064177, "grad_norm": 0.34765625, "learning_rate": 7.325691706623827e-05, "loss": 0.0049, "step": 12560 }, { "epoch": 5.862777129521587, "grad_norm": 0.439453125, "learning_rate": 7.324277832283195e-05, "loss": 0.002, "step": 12561 }, { "epoch": 5.8632438739789965, "grad_norm": 0.23046875, "learning_rate": 7.322864015553828e-05, "loss": 0.0076, "step": 12562 }, { "epoch": 5.863710618436406, "grad_norm": 0.1748046875, "learning_rate": 7.321450256466171e-05, "loss": 0.0012, "step": 12563 }, { "epoch": 5.864177362893816, "grad_norm": 0.0732421875, "learning_rate": 7.320036555050662e-05, "loss": 0.0003, "step": 12564 }, { "epoch": 5.864644107351225, "grad_norm": 0.0166015625, "learning_rate": 7.318622911337739e-05, "loss": 0.0003, "step": 12565 }, { "epoch": 5.865110851808635, "grad_norm": 0.08349609375, "learning_rate": 7.317209325357837e-05, "loss": 0.0037, "step": 12566 }, { "epoch": 5.865577596266045, "grad_norm": 0.0257568359375, "learning_rate": 7.315795797141398e-05, "loss": 0.0003, "step": 12567 }, { "epoch": 5.8660443407234535, "grad_norm": 0.2412109375, "learning_rate": 7.314382326718853e-05, "loss": 0.0016, "step": 12568 }, { "epoch": 5.866511085180863, "grad_norm": 0.017333984375, "learning_rate": 7.312968914120635e-05, "loss": 0.0002, "step": 12569 }, { "epoch": 5.866977829638273, "grad_norm": 0.0257568359375, "learning_rate": 7.311555559377179e-05, "loss": 0.0003, "step": 12570 }, { "epoch": 5.867444574095683, "grad_norm": 0.07177734375, "learning_rate": 7.310142262518915e-05, "loss": 0.0004, "step": 12571 }, { "epoch": 5.867911318553093, "grad_norm": 0.255859375, "learning_rate": 7.30872902357627e-05, "loss": 0.0031, "step": 12572 }, { "epoch": 5.868378063010502, "grad_norm": 0.138671875, "learning_rate": 7.307315842579676e-05, "loss": 0.0006, "step": 12573 }, { "epoch": 5.868844807467911, "grad_norm": 0.1435546875, "learning_rate": 7.305902719559562e-05, "loss": 0.0023, "step": 12574 }, { "epoch": 5.869311551925321, "grad_norm": 0.13671875, "learning_rate": 7.304489654546351e-05, "loss": 0.0006, "step": 12575 }, { "epoch": 5.86977829638273, "grad_norm": 0.0252685546875, "learning_rate": 7.303076647570465e-05, "loss": 0.0004, "step": 12576 }, { "epoch": 5.87024504084014, "grad_norm": 0.134765625, "learning_rate": 7.301663698662334e-05, "loss": 0.0007, "step": 12577 }, { "epoch": 5.87071178529755, "grad_norm": 0.1455078125, "learning_rate": 7.300250807852378e-05, "loss": 0.0049, "step": 12578 }, { "epoch": 5.8711785297549595, "grad_norm": 0.083984375, "learning_rate": 7.298837975171015e-05, "loss": 0.0005, "step": 12579 }, { "epoch": 5.871645274212368, "grad_norm": 0.181640625, "learning_rate": 7.297425200648669e-05, "loss": 0.0008, "step": 12580 }, { "epoch": 5.872112018669778, "grad_norm": 0.07568359375, "learning_rate": 7.296012484315757e-05, "loss": 0.0005, "step": 12581 }, { "epoch": 5.872578763127188, "grad_norm": 0.09912109375, "learning_rate": 7.294599826202694e-05, "loss": 0.0009, "step": 12582 }, { "epoch": 5.873045507584598, "grad_norm": 0.0537109375, "learning_rate": 7.293187226339903e-05, "loss": 0.0005, "step": 12583 }, { "epoch": 5.873512252042007, "grad_norm": 0.08544921875, "learning_rate": 7.291774684757793e-05, "loss": 0.0005, "step": 12584 }, { "epoch": 5.8739789964994165, "grad_norm": 0.078125, "learning_rate": 7.290362201486778e-05, "loss": 0.0004, "step": 12585 }, { "epoch": 5.874445740956826, "grad_norm": 0.023681640625, "learning_rate": 7.288949776557268e-05, "loss": 0.0003, "step": 12586 }, { "epoch": 5.874912485414236, "grad_norm": 0.035888671875, "learning_rate": 7.287537409999681e-05, "loss": 0.0004, "step": 12587 }, { "epoch": 5.875379229871645, "grad_norm": 0.349609375, "learning_rate": 7.286125101844424e-05, "loss": 0.0039, "step": 12588 }, { "epoch": 5.875845974329055, "grad_norm": 0.06640625, "learning_rate": 7.2847128521219e-05, "loss": 0.0005, "step": 12589 }, { "epoch": 5.8763127187864646, "grad_norm": 0.03515625, "learning_rate": 7.283300660862525e-05, "loss": 0.0004, "step": 12590 }, { "epoch": 5.876779463243874, "grad_norm": 0.0303955078125, "learning_rate": 7.2818885280967e-05, "loss": 0.0003, "step": 12591 }, { "epoch": 5.877246207701283, "grad_norm": 0.02978515625, "learning_rate": 7.28047645385483e-05, "loss": 0.0003, "step": 12592 }, { "epoch": 5.877712952158693, "grad_norm": 0.0400390625, "learning_rate": 7.27906443816732e-05, "loss": 0.0005, "step": 12593 }, { "epoch": 5.878179696616103, "grad_norm": 0.040771484375, "learning_rate": 7.277652481064573e-05, "loss": 0.0003, "step": 12594 }, { "epoch": 5.878646441073512, "grad_norm": 0.0859375, "learning_rate": 7.276240582576987e-05, "loss": 0.0006, "step": 12595 }, { "epoch": 5.8791131855309215, "grad_norm": 0.076171875, "learning_rate": 7.274828742734962e-05, "loss": 0.0005, "step": 12596 }, { "epoch": 5.879579929988331, "grad_norm": 0.23046875, "learning_rate": 7.2734169615689e-05, "loss": 0.0022, "step": 12597 }, { "epoch": 5.880046674445741, "grad_norm": 0.22265625, "learning_rate": 7.272005239109196e-05, "loss": 0.0016, "step": 12598 }, { "epoch": 5.880513418903151, "grad_norm": 0.232421875, "learning_rate": 7.270593575386245e-05, "loss": 0.0013, "step": 12599 }, { "epoch": 5.88098016336056, "grad_norm": 0.046875, "learning_rate": 7.269181970430443e-05, "loss": 0.0005, "step": 12600 }, { "epoch": 5.88144690781797, "grad_norm": 0.033203125, "learning_rate": 7.267770424272183e-05, "loss": 0.0003, "step": 12601 }, { "epoch": 5.881913652275379, "grad_norm": 0.51953125, "learning_rate": 7.266358936941858e-05, "loss": 0.002, "step": 12602 }, { "epoch": 5.882380396732788, "grad_norm": 0.1689453125, "learning_rate": 7.264947508469861e-05, "loss": 0.0004, "step": 12603 }, { "epoch": 5.882847141190198, "grad_norm": 0.10107421875, "learning_rate": 7.263536138886573e-05, "loss": 0.0006, "step": 12604 }, { "epoch": 5.883313885647608, "grad_norm": 0.12451171875, "learning_rate": 7.26212482822239e-05, "loss": 0.0006, "step": 12605 }, { "epoch": 5.883780630105018, "grad_norm": 0.0279541015625, "learning_rate": 7.260713576507699e-05, "loss": 0.0004, "step": 12606 }, { "epoch": 5.8842473745624275, "grad_norm": 0.055908203125, "learning_rate": 7.259302383772883e-05, "loss": 0.0005, "step": 12607 }, { "epoch": 5.884714119019836, "grad_norm": 0.0693359375, "learning_rate": 7.257891250048325e-05, "loss": 0.0007, "step": 12608 }, { "epoch": 5.885180863477246, "grad_norm": 0.1611328125, "learning_rate": 7.256480175364413e-05, "loss": 0.0009, "step": 12609 }, { "epoch": 5.885647607934656, "grad_norm": 0.028564453125, "learning_rate": 7.255069159751526e-05, "loss": 0.0004, "step": 12610 }, { "epoch": 5.886114352392065, "grad_norm": 0.07373046875, "learning_rate": 7.253658203240046e-05, "loss": 0.0004, "step": 12611 }, { "epoch": 5.886581096849475, "grad_norm": 0.032958984375, "learning_rate": 7.252247305860353e-05, "loss": 0.0004, "step": 12612 }, { "epoch": 5.8870478413068845, "grad_norm": 0.236328125, "learning_rate": 7.250836467642823e-05, "loss": 0.0008, "step": 12613 }, { "epoch": 5.887514585764294, "grad_norm": 0.1044921875, "learning_rate": 7.249425688617835e-05, "loss": 0.0005, "step": 12614 }, { "epoch": 5.887981330221704, "grad_norm": 0.057373046875, "learning_rate": 7.248014968815762e-05, "loss": 0.0005, "step": 12615 }, { "epoch": 5.888448074679113, "grad_norm": 0.11865234375, "learning_rate": 7.24660430826698e-05, "loss": 0.0038, "step": 12616 }, { "epoch": 5.888914819136523, "grad_norm": 0.0206298828125, "learning_rate": 7.245193707001866e-05, "loss": 0.0003, "step": 12617 }, { "epoch": 5.889381563593933, "grad_norm": 0.038818359375, "learning_rate": 7.243783165050784e-05, "loss": 0.0003, "step": 12618 }, { "epoch": 5.8898483080513415, "grad_norm": 0.1298828125, "learning_rate": 7.24237268244411e-05, "loss": 0.001, "step": 12619 }, { "epoch": 5.890315052508751, "grad_norm": 0.08154296875, "learning_rate": 7.240962259212212e-05, "loss": 0.0035, "step": 12620 }, { "epoch": 5.890781796966161, "grad_norm": 0.04296875, "learning_rate": 7.239551895385455e-05, "loss": 0.0022, "step": 12621 }, { "epoch": 5.891248541423571, "grad_norm": 0.0118408203125, "learning_rate": 7.238141590994212e-05, "loss": 0.0002, "step": 12622 }, { "epoch": 5.89171528588098, "grad_norm": 0.0263671875, "learning_rate": 7.236731346068844e-05, "loss": 0.0003, "step": 12623 }, { "epoch": 5.89218203033839, "grad_norm": 0.018310546875, "learning_rate": 7.235321160639716e-05, "loss": 0.0003, "step": 12624 }, { "epoch": 5.892648774795799, "grad_norm": 0.0966796875, "learning_rate": 7.23391103473719e-05, "loss": 0.0006, "step": 12625 }, { "epoch": 5.893115519253209, "grad_norm": 0.0595703125, "learning_rate": 7.232500968391629e-05, "loss": 0.0005, "step": 12626 }, { "epoch": 5.893582263710618, "grad_norm": 0.263671875, "learning_rate": 7.231090961633395e-05, "loss": 0.0008, "step": 12627 }, { "epoch": 5.894049008168028, "grad_norm": 0.019287109375, "learning_rate": 7.22968101449284e-05, "loss": 0.0003, "step": 12628 }, { "epoch": 5.894515752625438, "grad_norm": 0.0361328125, "learning_rate": 7.22827112700033e-05, "loss": 0.0003, "step": 12629 }, { "epoch": 5.8949824970828475, "grad_norm": 0.322265625, "learning_rate": 7.226861299186218e-05, "loss": 0.0035, "step": 12630 }, { "epoch": 5.895449241540256, "grad_norm": 0.054443359375, "learning_rate": 7.225451531080855e-05, "loss": 0.0004, "step": 12631 }, { "epoch": 5.895915985997666, "grad_norm": 0.5859375, "learning_rate": 7.224041822714604e-05, "loss": 0.0163, "step": 12632 }, { "epoch": 5.896382730455076, "grad_norm": 0.0458984375, "learning_rate": 7.222632174117812e-05, "loss": 0.0004, "step": 12633 }, { "epoch": 5.896849474912486, "grad_norm": 0.0400390625, "learning_rate": 7.221222585320828e-05, "loss": 0.0004, "step": 12634 }, { "epoch": 5.897316219369895, "grad_norm": 0.048828125, "learning_rate": 7.219813056354006e-05, "loss": 0.0046, "step": 12635 }, { "epoch": 5.8977829638273045, "grad_norm": 0.06787109375, "learning_rate": 7.218403587247694e-05, "loss": 0.0004, "step": 12636 }, { "epoch": 5.898249708284714, "grad_norm": 0.1591796875, "learning_rate": 7.21699417803224e-05, "loss": 0.0006, "step": 12637 }, { "epoch": 5.898716452742123, "grad_norm": 0.09619140625, "learning_rate": 7.215584828737985e-05, "loss": 0.0005, "step": 12638 }, { "epoch": 5.899183197199533, "grad_norm": 0.125, "learning_rate": 7.214175539395282e-05, "loss": 0.0034, "step": 12639 }, { "epoch": 5.899649941656943, "grad_norm": 0.1962890625, "learning_rate": 7.212766310034472e-05, "loss": 0.0025, "step": 12640 }, { "epoch": 5.900116686114353, "grad_norm": 0.06640625, "learning_rate": 7.21135714068589e-05, "loss": 0.0005, "step": 12641 }, { "epoch": 5.900583430571762, "grad_norm": 0.05419921875, "learning_rate": 7.209948031379887e-05, "loss": 0.0023, "step": 12642 }, { "epoch": 5.901050175029171, "grad_norm": 0.06298828125, "learning_rate": 7.208538982146799e-05, "loss": 0.0004, "step": 12643 }, { "epoch": 5.901516919486581, "grad_norm": 0.1953125, "learning_rate": 7.207129993016963e-05, "loss": 0.0008, "step": 12644 }, { "epoch": 5.901983663943991, "grad_norm": 0.2255859375, "learning_rate": 7.205721064020715e-05, "loss": 0.0039, "step": 12645 }, { "epoch": 5.9024504084014, "grad_norm": 0.06787109375, "learning_rate": 7.204312195188394e-05, "loss": 0.0004, "step": 12646 }, { "epoch": 5.90291715285881, "grad_norm": 0.203125, "learning_rate": 7.202903386550335e-05, "loss": 0.001, "step": 12647 }, { "epoch": 5.903383897316219, "grad_norm": 0.076171875, "learning_rate": 7.201494638136866e-05, "loss": 0.0007, "step": 12648 }, { "epoch": 5.903850641773629, "grad_norm": 0.018798828125, "learning_rate": 7.200085949978326e-05, "loss": 0.0003, "step": 12649 }, { "epoch": 5.904317386231039, "grad_norm": 0.1142578125, "learning_rate": 7.19867732210504e-05, "loss": 0.0005, "step": 12650 }, { "epoch": 5.904784130688448, "grad_norm": 0.185546875, "learning_rate": 7.197268754547344e-05, "loss": 0.0007, "step": 12651 }, { "epoch": 5.905250875145858, "grad_norm": 0.0291748046875, "learning_rate": 7.195860247335558e-05, "loss": 0.0003, "step": 12652 }, { "epoch": 5.9057176196032675, "grad_norm": 0.05517578125, "learning_rate": 7.19445180050001e-05, "loss": 0.0004, "step": 12653 }, { "epoch": 5.906184364060676, "grad_norm": 0.0113525390625, "learning_rate": 7.19304341407103e-05, "loss": 0.0002, "step": 12654 }, { "epoch": 5.906651108518086, "grad_norm": 0.07421875, "learning_rate": 7.191635088078938e-05, "loss": 0.0058, "step": 12655 }, { "epoch": 5.907117852975496, "grad_norm": 0.07080078125, "learning_rate": 7.19022682255406e-05, "loss": 0.0005, "step": 12656 }, { "epoch": 5.907584597432906, "grad_norm": 0.07666015625, "learning_rate": 7.188818617526715e-05, "loss": 0.0005, "step": 12657 }, { "epoch": 5.908051341890315, "grad_norm": 0.0281982421875, "learning_rate": 7.187410473027225e-05, "loss": 0.0003, "step": 12658 }, { "epoch": 5.9085180863477245, "grad_norm": 0.035888671875, "learning_rate": 7.186002389085908e-05, "loss": 0.0005, "step": 12659 }, { "epoch": 5.908984830805134, "grad_norm": 0.17578125, "learning_rate": 7.18459436573308e-05, "loss": 0.0032, "step": 12660 }, { "epoch": 5.909451575262544, "grad_norm": 0.11572265625, "learning_rate": 7.18318640299906e-05, "loss": 0.0006, "step": 12661 }, { "epoch": 5.909918319719953, "grad_norm": 0.0201416015625, "learning_rate": 7.181778500914164e-05, "loss": 0.0003, "step": 12662 }, { "epoch": 5.910385064177363, "grad_norm": 0.0634765625, "learning_rate": 7.180370659508702e-05, "loss": 0.0005, "step": 12663 }, { "epoch": 5.9108518086347726, "grad_norm": 0.032470703125, "learning_rate": 7.178962878812988e-05, "loss": 0.0004, "step": 12664 }, { "epoch": 5.911318553092182, "grad_norm": 0.189453125, "learning_rate": 7.177555158857334e-05, "loss": 0.0014, "step": 12665 }, { "epoch": 5.911785297549591, "grad_norm": 0.28515625, "learning_rate": 7.176147499672048e-05, "loss": 0.0028, "step": 12666 }, { "epoch": 5.912252042007001, "grad_norm": 0.3203125, "learning_rate": 7.17473990128744e-05, "loss": 0.0015, "step": 12667 }, { "epoch": 5.912718786464411, "grad_norm": 0.46875, "learning_rate": 7.173332363733817e-05, "loss": 0.0017, "step": 12668 }, { "epoch": 5.913185530921821, "grad_norm": 0.036865234375, "learning_rate": 7.171924887041485e-05, "loss": 0.0017, "step": 12669 }, { "epoch": 5.9136522753792296, "grad_norm": 0.0291748046875, "learning_rate": 7.170517471240746e-05, "loss": 0.0003, "step": 12670 }, { "epoch": 5.914119019836639, "grad_norm": 0.087890625, "learning_rate": 7.169110116361906e-05, "loss": 0.0005, "step": 12671 }, { "epoch": 5.914585764294049, "grad_norm": 0.0751953125, "learning_rate": 7.167702822435268e-05, "loss": 0.0042, "step": 12672 }, { "epoch": 5.915052508751459, "grad_norm": 0.1552734375, "learning_rate": 7.166295589491129e-05, "loss": 0.0006, "step": 12673 }, { "epoch": 5.915519253208868, "grad_norm": 0.03173828125, "learning_rate": 7.164888417559788e-05, "loss": 0.0003, "step": 12674 }, { "epoch": 5.915985997666278, "grad_norm": 0.197265625, "learning_rate": 7.163481306671549e-05, "loss": 0.0009, "step": 12675 }, { "epoch": 5.916452742123687, "grad_norm": 0.056640625, "learning_rate": 7.162074256856703e-05, "loss": 0.0007, "step": 12676 }, { "epoch": 5.916919486581097, "grad_norm": 0.04931640625, "learning_rate": 7.160667268145544e-05, "loss": 0.0004, "step": 12677 }, { "epoch": 5.917386231038506, "grad_norm": 0.2578125, "learning_rate": 7.159260340568372e-05, "loss": 0.0011, "step": 12678 }, { "epoch": 5.917852975495916, "grad_norm": 0.15234375, "learning_rate": 7.157853474155479e-05, "loss": 0.0006, "step": 12679 }, { "epoch": 5.918319719953326, "grad_norm": 0.05517578125, "learning_rate": 7.15644666893715e-05, "loss": 0.003, "step": 12680 }, { "epoch": 5.918786464410735, "grad_norm": 0.035400390625, "learning_rate": 7.15503992494368e-05, "loss": 0.0004, "step": 12681 }, { "epoch": 5.919253208868144, "grad_norm": 0.0306396484375, "learning_rate": 7.153633242205359e-05, "loss": 0.0004, "step": 12682 }, { "epoch": 5.919719953325554, "grad_norm": 0.1494140625, "learning_rate": 7.152226620752472e-05, "loss": 0.001, "step": 12683 }, { "epoch": 5.920186697782964, "grad_norm": 0.1474609375, "learning_rate": 7.150820060615302e-05, "loss": 0.0007, "step": 12684 }, { "epoch": 5.920653442240374, "grad_norm": 0.08154296875, "learning_rate": 7.149413561824141e-05, "loss": 0.0007, "step": 12685 }, { "epoch": 5.921120186697783, "grad_norm": 0.150390625, "learning_rate": 7.148007124409268e-05, "loss": 0.002, "step": 12686 }, { "epoch": 5.9215869311551925, "grad_norm": 0.0595703125, "learning_rate": 7.146600748400965e-05, "loss": 0.0005, "step": 12687 }, { "epoch": 5.922053675612602, "grad_norm": 0.09326171875, "learning_rate": 7.145194433829515e-05, "loss": 0.0005, "step": 12688 }, { "epoch": 5.922520420070011, "grad_norm": 0.033935546875, "learning_rate": 7.143788180725196e-05, "loss": 0.0004, "step": 12689 }, { "epoch": 5.922987164527421, "grad_norm": 0.0211181640625, "learning_rate": 7.142381989118286e-05, "loss": 0.0002, "step": 12690 }, { "epoch": 5.923453908984831, "grad_norm": 0.02001953125, "learning_rate": 7.140975859039062e-05, "loss": 0.0003, "step": 12691 }, { "epoch": 5.923920653442241, "grad_norm": 0.03662109375, "learning_rate": 7.139569790517802e-05, "loss": 0.0003, "step": 12692 }, { "epoch": 5.92438739789965, "grad_norm": 0.0255126953125, "learning_rate": 7.138163783584776e-05, "loss": 0.002, "step": 12693 }, { "epoch": 5.924854142357059, "grad_norm": 0.01495361328125, "learning_rate": 7.136757838270258e-05, "loss": 0.0002, "step": 12694 }, { "epoch": 5.925320886814469, "grad_norm": 0.036865234375, "learning_rate": 7.135351954604523e-05, "loss": 0.0003, "step": 12695 }, { "epoch": 5.925787631271879, "grad_norm": 0.061279296875, "learning_rate": 7.133946132617839e-05, "loss": 0.0006, "step": 12696 }, { "epoch": 5.926254375729288, "grad_norm": 0.030517578125, "learning_rate": 7.132540372340472e-05, "loss": 0.0003, "step": 12697 }, { "epoch": 5.926721120186698, "grad_norm": 0.267578125, "learning_rate": 7.131134673802696e-05, "loss": 0.0009, "step": 12698 }, { "epoch": 5.927187864644107, "grad_norm": 0.349609375, "learning_rate": 7.129729037034774e-05, "loss": 0.0011, "step": 12699 }, { "epoch": 5.927654609101517, "grad_norm": 0.019775390625, "learning_rate": 7.128323462066968e-05, "loss": 0.0003, "step": 12700 }, { "epoch": 5.928121353558926, "grad_norm": 0.03759765625, "learning_rate": 7.126917948929544e-05, "loss": 0.0004, "step": 12701 }, { "epoch": 5.928588098016336, "grad_norm": 0.026123046875, "learning_rate": 7.125512497652763e-05, "loss": 0.0003, "step": 12702 }, { "epoch": 5.929054842473746, "grad_norm": 0.054443359375, "learning_rate": 7.124107108266889e-05, "loss": 0.0024, "step": 12703 }, { "epoch": 5.9295215869311555, "grad_norm": 0.044677734375, "learning_rate": 7.122701780802181e-05, "loss": 0.0003, "step": 12704 }, { "epoch": 5.929988331388564, "grad_norm": 0.05517578125, "learning_rate": 7.121296515288895e-05, "loss": 0.0004, "step": 12705 }, { "epoch": 5.930455075845974, "grad_norm": 0.032470703125, "learning_rate": 7.119891311757286e-05, "loss": 0.0003, "step": 12706 }, { "epoch": 5.930921820303384, "grad_norm": 0.171875, "learning_rate": 7.118486170237616e-05, "loss": 0.0008, "step": 12707 }, { "epoch": 5.931388564760794, "grad_norm": 0.019287109375, "learning_rate": 7.117081090760136e-05, "loss": 0.0003, "step": 12708 }, { "epoch": 5.931855309218203, "grad_norm": 0.171875, "learning_rate": 7.115676073355095e-05, "loss": 0.0006, "step": 12709 }, { "epoch": 5.9323220536756125, "grad_norm": 0.03564453125, "learning_rate": 7.114271118052752e-05, "loss": 0.0004, "step": 12710 }, { "epoch": 5.932788798133022, "grad_norm": 0.0135498046875, "learning_rate": 7.112866224883353e-05, "loss": 0.0002, "step": 12711 }, { "epoch": 5.933255542590432, "grad_norm": 0.10791015625, "learning_rate": 7.111461393877145e-05, "loss": 0.0029, "step": 12712 }, { "epoch": 5.933722287047841, "grad_norm": 0.14453125, "learning_rate": 7.110056625064382e-05, "loss": 0.0006, "step": 12713 }, { "epoch": 5.934189031505251, "grad_norm": 0.033203125, "learning_rate": 7.108651918475304e-05, "loss": 0.002, "step": 12714 }, { "epoch": 5.934655775962661, "grad_norm": 0.024169921875, "learning_rate": 7.107247274140161e-05, "loss": 0.0003, "step": 12715 }, { "epoch": 5.93512252042007, "grad_norm": 0.038818359375, "learning_rate": 7.105842692089189e-05, "loss": 0.0004, "step": 12716 }, { "epoch": 5.935589264877479, "grad_norm": 0.06201171875, "learning_rate": 7.104438172352638e-05, "loss": 0.0005, "step": 12717 }, { "epoch": 5.936056009334889, "grad_norm": 0.09423828125, "learning_rate": 7.103033714960746e-05, "loss": 0.0029, "step": 12718 }, { "epoch": 5.936522753792299, "grad_norm": 0.05322265625, "learning_rate": 7.10162931994375e-05, "loss": 0.0004, "step": 12719 }, { "epoch": 5.936989498249709, "grad_norm": 0.0966796875, "learning_rate": 7.100224987331894e-05, "loss": 0.0005, "step": 12720 }, { "epoch": 5.937456242707118, "grad_norm": 0.050537109375, "learning_rate": 7.09882071715541e-05, "loss": 0.0004, "step": 12721 }, { "epoch": 5.937922987164527, "grad_norm": 0.05615234375, "learning_rate": 7.097416509444532e-05, "loss": 0.0004, "step": 12722 }, { "epoch": 5.938389731621937, "grad_norm": 0.033935546875, "learning_rate": 7.096012364229501e-05, "loss": 0.0004, "step": 12723 }, { "epoch": 5.938856476079346, "grad_norm": 0.03369140625, "learning_rate": 7.094608281540545e-05, "loss": 0.0004, "step": 12724 }, { "epoch": 5.939323220536756, "grad_norm": 0.1533203125, "learning_rate": 7.093204261407895e-05, "loss": 0.0008, "step": 12725 }, { "epoch": 5.939789964994166, "grad_norm": 0.08251953125, "learning_rate": 7.091800303861782e-05, "loss": 0.0048, "step": 12726 }, { "epoch": 5.9402567094515755, "grad_norm": 0.1474609375, "learning_rate": 7.090396408932436e-05, "loss": 0.0043, "step": 12727 }, { "epoch": 5.940723453908985, "grad_norm": 0.2578125, "learning_rate": 7.088992576650084e-05, "loss": 0.0032, "step": 12728 }, { "epoch": 5.941190198366394, "grad_norm": 0.03857421875, "learning_rate": 7.087588807044951e-05, "loss": 0.0004, "step": 12729 }, { "epoch": 5.941656942823804, "grad_norm": 0.259765625, "learning_rate": 7.086185100147262e-05, "loss": 0.0009, "step": 12730 }, { "epoch": 5.942123687281214, "grad_norm": 0.2158203125, "learning_rate": 7.084781455987243e-05, "loss": 0.0045, "step": 12731 }, { "epoch": 5.942590431738623, "grad_norm": 0.076171875, "learning_rate": 7.083377874595112e-05, "loss": 0.0004, "step": 12732 }, { "epoch": 5.9430571761960325, "grad_norm": 0.212890625, "learning_rate": 7.081974356001091e-05, "loss": 0.0018, "step": 12733 }, { "epoch": 5.943523920653442, "grad_norm": 0.022705078125, "learning_rate": 7.0805709002354e-05, "loss": 0.0003, "step": 12734 }, { "epoch": 5.943990665110852, "grad_norm": 0.028564453125, "learning_rate": 7.079167507328257e-05, "loss": 0.0003, "step": 12735 }, { "epoch": 5.944457409568262, "grad_norm": 0.11767578125, "learning_rate": 7.077764177309878e-05, "loss": 0.0009, "step": 12736 }, { "epoch": 5.944924154025671, "grad_norm": 0.050048828125, "learning_rate": 7.076360910210479e-05, "loss": 0.0003, "step": 12737 }, { "epoch": 5.9453908984830806, "grad_norm": 0.1767578125, "learning_rate": 7.074957706060275e-05, "loss": 0.0007, "step": 12738 }, { "epoch": 5.94585764294049, "grad_norm": 0.01165771484375, "learning_rate": 7.073554564889473e-05, "loss": 0.0002, "step": 12739 }, { "epoch": 5.946324387397899, "grad_norm": 0.08203125, "learning_rate": 7.072151486728291e-05, "loss": 0.0009, "step": 12740 }, { "epoch": 5.946791131855309, "grad_norm": 0.039794921875, "learning_rate": 7.070748471606936e-05, "loss": 0.0025, "step": 12741 }, { "epoch": 5.947257876312719, "grad_norm": 0.038818359375, "learning_rate": 7.069345519555616e-05, "loss": 0.0003, "step": 12742 }, { "epoch": 5.947724620770129, "grad_norm": 0.0238037109375, "learning_rate": 7.067942630604536e-05, "loss": 0.0004, "step": 12743 }, { "epoch": 5.9481913652275376, "grad_norm": 0.1669921875, "learning_rate": 7.066539804783908e-05, "loss": 0.001, "step": 12744 }, { "epoch": 5.948658109684947, "grad_norm": 0.01953125, "learning_rate": 7.065137042123932e-05, "loss": 0.0003, "step": 12745 }, { "epoch": 5.949124854142357, "grad_norm": 0.1298828125, "learning_rate": 7.06373434265481e-05, "loss": 0.0027, "step": 12746 }, { "epoch": 5.949591598599767, "grad_norm": 0.1640625, "learning_rate": 7.062331706406751e-05, "loss": 0.0007, "step": 12747 }, { "epoch": 5.950058343057176, "grad_norm": 0.1728515625, "learning_rate": 7.060929133409944e-05, "loss": 0.0006, "step": 12748 }, { "epoch": 5.950525087514586, "grad_norm": 0.173828125, "learning_rate": 7.059526623694597e-05, "loss": 0.0006, "step": 12749 }, { "epoch": 5.950991831971995, "grad_norm": 0.2060546875, "learning_rate": 7.058124177290902e-05, "loss": 0.001, "step": 12750 }, { "epoch": 5.951458576429405, "grad_norm": 0.0673828125, "learning_rate": 7.056721794229057e-05, "loss": 0.0005, "step": 12751 }, { "epoch": 5.951925320886814, "grad_norm": 0.06982421875, "learning_rate": 7.05531947453926e-05, "loss": 0.0005, "step": 12752 }, { "epoch": 5.952392065344224, "grad_norm": 0.05517578125, "learning_rate": 7.053917218251699e-05, "loss": 0.0004, "step": 12753 }, { "epoch": 5.952858809801634, "grad_norm": 0.09326171875, "learning_rate": 7.052515025396572e-05, "loss": 0.0039, "step": 12754 }, { "epoch": 5.9533255542590435, "grad_norm": 0.01422119140625, "learning_rate": 7.051112896004063e-05, "loss": 0.0002, "step": 12755 }, { "epoch": 5.953792298716452, "grad_norm": 0.03173828125, "learning_rate": 7.049710830104368e-05, "loss": 0.0003, "step": 12756 }, { "epoch": 5.954259043173862, "grad_norm": 0.043701171875, "learning_rate": 7.048308827727672e-05, "loss": 0.0004, "step": 12757 }, { "epoch": 5.954725787631272, "grad_norm": 0.035888671875, "learning_rate": 7.04690688890416e-05, "loss": 0.0003, "step": 12758 }, { "epoch": 5.955192532088682, "grad_norm": 0.035888671875, "learning_rate": 7.045505013664022e-05, "loss": 0.0003, "step": 12759 }, { "epoch": 5.955659276546091, "grad_norm": 0.052490234375, "learning_rate": 7.044103202037437e-05, "loss": 0.0034, "step": 12760 }, { "epoch": 5.9561260210035005, "grad_norm": 0.03662109375, "learning_rate": 7.04270145405459e-05, "loss": 0.0004, "step": 12761 }, { "epoch": 5.95659276546091, "grad_norm": 0.03466796875, "learning_rate": 7.041299769745662e-05, "loss": 0.0004, "step": 12762 }, { "epoch": 5.95705950991832, "grad_norm": 0.07568359375, "learning_rate": 7.039898149140834e-05, "loss": 0.0047, "step": 12763 }, { "epoch": 5.957526254375729, "grad_norm": 0.028076171875, "learning_rate": 7.038496592270283e-05, "loss": 0.0003, "step": 12764 }, { "epoch": 5.957992998833139, "grad_norm": 0.0238037109375, "learning_rate": 7.037095099164183e-05, "loss": 0.0003, "step": 12765 }, { "epoch": 5.958459743290549, "grad_norm": 0.047119140625, "learning_rate": 7.035693669852716e-05, "loss": 0.0004, "step": 12766 }, { "epoch": 5.9589264877479575, "grad_norm": 0.040771484375, "learning_rate": 7.034292304366053e-05, "loss": 0.0004, "step": 12767 }, { "epoch": 5.959393232205367, "grad_norm": 0.0167236328125, "learning_rate": 7.032891002734365e-05, "loss": 0.0003, "step": 12768 }, { "epoch": 5.959859976662777, "grad_norm": 0.10546875, "learning_rate": 7.031489764987829e-05, "loss": 0.0004, "step": 12769 }, { "epoch": 5.960326721120187, "grad_norm": 0.027099609375, "learning_rate": 7.030088591156611e-05, "loss": 0.0004, "step": 12770 }, { "epoch": 5.960793465577597, "grad_norm": 0.0191650390625, "learning_rate": 7.028687481270879e-05, "loss": 0.0003, "step": 12771 }, { "epoch": 5.961260210035006, "grad_norm": 0.046142578125, "learning_rate": 7.027286435360805e-05, "loss": 0.0004, "step": 12772 }, { "epoch": 5.961726954492415, "grad_norm": 0.072265625, "learning_rate": 7.025885453456551e-05, "loss": 0.0005, "step": 12773 }, { "epoch": 5.962193698949825, "grad_norm": 0.035888671875, "learning_rate": 7.024484535588283e-05, "loss": 0.0004, "step": 12774 }, { "epoch": 5.962660443407234, "grad_norm": 0.07958984375, "learning_rate": 7.023083681786162e-05, "loss": 0.0004, "step": 12775 }, { "epoch": 5.963127187864644, "grad_norm": 0.62890625, "learning_rate": 7.021682892080356e-05, "loss": 0.0025, "step": 12776 }, { "epoch": 5.963593932322054, "grad_norm": 0.3203125, "learning_rate": 7.02028216650102e-05, "loss": 0.0015, "step": 12777 }, { "epoch": 5.9640606767794635, "grad_norm": 0.146484375, "learning_rate": 7.018881505078314e-05, "loss": 0.0035, "step": 12778 }, { "epoch": 5.964527421236873, "grad_norm": 0.197265625, "learning_rate": 7.017480907842399e-05, "loss": 0.0007, "step": 12779 }, { "epoch": 5.964994165694282, "grad_norm": 0.0216064453125, "learning_rate": 7.016080374823429e-05, "loss": 0.0003, "step": 12780 }, { "epoch": 5.965460910151692, "grad_norm": 0.08984375, "learning_rate": 7.014679906051558e-05, "loss": 0.001, "step": 12781 }, { "epoch": 5.965927654609102, "grad_norm": 0.2216796875, "learning_rate": 7.013279501556942e-05, "loss": 0.0024, "step": 12782 }, { "epoch": 5.966394399066511, "grad_norm": 0.1494140625, "learning_rate": 7.011879161369732e-05, "loss": 0.0008, "step": 12783 }, { "epoch": 5.9668611435239205, "grad_norm": 0.166015625, "learning_rate": 7.01047888552008e-05, "loss": 0.0027, "step": 12784 }, { "epoch": 5.96732788798133, "grad_norm": 0.11083984375, "learning_rate": 7.009078674038131e-05, "loss": 0.0004, "step": 12785 }, { "epoch": 5.96779463243874, "grad_norm": 0.05810546875, "learning_rate": 7.007678526954039e-05, "loss": 0.0005, "step": 12786 }, { "epoch": 5.968261376896149, "grad_norm": 0.1259765625, "learning_rate": 7.00627844429795e-05, "loss": 0.0004, "step": 12787 }, { "epoch": 5.968728121353559, "grad_norm": 0.119140625, "learning_rate": 7.004878426100003e-05, "loss": 0.0007, "step": 12788 }, { "epoch": 5.969194865810969, "grad_norm": 0.1689453125, "learning_rate": 7.003478472390351e-05, "loss": 0.0007, "step": 12789 }, { "epoch": 5.969661610268378, "grad_norm": 0.09326171875, "learning_rate": 7.002078583199131e-05, "loss": 0.0036, "step": 12790 }, { "epoch": 5.970128354725787, "grad_norm": 0.1806640625, "learning_rate": 7.000678758556485e-05, "loss": 0.0007, "step": 12791 }, { "epoch": 5.970595099183197, "grad_norm": 0.06689453125, "learning_rate": 6.999278998492553e-05, "loss": 0.0005, "step": 12792 }, { "epoch": 5.971061843640607, "grad_norm": 0.103515625, "learning_rate": 6.997879303037474e-05, "loss": 0.0005, "step": 12793 }, { "epoch": 5.971528588098017, "grad_norm": 0.11279296875, "learning_rate": 6.996479672221386e-05, "loss": 0.0007, "step": 12794 }, { "epoch": 5.971995332555426, "grad_norm": 0.154296875, "learning_rate": 6.995080106074423e-05, "loss": 0.0008, "step": 12795 }, { "epoch": 5.972462077012835, "grad_norm": 0.044677734375, "learning_rate": 6.993680604626718e-05, "loss": 0.0004, "step": 12796 }, { "epoch": 5.972928821470245, "grad_norm": 0.10693359375, "learning_rate": 6.992281167908404e-05, "loss": 0.0006, "step": 12797 }, { "epoch": 5.973395565927655, "grad_norm": 0.041259765625, "learning_rate": 6.990881795949614e-05, "loss": 0.0003, "step": 12798 }, { "epoch": 5.973862310385064, "grad_norm": 0.095703125, "learning_rate": 6.989482488780478e-05, "loss": 0.0006, "step": 12799 }, { "epoch": 5.974329054842474, "grad_norm": 0.0498046875, "learning_rate": 6.988083246431123e-05, "loss": 0.0004, "step": 12800 }, { "epoch": 5.9747957992998835, "grad_norm": 0.024169921875, "learning_rate": 6.98668406893168e-05, "loss": 0.0003, "step": 12801 }, { "epoch": 5.975262543757293, "grad_norm": 0.046142578125, "learning_rate": 6.985284956312271e-05, "loss": 0.0003, "step": 12802 }, { "epoch": 5.975729288214702, "grad_norm": 0.07763671875, "learning_rate": 6.983885908603022e-05, "loss": 0.0004, "step": 12803 }, { "epoch": 5.976196032672112, "grad_norm": 0.0625, "learning_rate": 6.982486925834055e-05, "loss": 0.0004, "step": 12804 }, { "epoch": 5.976662777129522, "grad_norm": 0.0235595703125, "learning_rate": 6.981088008035493e-05, "loss": 0.0002, "step": 12805 }, { "epoch": 5.977129521586932, "grad_norm": 0.302734375, "learning_rate": 6.979689155237456e-05, "loss": 0.0033, "step": 12806 }, { "epoch": 5.9775962660443405, "grad_norm": 0.035888671875, "learning_rate": 6.978290367470061e-05, "loss": 0.0004, "step": 12807 }, { "epoch": 5.97806301050175, "grad_norm": 0.1494140625, "learning_rate": 6.97689164476343e-05, "loss": 0.0004, "step": 12808 }, { "epoch": 5.97852975495916, "grad_norm": 0.0322265625, "learning_rate": 6.975492987147676e-05, "loss": 0.0003, "step": 12809 }, { "epoch": 5.978996499416569, "grad_norm": 0.10888671875, "learning_rate": 6.97409439465291e-05, "loss": 0.0006, "step": 12810 }, { "epoch": 5.979463243873979, "grad_norm": 0.271484375, "learning_rate": 6.972695867309252e-05, "loss": 0.0036, "step": 12811 }, { "epoch": 5.979929988331389, "grad_norm": 0.037841796875, "learning_rate": 6.97129740514681e-05, "loss": 0.0004, "step": 12812 }, { "epoch": 5.980396732788798, "grad_norm": 0.035400390625, "learning_rate": 6.969899008195697e-05, "loss": 0.0003, "step": 12813 }, { "epoch": 5.980863477246208, "grad_norm": 0.06591796875, "learning_rate": 6.968500676486016e-05, "loss": 0.0035, "step": 12814 }, { "epoch": 5.981330221703617, "grad_norm": 0.376953125, "learning_rate": 6.967102410047883e-05, "loss": 0.0018, "step": 12815 }, { "epoch": 5.981796966161027, "grad_norm": 0.0281982421875, "learning_rate": 6.965704208911399e-05, "loss": 0.0003, "step": 12816 }, { "epoch": 5.982263710618437, "grad_norm": 0.2255859375, "learning_rate": 6.964306073106668e-05, "loss": 0.0038, "step": 12817 }, { "epoch": 5.9827304550758456, "grad_norm": 0.04638671875, "learning_rate": 6.962908002663799e-05, "loss": 0.0003, "step": 12818 }, { "epoch": 5.983197199533255, "grad_norm": 0.0257568359375, "learning_rate": 6.96150999761289e-05, "loss": 0.0004, "step": 12819 }, { "epoch": 5.983663943990665, "grad_norm": 0.0296630859375, "learning_rate": 6.960112057984038e-05, "loss": 0.0003, "step": 12820 }, { "epoch": 5.984130688448075, "grad_norm": 0.08935546875, "learning_rate": 6.958714183807348e-05, "loss": 0.0006, "step": 12821 }, { "epoch": 5.984597432905485, "grad_norm": 0.03955078125, "learning_rate": 6.957316375112919e-05, "loss": 0.002, "step": 12822 }, { "epoch": 5.985064177362894, "grad_norm": 0.0296630859375, "learning_rate": 6.955918631930843e-05, "loss": 0.0003, "step": 12823 }, { "epoch": 5.985530921820303, "grad_norm": 0.11279296875, "learning_rate": 6.954520954291214e-05, "loss": 0.0005, "step": 12824 }, { "epoch": 5.985997666277713, "grad_norm": 0.02734375, "learning_rate": 6.953123342224129e-05, "loss": 0.0003, "step": 12825 }, { "epoch": 5.986464410735122, "grad_norm": 0.173828125, "learning_rate": 6.95172579575968e-05, "loss": 0.0008, "step": 12826 }, { "epoch": 5.986931155192532, "grad_norm": 0.068359375, "learning_rate": 6.950328314927955e-05, "loss": 0.0005, "step": 12827 }, { "epoch": 5.987397899649942, "grad_norm": 0.052001953125, "learning_rate": 6.948930899759048e-05, "loss": 0.0032, "step": 12828 }, { "epoch": 5.9878646441073515, "grad_norm": 0.020751953125, "learning_rate": 6.947533550283041e-05, "loss": 0.0003, "step": 12829 }, { "epoch": 5.98833138856476, "grad_norm": 0.054931640625, "learning_rate": 6.946136266530023e-05, "loss": 0.0004, "step": 12830 }, { "epoch": 5.98879813302217, "grad_norm": 0.0673828125, "learning_rate": 6.944739048530082e-05, "loss": 0.0026, "step": 12831 }, { "epoch": 5.98926487747958, "grad_norm": 0.349609375, "learning_rate": 6.9433418963133e-05, "loss": 0.0013, "step": 12832 }, { "epoch": 5.98973162193699, "grad_norm": 0.054443359375, "learning_rate": 6.941944809909756e-05, "loss": 0.0005, "step": 12833 }, { "epoch": 5.990198366394399, "grad_norm": 0.0142822265625, "learning_rate": 6.940547789349533e-05, "loss": 0.0002, "step": 12834 }, { "epoch": 5.9906651108518085, "grad_norm": 0.07373046875, "learning_rate": 6.939150834662711e-05, "loss": 0.0006, "step": 12835 }, { "epoch": 5.991131855309218, "grad_norm": 0.040283203125, "learning_rate": 6.93775394587937e-05, "loss": 0.0004, "step": 12836 }, { "epoch": 5.991598599766628, "grad_norm": 0.2890625, "learning_rate": 6.936357123029579e-05, "loss": 0.0008, "step": 12837 }, { "epoch": 5.992065344224037, "grad_norm": 0.1904296875, "learning_rate": 6.934960366143424e-05, "loss": 0.001, "step": 12838 }, { "epoch": 5.992532088681447, "grad_norm": 0.2001953125, "learning_rate": 6.93356367525097e-05, "loss": 0.0008, "step": 12839 }, { "epoch": 5.992998833138857, "grad_norm": 0.1376953125, "learning_rate": 6.932167050382294e-05, "loss": 0.0007, "step": 12840 }, { "epoch": 5.993465577596266, "grad_norm": 0.1796875, "learning_rate": 6.930770491567463e-05, "loss": 0.0046, "step": 12841 }, { "epoch": 5.993932322053675, "grad_norm": 0.042236328125, "learning_rate": 6.929373998836552e-05, "loss": 0.0004, "step": 12842 }, { "epoch": 5.994399066511085, "grad_norm": 0.2041015625, "learning_rate": 6.927977572219623e-05, "loss": 0.0014, "step": 12843 }, { "epoch": 5.994865810968495, "grad_norm": 0.0281982421875, "learning_rate": 6.926581211746748e-05, "loss": 0.0003, "step": 12844 }, { "epoch": 5.995332555425904, "grad_norm": 0.05859375, "learning_rate": 6.925184917447987e-05, "loss": 0.0003, "step": 12845 }, { "epoch": 5.995799299883314, "grad_norm": 0.018798828125, "learning_rate": 6.923788689353405e-05, "loss": 0.0002, "step": 12846 }, { "epoch": 5.996266044340723, "grad_norm": 0.08642578125, "learning_rate": 6.922392527493069e-05, "loss": 0.0005, "step": 12847 }, { "epoch": 5.996732788798133, "grad_norm": 0.0179443359375, "learning_rate": 6.920996431897036e-05, "loss": 0.0003, "step": 12848 }, { "epoch": 5.997199533255543, "grad_norm": 0.04541015625, "learning_rate": 6.919600402595364e-05, "loss": 0.0003, "step": 12849 }, { "epoch": 5.997666277712952, "grad_norm": 0.04296875, "learning_rate": 6.918204439618114e-05, "loss": 0.0014, "step": 12850 }, { "epoch": 5.998133022170362, "grad_norm": 0.9296875, "learning_rate": 6.916808542995341e-05, "loss": 0.0193, "step": 12851 }, { "epoch": 5.9985997666277715, "grad_norm": 0.1259765625, "learning_rate": 6.915412712757104e-05, "loss": 0.0005, "step": 12852 }, { "epoch": 5.99906651108518, "grad_norm": 0.158203125, "learning_rate": 6.914016948933448e-05, "loss": 0.0006, "step": 12853 }, { "epoch": 5.99953325554259, "grad_norm": 0.24609375, "learning_rate": 6.912621251554435e-05, "loss": 0.0018, "step": 12854 }, { "epoch": 6.0, "grad_norm": 0.062255859375, "learning_rate": 6.911225620650112e-05, "loss": 0.0004, "step": 12855 }, { "epoch": 6.00046674445741, "grad_norm": 0.0247802734375, "learning_rate": 6.909830056250527e-05, "loss": 0.0003, "step": 12856 }, { "epoch": 6.000933488914819, "grad_norm": 0.384765625, "learning_rate": 6.90843455838573e-05, "loss": 0.0037, "step": 12857 }, { "epoch": 6.0014002333722285, "grad_norm": 0.035400390625, "learning_rate": 6.907039127085769e-05, "loss": 0.0002, "step": 12858 }, { "epoch": 6.001866977829638, "grad_norm": 0.054443359375, "learning_rate": 6.905643762380685e-05, "loss": 0.0019, "step": 12859 }, { "epoch": 6.002333722287048, "grad_norm": 0.11376953125, "learning_rate": 6.904248464300525e-05, "loss": 0.0025, "step": 12860 }, { "epoch": 6.002800466744457, "grad_norm": 0.07275390625, "learning_rate": 6.90285323287533e-05, "loss": 0.0004, "step": 12861 }, { "epoch": 6.003267211201867, "grad_norm": 0.030517578125, "learning_rate": 6.901458068135143e-05, "loss": 0.0003, "step": 12862 }, { "epoch": 6.003733955659277, "grad_norm": 0.1865234375, "learning_rate": 6.900062970109998e-05, "loss": 0.0008, "step": 12863 }, { "epoch": 6.004200700116686, "grad_norm": 0.353515625, "learning_rate": 6.89866793882994e-05, "loss": 0.0067, "step": 12864 }, { "epoch": 6.004200700116686, "eval_loss": 2.306922435760498, "eval_runtime": 56.2567, "eval_samples_per_second": 32.067, "eval_steps_per_second": 4.017, "step": 12864 }, { "epoch": 6.004667444574095, "grad_norm": 0.06787109375, "learning_rate": 6.897272974325e-05, "loss": 0.0004, "step": 12865 }, { "epoch": 6.005134189031505, "grad_norm": 0.04052734375, "learning_rate": 6.895878076625215e-05, "loss": 0.0004, "step": 12866 }, { "epoch": 6.005600933488915, "grad_norm": 0.1748046875, "learning_rate": 6.894483245760621e-05, "loss": 0.0014, "step": 12867 }, { "epoch": 6.006067677946325, "grad_norm": 0.056640625, "learning_rate": 6.893088481761248e-05, "loss": 0.0005, "step": 12868 }, { "epoch": 6.006534422403734, "grad_norm": 0.2412109375, "learning_rate": 6.891693784657127e-05, "loss": 0.0023, "step": 12869 }, { "epoch": 6.007001166861143, "grad_norm": 0.03076171875, "learning_rate": 6.890299154478288e-05, "loss": 0.0003, "step": 12870 }, { "epoch": 6.007467911318553, "grad_norm": 0.12060546875, "learning_rate": 6.888904591254758e-05, "loss": 0.0005, "step": 12871 }, { "epoch": 6.007934655775963, "grad_norm": 0.0260009765625, "learning_rate": 6.887510095016565e-05, "loss": 0.0003, "step": 12872 }, { "epoch": 6.008401400233372, "grad_norm": 0.55078125, "learning_rate": 6.88611566579373e-05, "loss": 0.0045, "step": 12873 }, { "epoch": 6.008868144690782, "grad_norm": 0.05029296875, "learning_rate": 6.884721303616282e-05, "loss": 0.0004, "step": 12874 }, { "epoch": 6.0093348891481915, "grad_norm": 0.033935546875, "learning_rate": 6.883327008514241e-05, "loss": 0.0003, "step": 12875 }, { "epoch": 6.009801633605601, "grad_norm": 0.0303955078125, "learning_rate": 6.881932780517625e-05, "loss": 0.001, "step": 12876 }, { "epoch": 6.01026837806301, "grad_norm": 0.0291748046875, "learning_rate": 6.880538619656459e-05, "loss": 0.0004, "step": 12877 }, { "epoch": 6.01073512252042, "grad_norm": 0.31640625, "learning_rate": 6.879144525960757e-05, "loss": 0.0039, "step": 12878 }, { "epoch": 6.01120186697783, "grad_norm": 0.0308837890625, "learning_rate": 6.877750499460533e-05, "loss": 0.0003, "step": 12879 }, { "epoch": 6.01166861143524, "grad_norm": 0.0306396484375, "learning_rate": 6.876356540185808e-05, "loss": 0.0003, "step": 12880 }, { "epoch": 6.0121353558926485, "grad_norm": 0.02001953125, "learning_rate": 6.874962648166593e-05, "loss": 0.0003, "step": 12881 }, { "epoch": 6.012602100350058, "grad_norm": 0.01470947265625, "learning_rate": 6.873568823432898e-05, "loss": 0.0002, "step": 12882 }, { "epoch": 6.013068844807468, "grad_norm": 0.302734375, "learning_rate": 6.872175066014734e-05, "loss": 0.0011, "step": 12883 }, { "epoch": 6.013535589264878, "grad_norm": 0.0208740234375, "learning_rate": 6.870781375942114e-05, "loss": 0.0003, "step": 12884 }, { "epoch": 6.014002333722287, "grad_norm": 0.1884765625, "learning_rate": 6.869387753245042e-05, "loss": 0.0012, "step": 12885 }, { "epoch": 6.014469078179697, "grad_norm": 0.2412109375, "learning_rate": 6.867994197953522e-05, "loss": 0.0033, "step": 12886 }, { "epoch": 6.014935822637106, "grad_norm": 0.01708984375, "learning_rate": 6.866600710097566e-05, "loss": 0.0003, "step": 12887 }, { "epoch": 6.015402567094516, "grad_norm": 0.050537109375, "learning_rate": 6.865207289707171e-05, "loss": 0.0011, "step": 12888 }, { "epoch": 6.015869311551925, "grad_norm": 0.0211181640625, "learning_rate": 6.863813936812342e-05, "loss": 0.0003, "step": 12889 }, { "epoch": 6.016336056009335, "grad_norm": 0.01806640625, "learning_rate": 6.862420651443083e-05, "loss": 0.0003, "step": 12890 }, { "epoch": 6.016802800466745, "grad_norm": 0.0224609375, "learning_rate": 6.861027433629382e-05, "loss": 0.0002, "step": 12891 }, { "epoch": 6.0172695449241544, "grad_norm": 0.029296875, "learning_rate": 6.859634283401246e-05, "loss": 0.0003, "step": 12892 }, { "epoch": 6.017736289381563, "grad_norm": 0.0286865234375, "learning_rate": 6.858241200788669e-05, "loss": 0.0003, "step": 12893 }, { "epoch": 6.018203033838973, "grad_norm": 0.06298828125, "learning_rate": 6.856848185821643e-05, "loss": 0.0032, "step": 12894 }, { "epoch": 6.018669778296383, "grad_norm": 0.470703125, "learning_rate": 6.855455238530162e-05, "loss": 0.0023, "step": 12895 }, { "epoch": 6.019136522753793, "grad_norm": 0.020263671875, "learning_rate": 6.85406235894422e-05, "loss": 0.0003, "step": 12896 }, { "epoch": 6.019603267211202, "grad_norm": 0.0341796875, "learning_rate": 6.852669547093807e-05, "loss": 0.0004, "step": 12897 }, { "epoch": 6.020070011668611, "grad_norm": 0.0771484375, "learning_rate": 6.851276803008907e-05, "loss": 0.0007, "step": 12898 }, { "epoch": 6.020536756126021, "grad_norm": 0.03173828125, "learning_rate": 6.849884126719514e-05, "loss": 0.0004, "step": 12899 }, { "epoch": 6.02100350058343, "grad_norm": 0.1982421875, "learning_rate": 6.848491518255611e-05, "loss": 0.001, "step": 12900 }, { "epoch": 6.02147024504084, "grad_norm": 0.07275390625, "learning_rate": 6.847098977647179e-05, "loss": 0.0005, "step": 12901 }, { "epoch": 6.02193698949825, "grad_norm": 0.21875, "learning_rate": 6.845706504924207e-05, "loss": 0.0044, "step": 12902 }, { "epoch": 6.0224037339556595, "grad_norm": 0.154296875, "learning_rate": 6.844314100116675e-05, "loss": 0.0109, "step": 12903 }, { "epoch": 6.022870478413068, "grad_norm": 0.09619140625, "learning_rate": 6.84292176325456e-05, "loss": 0.0004, "step": 12904 }, { "epoch": 6.023337222870478, "grad_norm": 0.05810546875, "learning_rate": 6.841529494367842e-05, "loss": 0.0005, "step": 12905 }, { "epoch": 6.023803967327888, "grad_norm": 0.0390625, "learning_rate": 6.8401372934865e-05, "loss": 0.0004, "step": 12906 }, { "epoch": 6.024270711785298, "grad_norm": 0.1337890625, "learning_rate": 6.838745160640506e-05, "loss": 0.0008, "step": 12907 }, { "epoch": 6.024737456242707, "grad_norm": 0.041015625, "learning_rate": 6.837353095859835e-05, "loss": 0.0004, "step": 12908 }, { "epoch": 6.0252042007001165, "grad_norm": 0.0654296875, "learning_rate": 6.835961099174464e-05, "loss": 0.0005, "step": 12909 }, { "epoch": 6.025670945157526, "grad_norm": 0.043212890625, "learning_rate": 6.83456917061436e-05, "loss": 0.0004, "step": 12910 }, { "epoch": 6.026137689614936, "grad_norm": 0.06298828125, "learning_rate": 6.833177310209495e-05, "loss": 0.0004, "step": 12911 }, { "epoch": 6.026604434072345, "grad_norm": 0.185546875, "learning_rate": 6.831785517989832e-05, "loss": 0.0013, "step": 12912 }, { "epoch": 6.027071178529755, "grad_norm": 0.0311279296875, "learning_rate": 6.830393793985346e-05, "loss": 0.0004, "step": 12913 }, { "epoch": 6.027537922987165, "grad_norm": 0.259765625, "learning_rate": 6.829002138225999e-05, "loss": 0.0046, "step": 12914 }, { "epoch": 6.028004667444574, "grad_norm": 0.0203857421875, "learning_rate": 6.82761055074175e-05, "loss": 0.0003, "step": 12915 }, { "epoch": 6.028471411901983, "grad_norm": 0.052001953125, "learning_rate": 6.826219031562569e-05, "loss": 0.0004, "step": 12916 }, { "epoch": 6.028938156359393, "grad_norm": 0.107421875, "learning_rate": 6.824827580718414e-05, "loss": 0.0007, "step": 12917 }, { "epoch": 6.029404900816803, "grad_norm": 0.04736328125, "learning_rate": 6.823436198239241e-05, "loss": 0.0045, "step": 12918 }, { "epoch": 6.029871645274213, "grad_norm": 0.099609375, "learning_rate": 6.822044884155013e-05, "loss": 0.0005, "step": 12919 }, { "epoch": 6.030338389731622, "grad_norm": 0.1064453125, "learning_rate": 6.820653638495685e-05, "loss": 0.0009, "step": 12920 }, { "epoch": 6.030805134189031, "grad_norm": 0.11572265625, "learning_rate": 6.819262461291212e-05, "loss": 0.0009, "step": 12921 }, { "epoch": 6.031271878646441, "grad_norm": 0.12158203125, "learning_rate": 6.817871352571545e-05, "loss": 0.0006, "step": 12922 }, { "epoch": 6.031738623103851, "grad_norm": 0.09033203125, "learning_rate": 6.816480312366641e-05, "loss": 0.0025, "step": 12923 }, { "epoch": 6.03220536756126, "grad_norm": 0.1796875, "learning_rate": 6.815089340706448e-05, "loss": 0.0031, "step": 12924 }, { "epoch": 6.03267211201867, "grad_norm": 0.08837890625, "learning_rate": 6.813698437620913e-05, "loss": 0.0024, "step": 12925 }, { "epoch": 6.0331388564760795, "grad_norm": 0.1005859375, "learning_rate": 6.812307603139987e-05, "loss": 0.0006, "step": 12926 }, { "epoch": 6.033605600933489, "grad_norm": 0.109375, "learning_rate": 6.810916837293617e-05, "loss": 0.0006, "step": 12927 }, { "epoch": 6.034072345390898, "grad_norm": 0.035888671875, "learning_rate": 6.809526140111742e-05, "loss": 0.0003, "step": 12928 }, { "epoch": 6.034539089848308, "grad_norm": 0.07958984375, "learning_rate": 6.808135511624312e-05, "loss": 0.0004, "step": 12929 }, { "epoch": 6.035005834305718, "grad_norm": 0.05517578125, "learning_rate": 6.806744951861265e-05, "loss": 0.0004, "step": 12930 }, { "epoch": 6.035472578763128, "grad_norm": 0.026611328125, "learning_rate": 6.805354460852544e-05, "loss": 0.0003, "step": 12931 }, { "epoch": 6.0359393232205365, "grad_norm": 0.33984375, "learning_rate": 6.803964038628082e-05, "loss": 0.0008, "step": 12932 }, { "epoch": 6.036406067677946, "grad_norm": 0.0242919921875, "learning_rate": 6.802573685217823e-05, "loss": 0.0003, "step": 12933 }, { "epoch": 6.036872812135356, "grad_norm": 0.029296875, "learning_rate": 6.8011834006517e-05, "loss": 0.0004, "step": 12934 }, { "epoch": 6.037339556592766, "grad_norm": 0.055419921875, "learning_rate": 6.799793184959646e-05, "loss": 0.0004, "step": 12935 }, { "epoch": 6.037806301050175, "grad_norm": 0.173828125, "learning_rate": 6.798403038171597e-05, "loss": 0.0031, "step": 12936 }, { "epoch": 6.038273045507585, "grad_norm": 0.0458984375, "learning_rate": 6.797012960317483e-05, "loss": 0.0026, "step": 12937 }, { "epoch": 6.038739789964994, "grad_norm": 0.050537109375, "learning_rate": 6.795622951427236e-05, "loss": 0.0004, "step": 12938 }, { "epoch": 6.039206534422403, "grad_norm": 0.1611328125, "learning_rate": 6.79423301153078e-05, "loss": 0.0016, "step": 12939 }, { "epoch": 6.039673278879813, "grad_norm": 0.039306640625, "learning_rate": 6.792843140658043e-05, "loss": 0.0003, "step": 12940 }, { "epoch": 6.040140023337223, "grad_norm": 0.01007080078125, "learning_rate": 6.791453338838953e-05, "loss": 0.0002, "step": 12941 }, { "epoch": 6.040606767794633, "grad_norm": 0.061279296875, "learning_rate": 6.790063606103431e-05, "loss": 0.0004, "step": 12942 }, { "epoch": 6.041073512252042, "grad_norm": 0.05810546875, "learning_rate": 6.788673942481403e-05, "loss": 0.0004, "step": 12943 }, { "epoch": 6.041540256709451, "grad_norm": 0.042236328125, "learning_rate": 6.787284348002785e-05, "loss": 0.0004, "step": 12944 }, { "epoch": 6.042007001166861, "grad_norm": 0.134765625, "learning_rate": 6.785894822697503e-05, "loss": 0.0018, "step": 12945 }, { "epoch": 6.042473745624271, "grad_norm": 0.05615234375, "learning_rate": 6.784505366595471e-05, "loss": 0.0039, "step": 12946 }, { "epoch": 6.04294049008168, "grad_norm": 0.0186767578125, "learning_rate": 6.783115979726602e-05, "loss": 0.0002, "step": 12947 }, { "epoch": 6.04340723453909, "grad_norm": 0.27734375, "learning_rate": 6.78172666212082e-05, "loss": 0.0021, "step": 12948 }, { "epoch": 6.0438739789964995, "grad_norm": 0.0361328125, "learning_rate": 6.780337413808031e-05, "loss": 0.0019, "step": 12949 }, { "epoch": 6.044340723453909, "grad_norm": 0.061767578125, "learning_rate": 6.77894823481815e-05, "loss": 0.0006, "step": 12950 }, { "epoch": 6.044807467911318, "grad_norm": 0.0159912109375, "learning_rate": 6.777559125181088e-05, "loss": 0.0003, "step": 12951 }, { "epoch": 6.045274212368728, "grad_norm": 0.0322265625, "learning_rate": 6.776170084926754e-05, "loss": 0.0003, "step": 12952 }, { "epoch": 6.045740956826138, "grad_norm": 0.09521484375, "learning_rate": 6.774781114085056e-05, "loss": 0.0039, "step": 12953 }, { "epoch": 6.046207701283548, "grad_norm": 0.031005859375, "learning_rate": 6.773392212685894e-05, "loss": 0.0004, "step": 12954 }, { "epoch": 6.0466744457409565, "grad_norm": 0.291015625, "learning_rate": 6.772003380759183e-05, "loss": 0.001, "step": 12955 }, { "epoch": 6.047141190198366, "grad_norm": 0.03173828125, "learning_rate": 6.770614618334819e-05, "loss": 0.0002, "step": 12956 }, { "epoch": 6.047607934655776, "grad_norm": 0.032470703125, "learning_rate": 6.769225925442704e-05, "loss": 0.0004, "step": 12957 }, { "epoch": 6.048074679113186, "grad_norm": 0.056396484375, "learning_rate": 6.767837302112741e-05, "loss": 0.0004, "step": 12958 }, { "epoch": 6.048541423570595, "grad_norm": 0.03173828125, "learning_rate": 6.766448748374827e-05, "loss": 0.0003, "step": 12959 }, { "epoch": 6.049008168028005, "grad_norm": 0.0218505859375, "learning_rate": 6.765060264258857e-05, "loss": 0.0003, "step": 12960 }, { "epoch": 6.049474912485414, "grad_norm": 0.05615234375, "learning_rate": 6.763671849794731e-05, "loss": 0.0004, "step": 12961 }, { "epoch": 6.049941656942824, "grad_norm": 0.212890625, "learning_rate": 6.76228350501234e-05, "loss": 0.0031, "step": 12962 }, { "epoch": 6.050408401400233, "grad_norm": 0.047607421875, "learning_rate": 6.760895229941579e-05, "loss": 0.0003, "step": 12963 }, { "epoch": 6.050875145857643, "grad_norm": 0.0263671875, "learning_rate": 6.759507024612335e-05, "loss": 0.0002, "step": 12964 }, { "epoch": 6.051341890315053, "grad_norm": 0.1669921875, "learning_rate": 6.758118889054503e-05, "loss": 0.0026, "step": 12965 }, { "epoch": 6.0518086347724624, "grad_norm": 0.046875, "learning_rate": 6.756730823297967e-05, "loss": 0.0003, "step": 12966 }, { "epoch": 6.052275379229871, "grad_norm": 0.043701171875, "learning_rate": 6.755342827372613e-05, "loss": 0.0004, "step": 12967 }, { "epoch": 6.052742123687281, "grad_norm": 0.020751953125, "learning_rate": 6.753954901308329e-05, "loss": 0.0003, "step": 12968 }, { "epoch": 6.053208868144691, "grad_norm": 0.02001953125, "learning_rate": 6.752567045134998e-05, "loss": 0.0003, "step": 12969 }, { "epoch": 6.053675612602101, "grad_norm": 0.046630859375, "learning_rate": 6.751179258882503e-05, "loss": 0.0003, "step": 12970 }, { "epoch": 6.05414235705951, "grad_norm": 0.0113525390625, "learning_rate": 6.74979154258072e-05, "loss": 0.0002, "step": 12971 }, { "epoch": 6.0546091015169194, "grad_norm": 0.03515625, "learning_rate": 6.748403896259534e-05, "loss": 0.0004, "step": 12972 }, { "epoch": 6.055075845974329, "grad_norm": 0.039306640625, "learning_rate": 6.74701631994882e-05, "loss": 0.0003, "step": 12973 }, { "epoch": 6.055542590431739, "grad_norm": 0.040771484375, "learning_rate": 6.74562881367845e-05, "loss": 0.0004, "step": 12974 }, { "epoch": 6.056009334889148, "grad_norm": 0.0157470703125, "learning_rate": 6.744241377478307e-05, "loss": 0.0003, "step": 12975 }, { "epoch": 6.056476079346558, "grad_norm": 0.024169921875, "learning_rate": 6.742854011378256e-05, "loss": 0.0002, "step": 12976 }, { "epoch": 6.0569428238039675, "grad_norm": 0.224609375, "learning_rate": 6.741466715408174e-05, "loss": 0.0014, "step": 12977 }, { "epoch": 6.057409568261377, "grad_norm": 0.03271484375, "learning_rate": 6.740079489597929e-05, "loss": 0.0003, "step": 12978 }, { "epoch": 6.057876312718786, "grad_norm": 0.0166015625, "learning_rate": 6.73869233397739e-05, "loss": 0.0002, "step": 12979 }, { "epoch": 6.058343057176196, "grad_norm": 0.033203125, "learning_rate": 6.737305248576424e-05, "loss": 0.0003, "step": 12980 }, { "epoch": 6.058809801633606, "grad_norm": 0.04736328125, "learning_rate": 6.735918233424891e-05, "loss": 0.0002, "step": 12981 }, { "epoch": 6.059276546091015, "grad_norm": 0.06201171875, "learning_rate": 6.734531288552665e-05, "loss": 0.0026, "step": 12982 }, { "epoch": 6.0597432905484245, "grad_norm": 0.04931640625, "learning_rate": 6.733144413989603e-05, "loss": 0.0003, "step": 12983 }, { "epoch": 6.060210035005834, "grad_norm": 0.1826171875, "learning_rate": 6.731757609765564e-05, "loss": 0.0008, "step": 12984 }, { "epoch": 6.060676779463244, "grad_norm": 0.060302734375, "learning_rate": 6.730370875910412e-05, "loss": 0.0004, "step": 12985 }, { "epoch": 6.061143523920653, "grad_norm": 0.026123046875, "learning_rate": 6.728984212454006e-05, "loss": 0.0002, "step": 12986 }, { "epoch": 6.061610268378063, "grad_norm": 0.0478515625, "learning_rate": 6.727597619426196e-05, "loss": 0.0004, "step": 12987 }, { "epoch": 6.062077012835473, "grad_norm": 0.0277099609375, "learning_rate": 6.72621109685684e-05, "loss": 0.0003, "step": 12988 }, { "epoch": 6.062543757292882, "grad_norm": 0.0301513671875, "learning_rate": 6.724824644775789e-05, "loss": 0.0003, "step": 12989 }, { "epoch": 6.063010501750291, "grad_norm": 0.041259765625, "learning_rate": 6.723438263212898e-05, "loss": 0.0004, "step": 12990 }, { "epoch": 6.063477246207701, "grad_norm": 0.0167236328125, "learning_rate": 6.72205195219802e-05, "loss": 0.0003, "step": 12991 }, { "epoch": 6.063943990665111, "grad_norm": 0.068359375, "learning_rate": 6.720665711760998e-05, "loss": 0.0003, "step": 12992 }, { "epoch": 6.064410735122521, "grad_norm": 0.05322265625, "learning_rate": 6.71927954193168e-05, "loss": 0.0028, "step": 12993 }, { "epoch": 6.06487747957993, "grad_norm": 0.047119140625, "learning_rate": 6.717893442739916e-05, "loss": 0.0004, "step": 12994 }, { "epoch": 6.065344224037339, "grad_norm": 0.056640625, "learning_rate": 6.716507414215548e-05, "loss": 0.0023, "step": 12995 }, { "epoch": 6.065810968494749, "grad_norm": 0.031494140625, "learning_rate": 6.715121456388415e-05, "loss": 0.0004, "step": 12996 }, { "epoch": 6.066277712952159, "grad_norm": 0.016357421875, "learning_rate": 6.713735569288367e-05, "loss": 0.0002, "step": 12997 }, { "epoch": 6.066744457409568, "grad_norm": 0.0322265625, "learning_rate": 6.712349752945236e-05, "loss": 0.0004, "step": 12998 }, { "epoch": 6.067211201866978, "grad_norm": 0.04443359375, "learning_rate": 6.71096400738886e-05, "loss": 0.0019, "step": 12999 }, { "epoch": 6.0676779463243875, "grad_norm": 0.036865234375, "learning_rate": 6.70957833264908e-05, "loss": 0.0003, "step": 13000 }, { "epoch": 6.068144690781797, "grad_norm": 0.037109375, "learning_rate": 6.708192728755732e-05, "loss": 0.0002, "step": 13001 }, { "epoch": 6.068611435239206, "grad_norm": 0.02880859375, "learning_rate": 6.706807195738644e-05, "loss": 0.0026, "step": 13002 }, { "epoch": 6.069078179696616, "grad_norm": 0.0169677734375, "learning_rate": 6.70542173362765e-05, "loss": 0.0002, "step": 13003 }, { "epoch": 6.069544924154026, "grad_norm": 0.040283203125, "learning_rate": 6.704036342452582e-05, "loss": 0.0004, "step": 13004 }, { "epoch": 6.070011668611436, "grad_norm": 0.234375, "learning_rate": 6.702651022243271e-05, "loss": 0.0007, "step": 13005 }, { "epoch": 6.0704784130688445, "grad_norm": 0.1201171875, "learning_rate": 6.701265773029539e-05, "loss": 0.0007, "step": 13006 }, { "epoch": 6.070945157526254, "grad_norm": 0.09228515625, "learning_rate": 6.699880594841216e-05, "loss": 0.0006, "step": 13007 }, { "epoch": 6.071411901983664, "grad_norm": 0.24609375, "learning_rate": 6.698495487708126e-05, "loss": 0.0009, "step": 13008 }, { "epoch": 6.071878646441074, "grad_norm": 0.046142578125, "learning_rate": 6.697110451660089e-05, "loss": 0.0004, "step": 13009 }, { "epoch": 6.072345390898483, "grad_norm": 0.02587890625, "learning_rate": 6.695725486726931e-05, "loss": 0.0003, "step": 13010 }, { "epoch": 6.072812135355893, "grad_norm": 0.0322265625, "learning_rate": 6.69434059293847e-05, "loss": 0.0004, "step": 13011 }, { "epoch": 6.073278879813302, "grad_norm": 0.038818359375, "learning_rate": 6.692955770324522e-05, "loss": 0.0003, "step": 13012 }, { "epoch": 6.073745624270712, "grad_norm": 0.1884765625, "learning_rate": 6.691571018914904e-05, "loss": 0.0008, "step": 13013 }, { "epoch": 6.074212368728121, "grad_norm": 0.1474609375, "learning_rate": 6.690186338739436e-05, "loss": 0.0008, "step": 13014 }, { "epoch": 6.074679113185531, "grad_norm": 0.07470703125, "learning_rate": 6.688801729827928e-05, "loss": 0.0004, "step": 13015 }, { "epoch": 6.075145857642941, "grad_norm": 0.054443359375, "learning_rate": 6.68741719221019e-05, "loss": 0.0018, "step": 13016 }, { "epoch": 6.0756126021003505, "grad_norm": 0.059326171875, "learning_rate": 6.686032725916038e-05, "loss": 0.0003, "step": 13017 }, { "epoch": 6.076079346557759, "grad_norm": 0.032470703125, "learning_rate": 6.684648330975278e-05, "loss": 0.0003, "step": 13018 }, { "epoch": 6.076546091015169, "grad_norm": 0.10693359375, "learning_rate": 6.683264007417717e-05, "loss": 0.0005, "step": 13019 }, { "epoch": 6.077012835472579, "grad_norm": 0.0130615234375, "learning_rate": 6.681879755273162e-05, "loss": 0.0002, "step": 13020 }, { "epoch": 6.077479579929989, "grad_norm": 0.296875, "learning_rate": 6.680495574571417e-05, "loss": 0.0023, "step": 13021 }, { "epoch": 6.077946324387398, "grad_norm": 0.0157470703125, "learning_rate": 6.679111465342287e-05, "loss": 0.0002, "step": 13022 }, { "epoch": 6.0784130688448075, "grad_norm": 0.0986328125, "learning_rate": 6.677727427615569e-05, "loss": 0.0004, "step": 13023 }, { "epoch": 6.078879813302217, "grad_norm": 0.0419921875, "learning_rate": 6.676343461421068e-05, "loss": 0.0003, "step": 13024 }, { "epoch": 6.079346557759626, "grad_norm": 0.044189453125, "learning_rate": 6.674959566788579e-05, "loss": 0.0003, "step": 13025 }, { "epoch": 6.079813302217036, "grad_norm": 0.0120849609375, "learning_rate": 6.673575743747898e-05, "loss": 0.0002, "step": 13026 }, { "epoch": 6.080280046674446, "grad_norm": 0.0211181640625, "learning_rate": 6.672191992328825e-05, "loss": 0.0002, "step": 13027 }, { "epoch": 6.080746791131856, "grad_norm": 0.1328125, "learning_rate": 6.67080831256115e-05, "loss": 0.0008, "step": 13028 }, { "epoch": 6.0812135355892645, "grad_norm": 0.0247802734375, "learning_rate": 6.669424704474665e-05, "loss": 0.0003, "step": 13029 }, { "epoch": 6.081680280046674, "grad_norm": 0.05322265625, "learning_rate": 6.66804116809916e-05, "loss": 0.0003, "step": 13030 }, { "epoch": 6.082147024504084, "grad_norm": 0.047119140625, "learning_rate": 6.666657703464428e-05, "loss": 0.0028, "step": 13031 }, { "epoch": 6.082613768961494, "grad_norm": 0.0400390625, "learning_rate": 6.665274310600254e-05, "loss": 0.0002, "step": 13032 }, { "epoch": 6.083080513418903, "grad_norm": 0.0247802734375, "learning_rate": 6.663890989536426e-05, "loss": 0.0003, "step": 13033 }, { "epoch": 6.083547257876313, "grad_norm": 0.197265625, "learning_rate": 6.662507740302725e-05, "loss": 0.0009, "step": 13034 }, { "epoch": 6.084014002333722, "grad_norm": 0.01953125, "learning_rate": 6.661124562928933e-05, "loss": 0.0002, "step": 13035 }, { "epoch": 6.084480746791132, "grad_norm": 0.037109375, "learning_rate": 6.659741457444835e-05, "loss": 0.0002, "step": 13036 }, { "epoch": 6.084947491248541, "grad_norm": 0.032470703125, "learning_rate": 6.65835842388021e-05, "loss": 0.0026, "step": 13037 }, { "epoch": 6.085414235705951, "grad_norm": 0.10888671875, "learning_rate": 6.656975462264834e-05, "loss": 0.0039, "step": 13038 }, { "epoch": 6.085880980163361, "grad_norm": 0.0247802734375, "learning_rate": 6.655592572628486e-05, "loss": 0.0002, "step": 13039 }, { "epoch": 6.0863477246207704, "grad_norm": 0.059326171875, "learning_rate": 6.654209755000941e-05, "loss": 0.0024, "step": 13040 }, { "epoch": 6.086814469078179, "grad_norm": 0.02685546875, "learning_rate": 6.652827009411974e-05, "loss": 0.0003, "step": 13041 }, { "epoch": 6.087281213535589, "grad_norm": 0.08935546875, "learning_rate": 6.651444335891353e-05, "loss": 0.0022, "step": 13042 }, { "epoch": 6.087747957992999, "grad_norm": 0.030029296875, "learning_rate": 6.650061734468852e-05, "loss": 0.0019, "step": 13043 }, { "epoch": 6.088214702450409, "grad_norm": 0.60546875, "learning_rate": 6.648679205174238e-05, "loss": 0.0016, "step": 13044 }, { "epoch": 6.088681446907818, "grad_norm": 0.0203857421875, "learning_rate": 6.647296748037278e-05, "loss": 0.0002, "step": 13045 }, { "epoch": 6.0891481913652274, "grad_norm": 0.01080322265625, "learning_rate": 6.64591436308774e-05, "loss": 0.0002, "step": 13046 }, { "epoch": 6.089614935822637, "grad_norm": 0.048583984375, "learning_rate": 6.644532050355388e-05, "loss": 0.0004, "step": 13047 }, { "epoch": 6.090081680280047, "grad_norm": 0.046142578125, "learning_rate": 6.643149809869982e-05, "loss": 0.0004, "step": 13048 }, { "epoch": 6.090548424737456, "grad_norm": 0.0284423828125, "learning_rate": 6.641767641661286e-05, "loss": 0.0003, "step": 13049 }, { "epoch": 6.091015169194866, "grad_norm": 0.037841796875, "learning_rate": 6.64038554575906e-05, "loss": 0.0003, "step": 13050 }, { "epoch": 6.0914819136522755, "grad_norm": 0.1025390625, "learning_rate": 6.639003522193059e-05, "loss": 0.0004, "step": 13051 }, { "epoch": 6.091948658109685, "grad_norm": 0.134765625, "learning_rate": 6.637621570993042e-05, "loss": 0.0008, "step": 13052 }, { "epoch": 6.092415402567094, "grad_norm": 0.051025390625, "learning_rate": 6.636239692188762e-05, "loss": 0.0006, "step": 13053 }, { "epoch": 6.092882147024504, "grad_norm": 0.044677734375, "learning_rate": 6.634857885809975e-05, "loss": 0.0004, "step": 13054 }, { "epoch": 6.093348891481914, "grad_norm": 0.0087890625, "learning_rate": 6.63347615188643e-05, "loss": 0.0002, "step": 13055 }, { "epoch": 6.093815635939324, "grad_norm": 0.05029296875, "learning_rate": 6.632094490447881e-05, "loss": 0.0037, "step": 13056 }, { "epoch": 6.0942823803967325, "grad_norm": 0.0947265625, "learning_rate": 6.630712901524074e-05, "loss": 0.0004, "step": 13057 }, { "epoch": 6.094749124854142, "grad_norm": 0.018310546875, "learning_rate": 6.629331385144754e-05, "loss": 0.0002, "step": 13058 }, { "epoch": 6.095215869311552, "grad_norm": 0.032470703125, "learning_rate": 6.627949941339671e-05, "loss": 0.0003, "step": 13059 }, { "epoch": 6.095682613768962, "grad_norm": 0.0185546875, "learning_rate": 6.626568570138568e-05, "loss": 0.0002, "step": 13060 }, { "epoch": 6.096149358226371, "grad_norm": 0.0205078125, "learning_rate": 6.625187271571184e-05, "loss": 0.0002, "step": 13061 }, { "epoch": 6.096616102683781, "grad_norm": 0.064453125, "learning_rate": 6.623806045667265e-05, "loss": 0.002, "step": 13062 }, { "epoch": 6.09708284714119, "grad_norm": 0.1904296875, "learning_rate": 6.622424892456547e-05, "loss": 0.0011, "step": 13063 }, { "epoch": 6.0975495915986, "grad_norm": 0.0311279296875, "learning_rate": 6.621043811968768e-05, "loss": 0.0003, "step": 13064 }, { "epoch": 6.098016336056009, "grad_norm": 0.026123046875, "learning_rate": 6.619662804233664e-05, "loss": 0.0003, "step": 13065 }, { "epoch": 6.098483080513419, "grad_norm": 0.0281982421875, "learning_rate": 6.618281869280971e-05, "loss": 0.0003, "step": 13066 }, { "epoch": 6.098949824970829, "grad_norm": 0.057861328125, "learning_rate": 6.616901007140422e-05, "loss": 0.0009, "step": 13067 }, { "epoch": 6.099416569428238, "grad_norm": 0.0869140625, "learning_rate": 6.615520217841746e-05, "loss": 0.0003, "step": 13068 }, { "epoch": 6.099883313885647, "grad_norm": 0.030029296875, "learning_rate": 6.614139501414677e-05, "loss": 0.0003, "step": 13069 }, { "epoch": 6.100350058343057, "grad_norm": 0.03955078125, "learning_rate": 6.612758857888942e-05, "loss": 0.0003, "step": 13070 }, { "epoch": 6.100816802800467, "grad_norm": 0.41796875, "learning_rate": 6.611378287294267e-05, "loss": 0.0013, "step": 13071 }, { "epoch": 6.101283547257876, "grad_norm": 0.119140625, "learning_rate": 6.609997789660373e-05, "loss": 0.0026, "step": 13072 }, { "epoch": 6.101750291715286, "grad_norm": 0.119140625, "learning_rate": 6.608617365016993e-05, "loss": 0.0006, "step": 13073 }, { "epoch": 6.1022170361726955, "grad_norm": 0.04833984375, "learning_rate": 6.607237013393842e-05, "loss": 0.0004, "step": 13074 }, { "epoch": 6.102683780630105, "grad_norm": 0.0174560546875, "learning_rate": 6.605856734820642e-05, "loss": 0.0003, "step": 13075 }, { "epoch": 6.103150525087514, "grad_norm": 0.05859375, "learning_rate": 6.604476529327112e-05, "loss": 0.0031, "step": 13076 }, { "epoch": 6.103617269544924, "grad_norm": 0.0400390625, "learning_rate": 6.603096396942971e-05, "loss": 0.0004, "step": 13077 }, { "epoch": 6.104084014002334, "grad_norm": 0.032470703125, "learning_rate": 6.601716337697933e-05, "loss": 0.0003, "step": 13078 }, { "epoch": 6.104550758459744, "grad_norm": 0.01495361328125, "learning_rate": 6.600336351621711e-05, "loss": 0.0002, "step": 13079 }, { "epoch": 6.1050175029171525, "grad_norm": 0.07080078125, "learning_rate": 6.598956438744022e-05, "loss": 0.0005, "step": 13080 }, { "epoch": 6.105484247374562, "grad_norm": 0.048095703125, "learning_rate": 6.597576599094575e-05, "loss": 0.0003, "step": 13081 }, { "epoch": 6.105950991831972, "grad_norm": 0.043701171875, "learning_rate": 6.596196832703078e-05, "loss": 0.0005, "step": 13082 }, { "epoch": 6.106417736289382, "grad_norm": 0.1484375, "learning_rate": 6.59481713959924e-05, "loss": 0.0024, "step": 13083 }, { "epoch": 6.106884480746791, "grad_norm": 0.021484375, "learning_rate": 6.593437519812764e-05, "loss": 0.0003, "step": 13084 }, { "epoch": 6.107351225204201, "grad_norm": 0.126953125, "learning_rate": 6.59205797337336e-05, "loss": 0.0005, "step": 13085 }, { "epoch": 6.10781796966161, "grad_norm": 0.0311279296875, "learning_rate": 6.590678500310729e-05, "loss": 0.0003, "step": 13086 }, { "epoch": 6.10828471411902, "grad_norm": 0.314453125, "learning_rate": 6.589299100654572e-05, "loss": 0.0021, "step": 13087 }, { "epoch": 6.108751458576429, "grad_norm": 0.019775390625, "learning_rate": 6.58791977443459e-05, "loss": 0.0003, "step": 13088 }, { "epoch": 6.109218203033839, "grad_norm": 0.07763671875, "learning_rate": 6.586540521680482e-05, "loss": 0.0031, "step": 13089 }, { "epoch": 6.109684947491249, "grad_norm": 0.6875, "learning_rate": 6.585161342421944e-05, "loss": 0.0015, "step": 13090 }, { "epoch": 6.1101516919486585, "grad_norm": 0.0615234375, "learning_rate": 6.583782236688669e-05, "loss": 0.0008, "step": 13091 }, { "epoch": 6.110618436406067, "grad_norm": 0.01153564453125, "learning_rate": 6.582403204510355e-05, "loss": 0.0002, "step": 13092 }, { "epoch": 6.111085180863477, "grad_norm": 0.034423828125, "learning_rate": 6.58102424591669e-05, "loss": 0.0003, "step": 13093 }, { "epoch": 6.111551925320887, "grad_norm": 0.0830078125, "learning_rate": 6.579645360937366e-05, "loss": 0.0006, "step": 13094 }, { "epoch": 6.112018669778297, "grad_norm": 0.80078125, "learning_rate": 6.578266549602075e-05, "loss": 0.0114, "step": 13095 }, { "epoch": 6.112485414235706, "grad_norm": 0.041015625, "learning_rate": 6.5768878119405e-05, "loss": 0.0003, "step": 13096 }, { "epoch": 6.1129521586931155, "grad_norm": 0.068359375, "learning_rate": 6.575509147982327e-05, "loss": 0.0035, "step": 13097 }, { "epoch": 6.113418903150525, "grad_norm": 0.78125, "learning_rate": 6.574130557757243e-05, "loss": 0.0033, "step": 13098 }, { "epoch": 6.113885647607935, "grad_norm": 0.035888671875, "learning_rate": 6.572752041294931e-05, "loss": 0.0004, "step": 13099 }, { "epoch": 6.114352392065344, "grad_norm": 0.26953125, "learning_rate": 6.571373598625068e-05, "loss": 0.0021, "step": 13100 }, { "epoch": 6.114819136522754, "grad_norm": 0.091796875, "learning_rate": 6.569995229777335e-05, "loss": 0.0007, "step": 13101 }, { "epoch": 6.115285880980164, "grad_norm": 0.125, "learning_rate": 6.568616934781413e-05, "loss": 0.0031, "step": 13102 }, { "epoch": 6.115752625437573, "grad_norm": 0.1318359375, "learning_rate": 6.567238713666974e-05, "loss": 0.0009, "step": 13103 }, { "epoch": 6.116219369894982, "grad_norm": 0.033203125, "learning_rate": 6.565860566463693e-05, "loss": 0.0018, "step": 13104 }, { "epoch": 6.116686114352392, "grad_norm": 0.2109375, "learning_rate": 6.564482493201245e-05, "loss": 0.0011, "step": 13105 }, { "epoch": 6.117152858809802, "grad_norm": 0.2373046875, "learning_rate": 6.563104493909302e-05, "loss": 0.0018, "step": 13106 }, { "epoch": 6.117619603267211, "grad_norm": 0.154296875, "learning_rate": 6.56172656861753e-05, "loss": 0.001, "step": 13107 }, { "epoch": 6.118086347724621, "grad_norm": 0.126953125, "learning_rate": 6.560348717355603e-05, "loss": 0.0008, "step": 13108 }, { "epoch": 6.11855309218203, "grad_norm": 0.0267333984375, "learning_rate": 6.558970940153183e-05, "loss": 0.0003, "step": 13109 }, { "epoch": 6.11901983663944, "grad_norm": 0.03515625, "learning_rate": 6.557593237039936e-05, "loss": 0.0004, "step": 13110 }, { "epoch": 6.119486581096849, "grad_norm": 0.25390625, "learning_rate": 6.556215608045526e-05, "loss": 0.0033, "step": 13111 }, { "epoch": 6.119953325554259, "grad_norm": 0.291015625, "learning_rate": 6.554838053199616e-05, "loss": 0.0015, "step": 13112 }, { "epoch": 6.120420070011669, "grad_norm": 0.16015625, "learning_rate": 6.553460572531866e-05, "loss": 0.0043, "step": 13113 }, { "epoch": 6.1208868144690785, "grad_norm": 0.0284423828125, "learning_rate": 6.552083166071929e-05, "loss": 0.0003, "step": 13114 }, { "epoch": 6.121353558926487, "grad_norm": 0.051513671875, "learning_rate": 6.550705833849471e-05, "loss": 0.002, "step": 13115 }, { "epoch": 6.121820303383897, "grad_norm": 0.022705078125, "learning_rate": 6.549328575894144e-05, "loss": 0.0003, "step": 13116 }, { "epoch": 6.122287047841307, "grad_norm": 0.24609375, "learning_rate": 6.5479513922356e-05, "loss": 0.0029, "step": 13117 }, { "epoch": 6.122753792298717, "grad_norm": 0.31640625, "learning_rate": 6.546574282903492e-05, "loss": 0.0012, "step": 13118 }, { "epoch": 6.123220536756126, "grad_norm": 0.03125, "learning_rate": 6.545197247927474e-05, "loss": 0.0004, "step": 13119 }, { "epoch": 6.1236872812135354, "grad_norm": 0.0498046875, "learning_rate": 6.543820287337192e-05, "loss": 0.0004, "step": 13120 }, { "epoch": 6.124154025670945, "grad_norm": 0.1123046875, "learning_rate": 6.542443401162291e-05, "loss": 0.0007, "step": 13121 }, { "epoch": 6.124620770128355, "grad_norm": 0.0205078125, "learning_rate": 6.54106658943242e-05, "loss": 0.0003, "step": 13122 }, { "epoch": 6.125087514585764, "grad_norm": 0.0625, "learning_rate": 6.539689852177227e-05, "loss": 0.0004, "step": 13123 }, { "epoch": 6.125554259043174, "grad_norm": 0.02734375, "learning_rate": 6.538313189426346e-05, "loss": 0.0003, "step": 13124 }, { "epoch": 6.1260210035005835, "grad_norm": 0.0238037109375, "learning_rate": 6.536936601209424e-05, "loss": 0.0003, "step": 13125 }, { "epoch": 6.126487747957993, "grad_norm": 0.08154296875, "learning_rate": 6.535560087556103e-05, "loss": 0.0039, "step": 13126 }, { "epoch": 6.126954492415402, "grad_norm": 0.123046875, "learning_rate": 6.534183648496013e-05, "loss": 0.0006, "step": 13127 }, { "epoch": 6.127421236872812, "grad_norm": 0.02001953125, "learning_rate": 6.532807284058797e-05, "loss": 0.0003, "step": 13128 }, { "epoch": 6.127887981330222, "grad_norm": 0.123046875, "learning_rate": 6.531430994274088e-05, "loss": 0.0007, "step": 13129 }, { "epoch": 6.128354725787632, "grad_norm": 0.07666015625, "learning_rate": 6.530054779171517e-05, "loss": 0.0017, "step": 13130 }, { "epoch": 6.1288214702450405, "grad_norm": 0.0791015625, "learning_rate": 6.528678638780717e-05, "loss": 0.0047, "step": 13131 }, { "epoch": 6.12928821470245, "grad_norm": 0.1748046875, "learning_rate": 6.527302573131315e-05, "loss": 0.0036, "step": 13132 }, { "epoch": 6.12975495915986, "grad_norm": 0.05224609375, "learning_rate": 6.525926582252941e-05, "loss": 0.0004, "step": 13133 }, { "epoch": 6.13022170361727, "grad_norm": 0.11328125, "learning_rate": 6.524550666175226e-05, "loss": 0.0006, "step": 13134 }, { "epoch": 6.130688448074679, "grad_norm": 0.228515625, "learning_rate": 6.523174824927789e-05, "loss": 0.001, "step": 13135 }, { "epoch": 6.131155192532089, "grad_norm": 0.1591796875, "learning_rate": 6.521799058540255e-05, "loss": 0.0033, "step": 13136 }, { "epoch": 6.131621936989498, "grad_norm": 0.443359375, "learning_rate": 6.520423367042247e-05, "loss": 0.0189, "step": 13137 }, { "epoch": 6.132088681446908, "grad_norm": 0.0986328125, "learning_rate": 6.519047750463386e-05, "loss": 0.0006, "step": 13138 }, { "epoch": 6.132555425904317, "grad_norm": 0.146484375, "learning_rate": 6.517672208833287e-05, "loss": 0.0017, "step": 13139 }, { "epoch": 6.133022170361727, "grad_norm": 0.482421875, "learning_rate": 6.51629674218157e-05, "loss": 0.0043, "step": 13140 }, { "epoch": 6.133488914819137, "grad_norm": 0.0830078125, "learning_rate": 6.51492135053785e-05, "loss": 0.002, "step": 13141 }, { "epoch": 6.1339556592765465, "grad_norm": 0.045166015625, "learning_rate": 6.51354603393174e-05, "loss": 0.0004, "step": 13142 }, { "epoch": 6.134422403733955, "grad_norm": 0.05908203125, "learning_rate": 6.51217079239285e-05, "loss": 0.0003, "step": 13143 }, { "epoch": 6.134889148191365, "grad_norm": 0.0181884765625, "learning_rate": 6.510795625950795e-05, "loss": 0.0003, "step": 13144 }, { "epoch": 6.135355892648775, "grad_norm": 0.169921875, "learning_rate": 6.509420534635182e-05, "loss": 0.0025, "step": 13145 }, { "epoch": 6.135822637106184, "grad_norm": 0.08447265625, "learning_rate": 6.508045518475615e-05, "loss": 0.0006, "step": 13146 }, { "epoch": 6.136289381563594, "grad_norm": 0.283203125, "learning_rate": 6.506670577501705e-05, "loss": 0.0024, "step": 13147 }, { "epoch": 6.1367561260210035, "grad_norm": 0.1455078125, "learning_rate": 6.505295711743052e-05, "loss": 0.0006, "step": 13148 }, { "epoch": 6.137222870478413, "grad_norm": 0.2158203125, "learning_rate": 6.50392092122926e-05, "loss": 0.001, "step": 13149 }, { "epoch": 6.137689614935822, "grad_norm": 0.0751953125, "learning_rate": 6.502546205989929e-05, "loss": 0.0006, "step": 13150 }, { "epoch": 6.138156359393232, "grad_norm": 0.03564453125, "learning_rate": 6.50117156605466e-05, "loss": 0.0004, "step": 13151 }, { "epoch": 6.138623103850642, "grad_norm": 0.04736328125, "learning_rate": 6.499797001453049e-05, "loss": 0.0005, "step": 13152 }, { "epoch": 6.139089848308052, "grad_norm": 0.09326171875, "learning_rate": 6.49842251221469e-05, "loss": 0.0007, "step": 13153 }, { "epoch": 6.1395565927654605, "grad_norm": 0.103515625, "learning_rate": 6.497048098369182e-05, "loss": 0.0008, "step": 13154 }, { "epoch": 6.14002333722287, "grad_norm": 0.07763671875, "learning_rate": 6.495673759946114e-05, "loss": 0.0044, "step": 13155 }, { "epoch": 6.14049008168028, "grad_norm": 0.041015625, "learning_rate": 6.494299496975077e-05, "loss": 0.0005, "step": 13156 }, { "epoch": 6.14095682613769, "grad_norm": 0.036376953125, "learning_rate": 6.492925309485662e-05, "loss": 0.0003, "step": 13157 }, { "epoch": 6.141423570595099, "grad_norm": 0.0634765625, "learning_rate": 6.491551197507458e-05, "loss": 0.0007, "step": 13158 }, { "epoch": 6.141890315052509, "grad_norm": 0.0250244140625, "learning_rate": 6.49017716107005e-05, "loss": 0.0004, "step": 13159 }, { "epoch": 6.142357059509918, "grad_norm": 0.0216064453125, "learning_rate": 6.488803200203017e-05, "loss": 0.0004, "step": 13160 }, { "epoch": 6.142823803967328, "grad_norm": 0.06298828125, "learning_rate": 6.487429314935951e-05, "loss": 0.0023, "step": 13161 }, { "epoch": 6.143290548424737, "grad_norm": 0.060302734375, "learning_rate": 6.48605550529843e-05, "loss": 0.0004, "step": 13162 }, { "epoch": 6.143757292882147, "grad_norm": 0.138671875, "learning_rate": 6.484681771320029e-05, "loss": 0.0007, "step": 13163 }, { "epoch": 6.144224037339557, "grad_norm": 0.05078125, "learning_rate": 6.483308113030332e-05, "loss": 0.003, "step": 13164 }, { "epoch": 6.1446907817969665, "grad_norm": 0.30078125, "learning_rate": 6.481934530458913e-05, "loss": 0.0015, "step": 13165 }, { "epoch": 6.145157526254375, "grad_norm": 0.11767578125, "learning_rate": 6.480561023635345e-05, "loss": 0.0005, "step": 13166 }, { "epoch": 6.145624270711785, "grad_norm": 0.16796875, "learning_rate": 6.479187592589206e-05, "loss": 0.0008, "step": 13167 }, { "epoch": 6.146091015169195, "grad_norm": 0.07568359375, "learning_rate": 6.477814237350064e-05, "loss": 0.0004, "step": 13168 }, { "epoch": 6.146557759626605, "grad_norm": 0.11181640625, "learning_rate": 6.476440957947489e-05, "loss": 0.0025, "step": 13169 }, { "epoch": 6.147024504084014, "grad_norm": 0.0849609375, "learning_rate": 6.475067754411049e-05, "loss": 0.0006, "step": 13170 }, { "epoch": 6.1474912485414235, "grad_norm": 0.0260009765625, "learning_rate": 6.473694626770311e-05, "loss": 0.0003, "step": 13171 }, { "epoch": 6.147957992998833, "grad_norm": 0.1201171875, "learning_rate": 6.472321575054842e-05, "loss": 0.0005, "step": 13172 }, { "epoch": 6.148424737456243, "grad_norm": 0.04541015625, "learning_rate": 6.4709485992942e-05, "loss": 0.0004, "step": 13173 }, { "epoch": 6.148891481913652, "grad_norm": 0.0164794921875, "learning_rate": 6.469575699517953e-05, "loss": 0.0003, "step": 13174 }, { "epoch": 6.149358226371062, "grad_norm": 0.10107421875, "learning_rate": 6.468202875755659e-05, "loss": 0.0005, "step": 13175 }, { "epoch": 6.149824970828472, "grad_norm": 0.130859375, "learning_rate": 6.466830128036872e-05, "loss": 0.0007, "step": 13176 }, { "epoch": 6.150291715285881, "grad_norm": 0.033935546875, "learning_rate": 6.46545745639116e-05, "loss": 0.0003, "step": 13177 }, { "epoch": 6.15075845974329, "grad_norm": 0.043701171875, "learning_rate": 6.464084860848065e-05, "loss": 0.0005, "step": 13178 }, { "epoch": 6.1512252042007, "grad_norm": 0.039794921875, "learning_rate": 6.462712341437147e-05, "loss": 0.0004, "step": 13179 }, { "epoch": 6.15169194865811, "grad_norm": 0.03173828125, "learning_rate": 6.461339898187957e-05, "loss": 0.0003, "step": 13180 }, { "epoch": 6.15215869311552, "grad_norm": 0.08154296875, "learning_rate": 6.459967531130045e-05, "loss": 0.0006, "step": 13181 }, { "epoch": 6.152625437572929, "grad_norm": 0.240234375, "learning_rate": 6.458595240292959e-05, "loss": 0.001, "step": 13182 }, { "epoch": 6.153092182030338, "grad_norm": 0.0185546875, "learning_rate": 6.457223025706247e-05, "loss": 0.0003, "step": 13183 }, { "epoch": 6.153558926487748, "grad_norm": 0.05908203125, "learning_rate": 6.455850887399455e-05, "loss": 0.003, "step": 13184 }, { "epoch": 6.154025670945158, "grad_norm": 0.0301513671875, "learning_rate": 6.454478825402124e-05, "loss": 0.0003, "step": 13185 }, { "epoch": 6.154492415402567, "grad_norm": 0.09423828125, "learning_rate": 6.453106839743799e-05, "loss": 0.0004, "step": 13186 }, { "epoch": 6.154959159859977, "grad_norm": 0.062255859375, "learning_rate": 6.45173493045402e-05, "loss": 0.0003, "step": 13187 }, { "epoch": 6.1554259043173865, "grad_norm": 0.0517578125, "learning_rate": 6.450363097562322e-05, "loss": 0.0004, "step": 13188 }, { "epoch": 6.155892648774795, "grad_norm": 0.0234375, "learning_rate": 6.448991341098247e-05, "loss": 0.0003, "step": 13189 }, { "epoch": 6.156359393232205, "grad_norm": 0.09423828125, "learning_rate": 6.447619661091328e-05, "loss": 0.0006, "step": 13190 }, { "epoch": 6.156826137689615, "grad_norm": 0.061767578125, "learning_rate": 6.4462480575711e-05, "loss": 0.0004, "step": 13191 }, { "epoch": 6.157292882147025, "grad_norm": 0.166015625, "learning_rate": 6.444876530567092e-05, "loss": 0.0007, "step": 13192 }, { "epoch": 6.157759626604434, "grad_norm": 0.014892578125, "learning_rate": 6.443505080108838e-05, "loss": 0.0002, "step": 13193 }, { "epoch": 6.1582263710618435, "grad_norm": 0.158203125, "learning_rate": 6.442133706225866e-05, "loss": 0.0008, "step": 13194 }, { "epoch": 6.158693115519253, "grad_norm": 0.09716796875, "learning_rate": 6.440762408947703e-05, "loss": 0.0006, "step": 13195 }, { "epoch": 6.159159859976663, "grad_norm": 0.11767578125, "learning_rate": 6.439391188303874e-05, "loss": 0.0008, "step": 13196 }, { "epoch": 6.159626604434072, "grad_norm": 0.0218505859375, "learning_rate": 6.438020044323902e-05, "loss": 0.0003, "step": 13197 }, { "epoch": 6.160093348891482, "grad_norm": 0.0830078125, "learning_rate": 6.436648977037312e-05, "loss": 0.0007, "step": 13198 }, { "epoch": 6.1605600933488915, "grad_norm": 0.0269775390625, "learning_rate": 6.435277986473622e-05, "loss": 0.0003, "step": 13199 }, { "epoch": 6.161026837806301, "grad_norm": 0.1513671875, "learning_rate": 6.433907072662355e-05, "loss": 0.0008, "step": 13200 }, { "epoch": 6.16149358226371, "grad_norm": 0.0224609375, "learning_rate": 6.432536235633024e-05, "loss": 0.0003, "step": 13201 }, { "epoch": 6.16196032672112, "grad_norm": 0.0194091796875, "learning_rate": 6.431165475415144e-05, "loss": 0.0003, "step": 13202 }, { "epoch": 6.16242707117853, "grad_norm": 0.0947265625, "learning_rate": 6.429794792038232e-05, "loss": 0.0005, "step": 13203 }, { "epoch": 6.16289381563594, "grad_norm": 0.07421875, "learning_rate": 6.428424185531801e-05, "loss": 0.0059, "step": 13204 }, { "epoch": 6.1633605600933485, "grad_norm": 0.1796875, "learning_rate": 6.427053655925356e-05, "loss": 0.0007, "step": 13205 }, { "epoch": 6.163827304550758, "grad_norm": 0.54296875, "learning_rate": 6.425683203248413e-05, "loss": 0.0015, "step": 13206 }, { "epoch": 6.164294049008168, "grad_norm": 0.030029296875, "learning_rate": 6.424312827530478e-05, "loss": 0.0004, "step": 13207 }, { "epoch": 6.164760793465578, "grad_norm": 0.06591796875, "learning_rate": 6.422942528801054e-05, "loss": 0.0012, "step": 13208 }, { "epoch": 6.165227537922987, "grad_norm": 0.283203125, "learning_rate": 6.421572307089643e-05, "loss": 0.0037, "step": 13209 }, { "epoch": 6.165694282380397, "grad_norm": 0.042236328125, "learning_rate": 6.420202162425755e-05, "loss": 0.0003, "step": 13210 }, { "epoch": 6.166161026837806, "grad_norm": 0.048828125, "learning_rate": 6.418832094838885e-05, "loss": 0.0004, "step": 13211 }, { "epoch": 6.166627771295216, "grad_norm": 0.041015625, "learning_rate": 6.41746210435853e-05, "loss": 0.0003, "step": 13212 }, { "epoch": 6.167094515752625, "grad_norm": 0.30859375, "learning_rate": 6.416092191014193e-05, "loss": 0.0039, "step": 13213 }, { "epoch": 6.167561260210035, "grad_norm": 0.10888671875, "learning_rate": 6.414722354835369e-05, "loss": 0.0026, "step": 13214 }, { "epoch": 6.168028004667445, "grad_norm": 0.1982421875, "learning_rate": 6.41335259585155e-05, "loss": 0.0011, "step": 13215 }, { "epoch": 6.1684947491248545, "grad_norm": 0.0166015625, "learning_rate": 6.411982914092229e-05, "loss": 0.0002, "step": 13216 }, { "epoch": 6.168961493582263, "grad_norm": 0.119140625, "learning_rate": 6.410613309586897e-05, "loss": 0.0007, "step": 13217 }, { "epoch": 6.169428238039673, "grad_norm": 0.359375, "learning_rate": 6.409243782365042e-05, "loss": 0.0008, "step": 13218 }, { "epoch": 6.169894982497083, "grad_norm": 0.062255859375, "learning_rate": 6.407874332456152e-05, "loss": 0.0005, "step": 13219 }, { "epoch": 6.170361726954493, "grad_norm": 0.2080078125, "learning_rate": 6.406504959889714e-05, "loss": 0.0073, "step": 13220 }, { "epoch": 6.170828471411902, "grad_norm": 0.08349609375, "learning_rate": 6.405135664695212e-05, "loss": 0.0004, "step": 13221 }, { "epoch": 6.1712952158693115, "grad_norm": 0.37109375, "learning_rate": 6.403766446902125e-05, "loss": 0.001, "step": 13222 }, { "epoch": 6.171761960326721, "grad_norm": 0.0322265625, "learning_rate": 6.40239730653994e-05, "loss": 0.0003, "step": 13223 }, { "epoch": 6.172228704784131, "grad_norm": 0.10400390625, "learning_rate": 6.401028243638134e-05, "loss": 0.0006, "step": 13224 }, { "epoch": 6.17269544924154, "grad_norm": 0.431640625, "learning_rate": 6.39965925822618e-05, "loss": 0.0036, "step": 13225 }, { "epoch": 6.17316219369895, "grad_norm": 0.1435546875, "learning_rate": 6.398290350333558e-05, "loss": 0.0006, "step": 13226 }, { "epoch": 6.17362893815636, "grad_norm": 0.201171875, "learning_rate": 6.396921519989738e-05, "loss": 0.0011, "step": 13227 }, { "epoch": 6.174095682613769, "grad_norm": 0.028076171875, "learning_rate": 6.395552767224198e-05, "loss": 0.0003, "step": 13228 }, { "epoch": 6.174562427071178, "grad_norm": 0.0277099609375, "learning_rate": 6.394184092066405e-05, "loss": 0.0003, "step": 13229 }, { "epoch": 6.175029171528588, "grad_norm": 0.0181884765625, "learning_rate": 6.392815494545831e-05, "loss": 0.0002, "step": 13230 }, { "epoch": 6.175495915985998, "grad_norm": 0.07861328125, "learning_rate": 6.391446974691938e-05, "loss": 0.0007, "step": 13231 }, { "epoch": 6.175962660443407, "grad_norm": 0.1025390625, "learning_rate": 6.390078532534198e-05, "loss": 0.0006, "step": 13232 }, { "epoch": 6.176429404900817, "grad_norm": 0.0654296875, "learning_rate": 6.388710168102073e-05, "loss": 0.0006, "step": 13233 }, { "epoch": 6.176896149358226, "grad_norm": 0.08984375, "learning_rate": 6.387341881425022e-05, "loss": 0.0007, "step": 13234 }, { "epoch": 6.177362893815636, "grad_norm": 0.09375, "learning_rate": 6.385973672532511e-05, "loss": 0.0006, "step": 13235 }, { "epoch": 6.177829638273045, "grad_norm": 0.038818359375, "learning_rate": 6.384605541453998e-05, "loss": 0.0003, "step": 13236 }, { "epoch": 6.178296382730455, "grad_norm": 0.2734375, "learning_rate": 6.383237488218935e-05, "loss": 0.0013, "step": 13237 }, { "epoch": 6.178763127187865, "grad_norm": 0.1982421875, "learning_rate": 6.381869512856784e-05, "loss": 0.0008, "step": 13238 }, { "epoch": 6.1792298716452745, "grad_norm": 0.03662109375, "learning_rate": 6.380501615396998e-05, "loss": 0.0003, "step": 13239 }, { "epoch": 6.179696616102683, "grad_norm": 0.232421875, "learning_rate": 6.379133795869028e-05, "loss": 0.0014, "step": 13240 }, { "epoch": 6.180163360560093, "grad_norm": 0.0732421875, "learning_rate": 6.377766054302322e-05, "loss": 0.0005, "step": 13241 }, { "epoch": 6.180630105017503, "grad_norm": 0.126953125, "learning_rate": 6.376398390726334e-05, "loss": 0.0005, "step": 13242 }, { "epoch": 6.181096849474913, "grad_norm": 0.099609375, "learning_rate": 6.37503080517051e-05, "loss": 0.0006, "step": 13243 }, { "epoch": 6.181563593932322, "grad_norm": 0.083984375, "learning_rate": 6.37366329766429e-05, "loss": 0.0031, "step": 13244 }, { "epoch": 6.1820303383897315, "grad_norm": 0.036865234375, "learning_rate": 6.372295868237128e-05, "loss": 0.0003, "step": 13245 }, { "epoch": 6.182497082847141, "grad_norm": 0.251953125, "learning_rate": 6.37092851691846e-05, "loss": 0.0011, "step": 13246 }, { "epoch": 6.182963827304551, "grad_norm": 0.267578125, "learning_rate": 6.369561243737724e-05, "loss": 0.0013, "step": 13247 }, { "epoch": 6.18343057176196, "grad_norm": 0.08251953125, "learning_rate": 6.368194048724366e-05, "loss": 0.0004, "step": 13248 }, { "epoch": 6.18389731621937, "grad_norm": 0.0771484375, "learning_rate": 6.366826931907819e-05, "loss": 0.0004, "step": 13249 }, { "epoch": 6.18436406067678, "grad_norm": 0.0302734375, "learning_rate": 6.365459893317519e-05, "loss": 0.0003, "step": 13250 }, { "epoch": 6.184830805134189, "grad_norm": 0.0546875, "learning_rate": 6.364092932982899e-05, "loss": 0.0004, "step": 13251 }, { "epoch": 6.185297549591598, "grad_norm": 0.0159912109375, "learning_rate": 6.362726050933395e-05, "loss": 0.0002, "step": 13252 }, { "epoch": 6.185764294049008, "grad_norm": 0.08935546875, "learning_rate": 6.361359247198433e-05, "loss": 0.0005, "step": 13253 }, { "epoch": 6.186231038506418, "grad_norm": 0.09423828125, "learning_rate": 6.359992521807441e-05, "loss": 0.0005, "step": 13254 }, { "epoch": 6.186697782963828, "grad_norm": 0.031982421875, "learning_rate": 6.358625874789853e-05, "loss": 0.0004, "step": 13255 }, { "epoch": 6.187164527421237, "grad_norm": 0.03857421875, "learning_rate": 6.357259306175088e-05, "loss": 0.0004, "step": 13256 }, { "epoch": 6.187631271878646, "grad_norm": 0.028564453125, "learning_rate": 6.355892815992572e-05, "loss": 0.0003, "step": 13257 }, { "epoch": 6.188098016336056, "grad_norm": 0.0299072265625, "learning_rate": 6.354526404271726e-05, "loss": 0.0004, "step": 13258 }, { "epoch": 6.188564760793466, "grad_norm": 0.023193359375, "learning_rate": 6.353160071041972e-05, "loss": 0.0003, "step": 13259 }, { "epoch": 6.189031505250875, "grad_norm": 0.0546875, "learning_rate": 6.351793816332729e-05, "loss": 0.0022, "step": 13260 }, { "epoch": 6.189498249708285, "grad_norm": 0.0206298828125, "learning_rate": 6.350427640173411e-05, "loss": 0.0002, "step": 13261 }, { "epoch": 6.1899649941656945, "grad_norm": 0.07275390625, "learning_rate": 6.349061542593438e-05, "loss": 0.0004, "step": 13262 }, { "epoch": 6.190431738623104, "grad_norm": 0.037841796875, "learning_rate": 6.34769552362222e-05, "loss": 0.0004, "step": 13263 }, { "epoch": 6.190898483080513, "grad_norm": 0.0478515625, "learning_rate": 6.346329583289166e-05, "loss": 0.0005, "step": 13264 }, { "epoch": 6.191365227537923, "grad_norm": 0.0771484375, "learning_rate": 6.344963721623694e-05, "loss": 0.0005, "step": 13265 }, { "epoch": 6.191831971995333, "grad_norm": 0.2041015625, "learning_rate": 6.343597938655206e-05, "loss": 0.0006, "step": 13266 }, { "epoch": 6.1922987164527425, "grad_norm": 0.036376953125, "learning_rate": 6.342232234413114e-05, "loss": 0.0004, "step": 13267 }, { "epoch": 6.1927654609101515, "grad_norm": 0.031982421875, "learning_rate": 6.340866608926816e-05, "loss": 0.0003, "step": 13268 }, { "epoch": 6.193232205367561, "grad_norm": 0.04736328125, "learning_rate": 6.339501062225724e-05, "loss": 0.0004, "step": 13269 }, { "epoch": 6.193698949824971, "grad_norm": 0.03857421875, "learning_rate": 6.338135594339234e-05, "loss": 0.0023, "step": 13270 }, { "epoch": 6.194165694282381, "grad_norm": 0.154296875, "learning_rate": 6.336770205296746e-05, "loss": 0.0009, "step": 13271 }, { "epoch": 6.19463243873979, "grad_norm": 0.25390625, "learning_rate": 6.335404895127666e-05, "loss": 0.0005, "step": 13272 }, { "epoch": 6.1950991831971995, "grad_norm": 0.10205078125, "learning_rate": 6.334039663861377e-05, "loss": 0.0005, "step": 13273 }, { "epoch": 6.195565927654609, "grad_norm": 0.0111083984375, "learning_rate": 6.332674511527285e-05, "loss": 0.0002, "step": 13274 }, { "epoch": 6.196032672112018, "grad_norm": 0.10205078125, "learning_rate": 6.33130943815478e-05, "loss": 0.0046, "step": 13275 }, { "epoch": 6.196499416569428, "grad_norm": 0.07763671875, "learning_rate": 6.329944443773253e-05, "loss": 0.0033, "step": 13276 }, { "epoch": 6.196966161026838, "grad_norm": 0.150390625, "learning_rate": 6.328579528412094e-05, "loss": 0.0005, "step": 13277 }, { "epoch": 6.197432905484248, "grad_norm": 0.30859375, "learning_rate": 6.327214692100691e-05, "loss": 0.0018, "step": 13278 }, { "epoch": 6.1978996499416565, "grad_norm": 0.1162109375, "learning_rate": 6.32584993486843e-05, "loss": 0.0017, "step": 13279 }, { "epoch": 6.198366394399066, "grad_norm": 0.1298828125, "learning_rate": 6.324485256744696e-05, "loss": 0.0011, "step": 13280 }, { "epoch": 6.198833138856476, "grad_norm": 0.055419921875, "learning_rate": 6.323120657758874e-05, "loss": 0.0003, "step": 13281 }, { "epoch": 6.199299883313886, "grad_norm": 0.06689453125, "learning_rate": 6.321756137940343e-05, "loss": 0.0003, "step": 13282 }, { "epoch": 6.199766627771295, "grad_norm": 0.1806640625, "learning_rate": 6.320391697318481e-05, "loss": 0.0008, "step": 13283 }, { "epoch": 6.200233372228705, "grad_norm": 0.01953125, "learning_rate": 6.319027335922673e-05, "loss": 0.0003, "step": 13284 }, { "epoch": 6.200700116686114, "grad_norm": 0.0986328125, "learning_rate": 6.317663053782289e-05, "loss": 0.0004, "step": 13285 }, { "epoch": 6.201166861143524, "grad_norm": 0.0693359375, "learning_rate": 6.316298850926703e-05, "loss": 0.006, "step": 13286 }, { "epoch": 6.201633605600933, "grad_norm": 0.44140625, "learning_rate": 6.314934727385293e-05, "loss": 0.0051, "step": 13287 }, { "epoch": 6.202100350058343, "grad_norm": 0.035888671875, "learning_rate": 6.313570683187425e-05, "loss": 0.0003, "step": 13288 }, { "epoch": 6.202567094515753, "grad_norm": 0.291015625, "learning_rate": 6.312206718362472e-05, "loss": 0.0015, "step": 13289 }, { "epoch": 6.2030338389731625, "grad_norm": 0.03076171875, "learning_rate": 6.310842832939797e-05, "loss": 0.0004, "step": 13290 }, { "epoch": 6.203500583430571, "grad_norm": 0.07568359375, "learning_rate": 6.309479026948772e-05, "loss": 0.0004, "step": 13291 }, { "epoch": 6.203967327887981, "grad_norm": 0.064453125, "learning_rate": 6.308115300418758e-05, "loss": 0.0022, "step": 13292 }, { "epoch": 6.204434072345391, "grad_norm": 0.0361328125, "learning_rate": 6.306751653379115e-05, "loss": 0.0003, "step": 13293 }, { "epoch": 6.204900816802801, "grad_norm": 0.020751953125, "learning_rate": 6.30538808585921e-05, "loss": 0.0003, "step": 13294 }, { "epoch": 6.20536756126021, "grad_norm": 0.1328125, "learning_rate": 6.304024597888396e-05, "loss": 0.0006, "step": 13295 }, { "epoch": 6.2058343057176195, "grad_norm": 0.0264892578125, "learning_rate": 6.302661189496034e-05, "loss": 0.0003, "step": 13296 }, { "epoch": 6.206301050175029, "grad_norm": 0.21875, "learning_rate": 6.301297860711479e-05, "loss": 0.0034, "step": 13297 }, { "epoch": 6.206767794632439, "grad_norm": 0.19140625, "learning_rate": 6.299934611564087e-05, "loss": 0.0007, "step": 13298 }, { "epoch": 6.207234539089848, "grad_norm": 0.1201171875, "learning_rate": 6.298571442083207e-05, "loss": 0.0005, "step": 13299 }, { "epoch": 6.207701283547258, "grad_norm": 0.05029296875, "learning_rate": 6.297208352298187e-05, "loss": 0.0005, "step": 13300 }, { "epoch": 6.208168028004668, "grad_norm": 0.1689453125, "learning_rate": 6.295845342238384e-05, "loss": 0.0022, "step": 13301 }, { "epoch": 6.208634772462077, "grad_norm": 0.11181640625, "learning_rate": 6.29448241193314e-05, "loss": 0.0004, "step": 13302 }, { "epoch": 6.209101516919486, "grad_norm": 0.244140625, "learning_rate": 6.293119561411797e-05, "loss": 0.0087, "step": 13303 }, { "epoch": 6.209568261376896, "grad_norm": 0.1689453125, "learning_rate": 6.291756790703706e-05, "loss": 0.0014, "step": 13304 }, { "epoch": 6.210035005834306, "grad_norm": 0.259765625, "learning_rate": 6.290394099838207e-05, "loss": 0.004, "step": 13305 }, { "epoch": 6.210501750291716, "grad_norm": 0.064453125, "learning_rate": 6.289031488844636e-05, "loss": 0.0005, "step": 13306 }, { "epoch": 6.210968494749125, "grad_norm": 0.08154296875, "learning_rate": 6.287668957752336e-05, "loss": 0.0004, "step": 13307 }, { "epoch": 6.211435239206534, "grad_norm": 0.08642578125, "learning_rate": 6.286306506590642e-05, "loss": 0.0005, "step": 13308 }, { "epoch": 6.211901983663944, "grad_norm": 0.06005859375, "learning_rate": 6.284944135388891e-05, "loss": 0.0005, "step": 13309 }, { "epoch": 6.212368728121354, "grad_norm": 0.05078125, "learning_rate": 6.283581844176411e-05, "loss": 0.0004, "step": 13310 }, { "epoch": 6.212835472578763, "grad_norm": 0.060302734375, "learning_rate": 6.28221963298254e-05, "loss": 0.0049, "step": 13311 }, { "epoch": 6.213302217036173, "grad_norm": 0.18359375, "learning_rate": 6.280857501836605e-05, "loss": 0.0009, "step": 13312 }, { "epoch": 6.2137689614935825, "grad_norm": 0.083984375, "learning_rate": 6.279495450767934e-05, "loss": 0.0004, "step": 13313 }, { "epoch": 6.214235705950992, "grad_norm": 0.064453125, "learning_rate": 6.278133479805855e-05, "loss": 0.0027, "step": 13314 }, { "epoch": 6.214702450408401, "grad_norm": 0.059326171875, "learning_rate": 6.27677158897969e-05, "loss": 0.0004, "step": 13315 }, { "epoch": 6.215169194865811, "grad_norm": 0.0125732421875, "learning_rate": 6.275409778318765e-05, "loss": 0.0002, "step": 13316 }, { "epoch": 6.215635939323221, "grad_norm": 0.1865234375, "learning_rate": 6.274048047852396e-05, "loss": 0.0007, "step": 13317 }, { "epoch": 6.21610268378063, "grad_norm": 0.0220947265625, "learning_rate": 6.272686397609913e-05, "loss": 0.0002, "step": 13318 }, { "epoch": 6.2165694282380395, "grad_norm": 0.189453125, "learning_rate": 6.271324827620624e-05, "loss": 0.0009, "step": 13319 }, { "epoch": 6.217036172695449, "grad_norm": 0.09375, "learning_rate": 6.269963337913852e-05, "loss": 0.0028, "step": 13320 }, { "epoch": 6.217502917152859, "grad_norm": 0.1826171875, "learning_rate": 6.268601928518905e-05, "loss": 0.0011, "step": 13321 }, { "epoch": 6.217969661610268, "grad_norm": 0.016845703125, "learning_rate": 6.267240599465096e-05, "loss": 0.0003, "step": 13322 }, { "epoch": 6.218436406067678, "grad_norm": 0.024658203125, "learning_rate": 6.26587935078174e-05, "loss": 0.0003, "step": 13323 }, { "epoch": 6.218903150525088, "grad_norm": 0.259765625, "learning_rate": 6.264518182498147e-05, "loss": 0.0016, "step": 13324 }, { "epoch": 6.219369894982497, "grad_norm": 0.023193359375, "learning_rate": 6.26315709464362e-05, "loss": 0.0003, "step": 13325 }, { "epoch": 6.219836639439906, "grad_norm": 0.1181640625, "learning_rate": 6.261796087247467e-05, "loss": 0.0011, "step": 13326 }, { "epoch": 6.220303383897316, "grad_norm": 0.0230712890625, "learning_rate": 6.260435160338992e-05, "loss": 0.0003, "step": 13327 }, { "epoch": 6.220770128354726, "grad_norm": 0.047119140625, "learning_rate": 6.259074313947497e-05, "loss": 0.0003, "step": 13328 }, { "epoch": 6.221236872812136, "grad_norm": 0.1689453125, "learning_rate": 6.257713548102282e-05, "loss": 0.0006, "step": 13329 }, { "epoch": 6.221703617269545, "grad_norm": 0.026123046875, "learning_rate": 6.256352862832649e-05, "loss": 0.0003, "step": 13330 }, { "epoch": 6.222170361726954, "grad_norm": 0.044189453125, "learning_rate": 6.254992258167891e-05, "loss": 0.0021, "step": 13331 }, { "epoch": 6.222637106184364, "grad_norm": 0.392578125, "learning_rate": 6.253631734137301e-05, "loss": 0.0017, "step": 13332 }, { "epoch": 6.223103850641774, "grad_norm": 0.0274658203125, "learning_rate": 6.252271290770182e-05, "loss": 0.0004, "step": 13333 }, { "epoch": 6.223570595099183, "grad_norm": 0.017822265625, "learning_rate": 6.250910928095819e-05, "loss": 0.0003, "step": 13334 }, { "epoch": 6.224037339556593, "grad_norm": 0.146484375, "learning_rate": 6.249550646143501e-05, "loss": 0.0054, "step": 13335 }, { "epoch": 6.2245040840140025, "grad_norm": 0.0537109375, "learning_rate": 6.248190444942521e-05, "loss": 0.0003, "step": 13336 }, { "epoch": 6.224970828471412, "grad_norm": 0.0693359375, "learning_rate": 6.246830324522164e-05, "loss": 0.0004, "step": 13337 }, { "epoch": 6.225437572928821, "grad_norm": 0.0135498046875, "learning_rate": 6.245470284911715e-05, "loss": 0.0002, "step": 13338 }, { "epoch": 6.225904317386231, "grad_norm": 0.055908203125, "learning_rate": 6.244110326140452e-05, "loss": 0.0003, "step": 13339 }, { "epoch": 6.226371061843641, "grad_norm": 0.053955078125, "learning_rate": 6.242750448237664e-05, "loss": 0.0004, "step": 13340 }, { "epoch": 6.2268378063010505, "grad_norm": 0.0107421875, "learning_rate": 6.241390651232628e-05, "loss": 0.0002, "step": 13341 }, { "epoch": 6.2273045507584595, "grad_norm": 0.1904296875, "learning_rate": 6.24003093515462e-05, "loss": 0.0008, "step": 13342 }, { "epoch": 6.227771295215869, "grad_norm": 0.189453125, "learning_rate": 6.238671300032918e-05, "loss": 0.0006, "step": 13343 }, { "epoch": 6.228238039673279, "grad_norm": 0.0361328125, "learning_rate": 6.237311745896797e-05, "loss": 0.0003, "step": 13344 }, { "epoch": 6.228704784130689, "grad_norm": 0.1455078125, "learning_rate": 6.235952272775528e-05, "loss": 0.0005, "step": 13345 }, { "epoch": 6.229171528588098, "grad_norm": 0.061767578125, "learning_rate": 6.234592880698384e-05, "loss": 0.0004, "step": 13346 }, { "epoch": 6.2296382730455075, "grad_norm": 0.11962890625, "learning_rate": 6.233233569694633e-05, "loss": 0.0007, "step": 13347 }, { "epoch": 6.230105017502917, "grad_norm": 0.047119140625, "learning_rate": 6.231874339793544e-05, "loss": 0.003, "step": 13348 }, { "epoch": 6.230571761960327, "grad_norm": 0.0113525390625, "learning_rate": 6.230515191024376e-05, "loss": 0.0002, "step": 13349 }, { "epoch": 6.231038506417736, "grad_norm": 0.1494140625, "learning_rate": 6.229156123416403e-05, "loss": 0.0006, "step": 13350 }, { "epoch": 6.231505250875146, "grad_norm": 0.027587890625, "learning_rate": 6.227797136998881e-05, "loss": 0.0003, "step": 13351 }, { "epoch": 6.231971995332556, "grad_norm": 0.050537109375, "learning_rate": 6.22643823180107e-05, "loss": 0.0003, "step": 13352 }, { "epoch": 6.2324387397899645, "grad_norm": 0.05615234375, "learning_rate": 6.225079407852235e-05, "loss": 0.0004, "step": 13353 }, { "epoch": 6.232905484247374, "grad_norm": 0.046142578125, "learning_rate": 6.223720665181627e-05, "loss": 0.0004, "step": 13354 }, { "epoch": 6.233372228704784, "grad_norm": 0.1376953125, "learning_rate": 6.222362003818501e-05, "loss": 0.0043, "step": 13355 }, { "epoch": 6.233838973162194, "grad_norm": 0.033203125, "learning_rate": 6.221003423792114e-05, "loss": 0.0003, "step": 13356 }, { "epoch": 6.234305717619603, "grad_norm": 0.059814453125, "learning_rate": 6.219644925131716e-05, "loss": 0.0023, "step": 13357 }, { "epoch": 6.234772462077013, "grad_norm": 0.072265625, "learning_rate": 6.218286507866557e-05, "loss": 0.0004, "step": 13358 }, { "epoch": 6.235239206534422, "grad_norm": 0.08251953125, "learning_rate": 6.216928172025884e-05, "loss": 0.0006, "step": 13359 }, { "epoch": 6.235705950991832, "grad_norm": 0.0283203125, "learning_rate": 6.215569917638948e-05, "loss": 0.0002, "step": 13360 }, { "epoch": 6.236172695449241, "grad_norm": 0.023681640625, "learning_rate": 6.214211744734989e-05, "loss": 0.0003, "step": 13361 }, { "epoch": 6.236639439906651, "grad_norm": 0.04833984375, "learning_rate": 6.212853653343251e-05, "loss": 0.0018, "step": 13362 }, { "epoch": 6.237106184364061, "grad_norm": 0.326171875, "learning_rate": 6.211495643492974e-05, "loss": 0.0011, "step": 13363 }, { "epoch": 6.2375729288214705, "grad_norm": 0.04150390625, "learning_rate": 6.210137715213405e-05, "loss": 0.0002, "step": 13364 }, { "epoch": 6.238039673278879, "grad_norm": 0.029541015625, "learning_rate": 6.208779868533769e-05, "loss": 0.0003, "step": 13365 }, { "epoch": 6.238506417736289, "grad_norm": 0.0159912109375, "learning_rate": 6.207422103483314e-05, "loss": 0.0002, "step": 13366 }, { "epoch": 6.238973162193699, "grad_norm": 0.1708984375, "learning_rate": 6.206064420091266e-05, "loss": 0.0009, "step": 13367 }, { "epoch": 6.239439906651109, "grad_norm": 0.0751953125, "learning_rate": 6.204706818386865e-05, "loss": 0.0053, "step": 13368 }, { "epoch": 6.239906651108518, "grad_norm": 0.3125, "learning_rate": 6.203349298399334e-05, "loss": 0.0038, "step": 13369 }, { "epoch": 6.2403733955659275, "grad_norm": 0.46484375, "learning_rate": 6.201991860157906e-05, "loss": 0.0016, "step": 13370 }, { "epoch": 6.240840140023337, "grad_norm": 0.048828125, "learning_rate": 6.200634503691804e-05, "loss": 0.003, "step": 13371 }, { "epoch": 6.241306884480747, "grad_norm": 0.125, "learning_rate": 6.19927722903026e-05, "loss": 0.0006, "step": 13372 }, { "epoch": 6.241773628938156, "grad_norm": 0.036376953125, "learning_rate": 6.197920036202494e-05, "loss": 0.0017, "step": 13373 }, { "epoch": 6.242240373395566, "grad_norm": 0.09228515625, "learning_rate": 6.196562925237725e-05, "loss": 0.0004, "step": 13374 }, { "epoch": 6.242707117852976, "grad_norm": 0.177734375, "learning_rate": 6.19520589616518e-05, "loss": 0.0006, "step": 13375 }, { "epoch": 6.243173862310385, "grad_norm": 0.047607421875, "learning_rate": 6.193848949014071e-05, "loss": 0.0005, "step": 13376 }, { "epoch": 6.243640606767794, "grad_norm": 0.162109375, "learning_rate": 6.192492083813617e-05, "loss": 0.0005, "step": 13377 }, { "epoch": 6.244107351225204, "grad_norm": 0.12158203125, "learning_rate": 6.191135300593034e-05, "loss": 0.002, "step": 13378 }, { "epoch": 6.244574095682614, "grad_norm": 0.0225830078125, "learning_rate": 6.189778599381536e-05, "loss": 0.0003, "step": 13379 }, { "epoch": 6.245040840140024, "grad_norm": 0.054931640625, "learning_rate": 6.188421980208329e-05, "loss": 0.0004, "step": 13380 }, { "epoch": 6.245507584597433, "grad_norm": 0.02294921875, "learning_rate": 6.187065443102626e-05, "loss": 0.0003, "step": 13381 }, { "epoch": 6.245974329054842, "grad_norm": 0.00946044921875, "learning_rate": 6.185708988093635e-05, "loss": 0.0002, "step": 13382 }, { "epoch": 6.246441073512252, "grad_norm": 0.02294921875, "learning_rate": 6.184352615210563e-05, "loss": 0.0003, "step": 13383 }, { "epoch": 6.246907817969662, "grad_norm": 0.107421875, "learning_rate": 6.18299632448261e-05, "loss": 0.0006, "step": 13384 }, { "epoch": 6.247374562427071, "grad_norm": 0.10302734375, "learning_rate": 6.181640115938985e-05, "loss": 0.0005, "step": 13385 }, { "epoch": 6.247841306884481, "grad_norm": 0.259765625, "learning_rate": 6.180283989608882e-05, "loss": 0.0015, "step": 13386 }, { "epoch": 6.2483080513418905, "grad_norm": 0.28515625, "learning_rate": 6.178927945521505e-05, "loss": 0.0012, "step": 13387 }, { "epoch": 6.2487747957993, "grad_norm": 0.03271484375, "learning_rate": 6.177571983706045e-05, "loss": 0.0003, "step": 13388 }, { "epoch": 6.249241540256709, "grad_norm": 0.080078125, "learning_rate": 6.176216104191705e-05, "loss": 0.0004, "step": 13389 }, { "epoch": 6.249708284714119, "grad_norm": 0.07080078125, "learning_rate": 6.174860307007674e-05, "loss": 0.0017, "step": 13390 }, { "epoch": 6.250175029171529, "grad_norm": 0.048095703125, "learning_rate": 6.173504592183142e-05, "loss": 0.0004, "step": 13391 }, { "epoch": 6.250641773628939, "grad_norm": 0.10009765625, "learning_rate": 6.172148959747305e-05, "loss": 0.0005, "step": 13392 }, { "epoch": 6.2511085180863475, "grad_norm": 0.076171875, "learning_rate": 6.170793409729348e-05, "loss": 0.0004, "step": 13393 }, { "epoch": 6.251575262543757, "grad_norm": 0.1484375, "learning_rate": 6.169437942158453e-05, "loss": 0.0015, "step": 13394 }, { "epoch": 6.252042007001167, "grad_norm": 0.01226806640625, "learning_rate": 6.168082557063813e-05, "loss": 0.0002, "step": 13395 }, { "epoch": 6.252508751458576, "grad_norm": 0.125, "learning_rate": 6.166727254474608e-05, "loss": 0.004, "step": 13396 }, { "epoch": 6.252975495915986, "grad_norm": 0.038818359375, "learning_rate": 6.165372034420017e-05, "loss": 0.002, "step": 13397 }, { "epoch": 6.253442240373396, "grad_norm": 0.034423828125, "learning_rate": 6.164016896929218e-05, "loss": 0.0004, "step": 13398 }, { "epoch": 6.253908984830805, "grad_norm": 0.02880859375, "learning_rate": 6.162661842031396e-05, "loss": 0.0003, "step": 13399 }, { "epoch": 6.254375729288215, "grad_norm": 0.07763671875, "learning_rate": 6.161306869755721e-05, "loss": 0.0004, "step": 13400 }, { "epoch": 6.254375729288215, "eval_loss": 2.343518018722534, "eval_runtime": 65.2176, "eval_samples_per_second": 27.661, "eval_steps_per_second": 3.465, "step": 13400 }, { "epoch": 6.254842473745624, "grad_norm": 0.01953125, "learning_rate": 6.159951980131364e-05, "loss": 0.0002, "step": 13401 }, { "epoch": 6.255309218203034, "grad_norm": 0.032958984375, "learning_rate": 6.158597173187506e-05, "loss": 0.0003, "step": 13402 }, { "epoch": 6.255775962660444, "grad_norm": 0.29296875, "learning_rate": 6.157242448953314e-05, "loss": 0.003, "step": 13403 }, { "epoch": 6.256242707117853, "grad_norm": 0.038330078125, "learning_rate": 6.15588780745795e-05, "loss": 0.0003, "step": 13404 }, { "epoch": 6.256709451575262, "grad_norm": 0.043212890625, "learning_rate": 6.154533248730591e-05, "loss": 0.0006, "step": 13405 }, { "epoch": 6.257176196032672, "grad_norm": 0.1259765625, "learning_rate": 6.153178772800396e-05, "loss": 0.0005, "step": 13406 }, { "epoch": 6.257642940490082, "grad_norm": 0.36328125, "learning_rate": 6.151824379696533e-05, "loss": 0.0018, "step": 13407 }, { "epoch": 6.258109684947491, "grad_norm": 0.0703125, "learning_rate": 6.150470069448156e-05, "loss": 0.0004, "step": 13408 }, { "epoch": 6.258576429404901, "grad_norm": 0.05908203125, "learning_rate": 6.149115842084433e-05, "loss": 0.0004, "step": 13409 }, { "epoch": 6.2590431738623105, "grad_norm": 0.73828125, "learning_rate": 6.147761697634518e-05, "loss": 0.0022, "step": 13410 }, { "epoch": 6.25950991831972, "grad_norm": 0.0751953125, "learning_rate": 6.146407636127567e-05, "loss": 0.0004, "step": 13411 }, { "epoch": 6.259976662777129, "grad_norm": 0.11767578125, "learning_rate": 6.145053657592737e-05, "loss": 0.0047, "step": 13412 }, { "epoch": 6.260443407234539, "grad_norm": 0.08349609375, "learning_rate": 6.143699762059177e-05, "loss": 0.0005, "step": 13413 }, { "epoch": 6.260910151691949, "grad_norm": 0.08251953125, "learning_rate": 6.142345949556039e-05, "loss": 0.0003, "step": 13414 }, { "epoch": 6.2613768961493586, "grad_norm": 0.12060546875, "learning_rate": 6.140992220112474e-05, "loss": 0.0006, "step": 13415 }, { "epoch": 6.2618436406067675, "grad_norm": 0.030517578125, "learning_rate": 6.139638573757631e-05, "loss": 0.0003, "step": 13416 }, { "epoch": 6.262310385064177, "grad_norm": 0.1513671875, "learning_rate": 6.13828501052065e-05, "loss": 0.0078, "step": 13417 }, { "epoch": 6.262777129521587, "grad_norm": 0.11767578125, "learning_rate": 6.136931530430678e-05, "loss": 0.0006, "step": 13418 }, { "epoch": 6.263243873978997, "grad_norm": 0.02392578125, "learning_rate": 6.135578133516857e-05, "loss": 0.0003, "step": 13419 }, { "epoch": 6.263710618436406, "grad_norm": 0.0242919921875, "learning_rate": 6.134224819808323e-05, "loss": 0.0004, "step": 13420 }, { "epoch": 6.2641773628938155, "grad_norm": 0.26171875, "learning_rate": 6.132871589334221e-05, "loss": 0.0011, "step": 13421 }, { "epoch": 6.264644107351225, "grad_norm": 0.349609375, "learning_rate": 6.131518442123683e-05, "loss": 0.001, "step": 13422 }, { "epoch": 6.265110851808635, "grad_norm": 0.1533203125, "learning_rate": 6.130165378205845e-05, "loss": 0.0005, "step": 13423 }, { "epoch": 6.265577596266044, "grad_norm": 0.0277099609375, "learning_rate": 6.12881239760984e-05, "loss": 0.0003, "step": 13424 }, { "epoch": 6.266044340723454, "grad_norm": 0.042236328125, "learning_rate": 6.1274595003648e-05, "loss": 0.0003, "step": 13425 }, { "epoch": 6.266511085180864, "grad_norm": 0.0966796875, "learning_rate": 6.126106686499853e-05, "loss": 0.0021, "step": 13426 }, { "epoch": 6.266977829638273, "grad_norm": 0.1630859375, "learning_rate": 6.124753956044127e-05, "loss": 0.0006, "step": 13427 }, { "epoch": 6.267444574095682, "grad_norm": 0.09521484375, "learning_rate": 6.12340130902675e-05, "loss": 0.002, "step": 13428 }, { "epoch": 6.267911318553092, "grad_norm": 0.140625, "learning_rate": 6.122048745476843e-05, "loss": 0.003, "step": 13429 }, { "epoch": 6.268378063010502, "grad_norm": 0.08935546875, "learning_rate": 6.120696265423527e-05, "loss": 0.0006, "step": 13430 }, { "epoch": 6.268844807467912, "grad_norm": 0.2333984375, "learning_rate": 6.119343868895926e-05, "loss": 0.0009, "step": 13431 }, { "epoch": 6.269311551925321, "grad_norm": 0.1484375, "learning_rate": 6.117991555923158e-05, "loss": 0.0006, "step": 13432 }, { "epoch": 6.26977829638273, "grad_norm": 0.0654296875, "learning_rate": 6.116639326534336e-05, "loss": 0.0003, "step": 13433 }, { "epoch": 6.27024504084014, "grad_norm": 0.04638671875, "learning_rate": 6.115287180758583e-05, "loss": 0.0004, "step": 13434 }, { "epoch": 6.27071178529755, "grad_norm": 0.036376953125, "learning_rate": 6.113935118625003e-05, "loss": 0.0003, "step": 13435 }, { "epoch": 6.271178529754959, "grad_norm": 0.040771484375, "learning_rate": 6.112583140162712e-05, "loss": 0.0003, "step": 13436 }, { "epoch": 6.271645274212369, "grad_norm": 0.0262451171875, "learning_rate": 6.111231245400822e-05, "loss": 0.0003, "step": 13437 }, { "epoch": 6.2721120186697785, "grad_norm": 0.361328125, "learning_rate": 6.109879434368435e-05, "loss": 0.0008, "step": 13438 }, { "epoch": 6.272578763127187, "grad_norm": 0.01385498046875, "learning_rate": 6.108527707094662e-05, "loss": 0.0003, "step": 13439 }, { "epoch": 6.273045507584597, "grad_norm": 0.03662109375, "learning_rate": 6.107176063608602e-05, "loss": 0.0003, "step": 13440 }, { "epoch": 6.273512252042007, "grad_norm": 0.27734375, "learning_rate": 6.105824503939364e-05, "loss": 0.0007, "step": 13441 }, { "epoch": 6.273978996499417, "grad_norm": 0.0291748046875, "learning_rate": 6.104473028116045e-05, "loss": 0.0003, "step": 13442 }, { "epoch": 6.274445740956826, "grad_norm": 0.041015625, "learning_rate": 6.103121636167739e-05, "loss": 0.0004, "step": 13443 }, { "epoch": 6.2749124854142355, "grad_norm": 0.1328125, "learning_rate": 6.101770328123553e-05, "loss": 0.003, "step": 13444 }, { "epoch": 6.275379229871645, "grad_norm": 0.2197265625, "learning_rate": 6.1004191040125756e-05, "loss": 0.0007, "step": 13445 }, { "epoch": 6.275845974329055, "grad_norm": 0.05810546875, "learning_rate": 6.0990679638639025e-05, "loss": 0.0004, "step": 13446 }, { "epoch": 6.276312718786464, "grad_norm": 0.1552734375, "learning_rate": 6.0977169077066207e-05, "loss": 0.0005, "step": 13447 }, { "epoch": 6.276779463243874, "grad_norm": 0.376953125, "learning_rate": 6.0963659355698274e-05, "loss": 0.0014, "step": 13448 }, { "epoch": 6.277246207701284, "grad_norm": 0.279296875, "learning_rate": 6.0950150474826065e-05, "loss": 0.0025, "step": 13449 }, { "epoch": 6.277712952158693, "grad_norm": 0.09375, "learning_rate": 6.0936642434740396e-05, "loss": 0.0038, "step": 13450 }, { "epoch": 6.278179696616102, "grad_norm": 0.236328125, "learning_rate": 6.092313523573221e-05, "loss": 0.0013, "step": 13451 }, { "epoch": 6.278646441073512, "grad_norm": 0.0322265625, "learning_rate": 6.0909628878092263e-05, "loss": 0.0003, "step": 13452 }, { "epoch": 6.279113185530922, "grad_norm": 0.058349609375, "learning_rate": 6.089612336211136e-05, "loss": 0.0003, "step": 13453 }, { "epoch": 6.279579929988332, "grad_norm": 0.08251953125, "learning_rate": 6.088261868808033e-05, "loss": 0.0006, "step": 13454 }, { "epoch": 6.280046674445741, "grad_norm": 0.1865234375, "learning_rate": 6.086911485628992e-05, "loss": 0.0006, "step": 13455 }, { "epoch": 6.28051341890315, "grad_norm": 0.0277099609375, "learning_rate": 6.0855611867030884e-05, "loss": 0.0002, "step": 13456 }, { "epoch": 6.28098016336056, "grad_norm": 0.1044921875, "learning_rate": 6.084210972059393e-05, "loss": 0.0007, "step": 13457 }, { "epoch": 6.28144690781797, "grad_norm": 0.220703125, "learning_rate": 6.082860841726983e-05, "loss": 0.0012, "step": 13458 }, { "epoch": 6.281913652275379, "grad_norm": 0.14453125, "learning_rate": 6.0815107957349237e-05, "loss": 0.0007, "step": 13459 }, { "epoch": 6.282380396732789, "grad_norm": 0.19921875, "learning_rate": 6.080160834112284e-05, "loss": 0.0013, "step": 13460 }, { "epoch": 6.2828471411901985, "grad_norm": 0.1474609375, "learning_rate": 6.078810956888133e-05, "loss": 0.0006, "step": 13461 }, { "epoch": 6.283313885647608, "grad_norm": 0.047119140625, "learning_rate": 6.0774611640915316e-05, "loss": 0.0004, "step": 13462 }, { "epoch": 6.283780630105017, "grad_norm": 0.06494140625, "learning_rate": 6.0761114557515455e-05, "loss": 0.0003, "step": 13463 }, { "epoch": 6.284247374562427, "grad_norm": 0.0654296875, "learning_rate": 6.074761831897232e-05, "loss": 0.0004, "step": 13464 }, { "epoch": 6.284714119019837, "grad_norm": 0.1796875, "learning_rate": 6.0734122925576495e-05, "loss": 0.0028, "step": 13465 }, { "epoch": 6.285180863477247, "grad_norm": 0.404296875, "learning_rate": 6.072062837761858e-05, "loss": 0.0032, "step": 13466 }, { "epoch": 6.2856476079346555, "grad_norm": 0.2373046875, "learning_rate": 6.0707134675389134e-05, "loss": 0.0009, "step": 13467 }, { "epoch": 6.286114352392065, "grad_norm": 0.408203125, "learning_rate": 6.069364181917866e-05, "loss": 0.0062, "step": 13468 }, { "epoch": 6.286581096849475, "grad_norm": 0.0908203125, "learning_rate": 6.0680149809277676e-05, "loss": 0.0004, "step": 13469 }, { "epoch": 6.287047841306885, "grad_norm": 0.390625, "learning_rate": 6.0666658645976706e-05, "loss": 0.0034, "step": 13470 }, { "epoch": 6.287514585764294, "grad_norm": 0.263671875, "learning_rate": 6.0653168329566225e-05, "loss": 0.0016, "step": 13471 }, { "epoch": 6.287981330221704, "grad_norm": 0.373046875, "learning_rate": 6.063967886033666e-05, "loss": 0.0023, "step": 13472 }, { "epoch": 6.288448074679113, "grad_norm": 0.0859375, "learning_rate": 6.06261902385785e-05, "loss": 0.0006, "step": 13473 }, { "epoch": 6.288914819136522, "grad_norm": 0.1982421875, "learning_rate": 6.061270246458215e-05, "loss": 0.0026, "step": 13474 }, { "epoch": 6.289381563593932, "grad_norm": 0.3984375, "learning_rate": 6.059921553863799e-05, "loss": 0.004, "step": 13475 }, { "epoch": 6.289848308051342, "grad_norm": 0.15625, "learning_rate": 6.0585729461036466e-05, "loss": 0.0007, "step": 13476 }, { "epoch": 6.290315052508752, "grad_norm": 0.043212890625, "learning_rate": 6.057224423206791e-05, "loss": 0.0007, "step": 13477 }, { "epoch": 6.2907817969661615, "grad_norm": 0.05615234375, "learning_rate": 6.05587598520227e-05, "loss": 0.0024, "step": 13478 }, { "epoch": 6.29124854142357, "grad_norm": 0.0712890625, "learning_rate": 6.054527632119111e-05, "loss": 0.0041, "step": 13479 }, { "epoch": 6.29171528588098, "grad_norm": 0.055908203125, "learning_rate": 6.053179363986352e-05, "loss": 0.0004, "step": 13480 }, { "epoch": 6.29218203033839, "grad_norm": 0.0791015625, "learning_rate": 6.0518311808330206e-05, "loss": 0.0005, "step": 13481 }, { "epoch": 6.292648774795799, "grad_norm": 0.07177734375, "learning_rate": 6.050483082688143e-05, "loss": 0.0005, "step": 13482 }, { "epoch": 6.293115519253209, "grad_norm": 0.058349609375, "learning_rate": 6.049135069580749e-05, "loss": 0.0005, "step": 13483 }, { "epoch": 6.2935822637106185, "grad_norm": 0.1728515625, "learning_rate": 6.0477871415398605e-05, "loss": 0.0033, "step": 13484 }, { "epoch": 6.294049008168028, "grad_norm": 0.0262451171875, "learning_rate": 6.046439298594496e-05, "loss": 0.0003, "step": 13485 }, { "epoch": 6.294515752625437, "grad_norm": 0.1513671875, "learning_rate": 6.045091540773684e-05, "loss": 0.0014, "step": 13486 }, { "epoch": 6.294982497082847, "grad_norm": 0.2451171875, "learning_rate": 6.043743868106437e-05, "loss": 0.0007, "step": 13487 }, { "epoch": 6.295449241540257, "grad_norm": 0.0255126953125, "learning_rate": 6.042396280621776e-05, "loss": 0.0004, "step": 13488 }, { "epoch": 6.2959159859976666, "grad_norm": 0.1044921875, "learning_rate": 6.0410487783487124e-05, "loss": 0.0052, "step": 13489 }, { "epoch": 6.2963827304550755, "grad_norm": 0.0703125, "learning_rate": 6.039701361316261e-05, "loss": 0.0006, "step": 13490 }, { "epoch": 6.296849474912485, "grad_norm": 0.080078125, "learning_rate": 6.038354029553436e-05, "loss": 0.0005, "step": 13491 }, { "epoch": 6.297316219369895, "grad_norm": 0.1318359375, "learning_rate": 6.0370067830892405e-05, "loss": 0.0006, "step": 13492 }, { "epoch": 6.297782963827305, "grad_norm": 0.07763671875, "learning_rate": 6.0356596219526884e-05, "loss": 0.0038, "step": 13493 }, { "epoch": 6.298249708284714, "grad_norm": 0.1767578125, "learning_rate": 6.034312546172782e-05, "loss": 0.0069, "step": 13494 }, { "epoch": 6.2987164527421236, "grad_norm": 0.07666015625, "learning_rate": 6.0329655557785293e-05, "loss": 0.0003, "step": 13495 }, { "epoch": 6.299183197199533, "grad_norm": 0.328125, "learning_rate": 6.0316186507989245e-05, "loss": 0.002, "step": 13496 }, { "epoch": 6.299649941656943, "grad_norm": 0.1611328125, "learning_rate": 6.030271831262977e-05, "loss": 0.0007, "step": 13497 }, { "epoch": 6.300116686114352, "grad_norm": 0.0301513671875, "learning_rate": 6.0289250971996825e-05, "loss": 0.0004, "step": 13498 }, { "epoch": 6.300583430571762, "grad_norm": 0.03662109375, "learning_rate": 6.027578448638032e-05, "loss": 0.0004, "step": 13499 }, { "epoch": 6.301050175029172, "grad_norm": 0.06005859375, "learning_rate": 6.026231885607029e-05, "loss": 0.0004, "step": 13500 }, { "epoch": 6.301516919486581, "grad_norm": 0.1669921875, "learning_rate": 6.024885408135661e-05, "loss": 0.0008, "step": 13501 }, { "epoch": 6.30198366394399, "grad_norm": 0.0576171875, "learning_rate": 6.023539016252919e-05, "loss": 0.0005, "step": 13502 }, { "epoch": 6.3024504084014, "grad_norm": 0.1630859375, "learning_rate": 6.022192709987797e-05, "loss": 0.0012, "step": 13503 }, { "epoch": 6.30291715285881, "grad_norm": 0.0751953125, "learning_rate": 6.0208464893692787e-05, "loss": 0.0005, "step": 13504 }, { "epoch": 6.30338389731622, "grad_norm": 0.1728515625, "learning_rate": 6.0195003544263504e-05, "loss": 0.0008, "step": 13505 }, { "epoch": 6.303850641773629, "grad_norm": 0.74609375, "learning_rate": 6.018154305187993e-05, "loss": 0.003, "step": 13506 }, { "epoch": 6.304317386231038, "grad_norm": 0.040771484375, "learning_rate": 6.016808341683196e-05, "loss": 0.0005, "step": 13507 }, { "epoch": 6.304784130688448, "grad_norm": 0.36328125, "learning_rate": 6.0154624639409316e-05, "loss": 0.0013, "step": 13508 }, { "epoch": 6.305250875145858, "grad_norm": 0.07958984375, "learning_rate": 6.0141166719901806e-05, "loss": 0.0005, "step": 13509 }, { "epoch": 6.305717619603267, "grad_norm": 0.07177734375, "learning_rate": 6.012770965859923e-05, "loss": 0.0005, "step": 13510 }, { "epoch": 6.306184364060677, "grad_norm": 0.10009765625, "learning_rate": 6.0114253455791315e-05, "loss": 0.003, "step": 13511 }, { "epoch": 6.3066511085180865, "grad_norm": 0.03955078125, "learning_rate": 6.0100798111767766e-05, "loss": 0.0003, "step": 13512 }, { "epoch": 6.307117852975496, "grad_norm": 0.0498046875, "learning_rate": 6.0087343626818293e-05, "loss": 0.0003, "step": 13513 }, { "epoch": 6.307584597432905, "grad_norm": 0.06689453125, "learning_rate": 6.007389000123258e-05, "loss": 0.0003, "step": 13514 }, { "epoch": 6.308051341890315, "grad_norm": 0.1943359375, "learning_rate": 6.0060437235300325e-05, "loss": 0.0007, "step": 13515 }, { "epoch": 6.308518086347725, "grad_norm": 0.06787109375, "learning_rate": 6.0046985329311165e-05, "loss": 0.0004, "step": 13516 }, { "epoch": 6.308984830805134, "grad_norm": 0.0537109375, "learning_rate": 6.003353428355476e-05, "loss": 0.0003, "step": 13517 }, { "epoch": 6.3094515752625435, "grad_norm": 0.31640625, "learning_rate": 6.002008409832067e-05, "loss": 0.003, "step": 13518 }, { "epoch": 6.309918319719953, "grad_norm": 0.328125, "learning_rate": 6.000663477389856e-05, "loss": 0.0016, "step": 13519 }, { "epoch": 6.310385064177363, "grad_norm": 0.033935546875, "learning_rate": 5.9993186310577974e-05, "loss": 0.0004, "step": 13520 }, { "epoch": 6.310851808634773, "grad_norm": 0.1416015625, "learning_rate": 5.997973870864846e-05, "loss": 0.0004, "step": 13521 }, { "epoch": 6.311318553092182, "grad_norm": 0.0556640625, "learning_rate": 5.996629196839958e-05, "loss": 0.0006, "step": 13522 }, { "epoch": 6.311785297549592, "grad_norm": 0.173828125, "learning_rate": 5.995284609012086e-05, "loss": 0.0038, "step": 13523 }, { "epoch": 6.312252042007001, "grad_norm": 0.055908203125, "learning_rate": 5.9939401074101776e-05, "loss": 0.0041, "step": 13524 }, { "epoch": 6.31271878646441, "grad_norm": 0.054931640625, "learning_rate": 5.992595692063185e-05, "loss": 0.0003, "step": 13525 }, { "epoch": 6.31318553092182, "grad_norm": 0.047119140625, "learning_rate": 5.991251363000054e-05, "loss": 0.0004, "step": 13526 }, { "epoch": 6.31365227537923, "grad_norm": 0.0546875, "learning_rate": 5.989907120249729e-05, "loss": 0.0004, "step": 13527 }, { "epoch": 6.31411901983664, "grad_norm": 0.2138671875, "learning_rate": 5.988562963841151e-05, "loss": 0.0009, "step": 13528 }, { "epoch": 6.314585764294049, "grad_norm": 0.0264892578125, "learning_rate": 5.9872188938032644e-05, "loss": 0.0002, "step": 13529 }, { "epoch": 6.315052508751458, "grad_norm": 0.1748046875, "learning_rate": 5.985874910165008e-05, "loss": 0.0008, "step": 13530 }, { "epoch": 6.315519253208868, "grad_norm": 0.031494140625, "learning_rate": 5.984531012955317e-05, "loss": 0.0005, "step": 13531 }, { "epoch": 6.315985997666278, "grad_norm": 0.11865234375, "learning_rate": 5.98318720220313e-05, "loss": 0.0008, "step": 13532 }, { "epoch": 6.316452742123687, "grad_norm": 0.055908203125, "learning_rate": 5.981843477937379e-05, "loss": 0.0003, "step": 13533 }, { "epoch": 6.316919486581097, "grad_norm": 0.0947265625, "learning_rate": 5.980499840186995e-05, "loss": 0.0005, "step": 13534 }, { "epoch": 6.3173862310385065, "grad_norm": 0.03173828125, "learning_rate": 5.9791562889809095e-05, "loss": 0.0003, "step": 13535 }, { "epoch": 6.317852975495916, "grad_norm": 0.392578125, "learning_rate": 5.9778128243480515e-05, "loss": 0.001, "step": 13536 }, { "epoch": 6.318319719953325, "grad_norm": 0.08251953125, "learning_rate": 5.9764694463173456e-05, "loss": 0.0059, "step": 13537 }, { "epoch": 6.318786464410735, "grad_norm": 0.08544921875, "learning_rate": 5.975126154917715e-05, "loss": 0.0003, "step": 13538 }, { "epoch": 6.319253208868145, "grad_norm": 0.314453125, "learning_rate": 5.973782950178086e-05, "loss": 0.0009, "step": 13539 }, { "epoch": 6.319719953325555, "grad_norm": 0.2138671875, "learning_rate": 5.9724398321273764e-05, "loss": 0.0024, "step": 13540 }, { "epoch": 6.3201866977829635, "grad_norm": 0.2294921875, "learning_rate": 5.971096800794506e-05, "loss": 0.0012, "step": 13541 }, { "epoch": 6.320653442240373, "grad_norm": 0.06689453125, "learning_rate": 5.969753856208392e-05, "loss": 0.0032, "step": 13542 }, { "epoch": 6.321120186697783, "grad_norm": 0.0289306640625, "learning_rate": 5.9684109983979484e-05, "loss": 0.0006, "step": 13543 }, { "epoch": 6.321586931155193, "grad_norm": 0.06396484375, "learning_rate": 5.9670682273920896e-05, "loss": 0.0042, "step": 13544 }, { "epoch": 6.322053675612602, "grad_norm": 0.2197265625, "learning_rate": 5.9657255432197266e-05, "loss": 0.001, "step": 13545 }, { "epoch": 6.322520420070012, "grad_norm": 0.04833984375, "learning_rate": 5.964382945909769e-05, "loss": 0.0004, "step": 13546 }, { "epoch": 6.322987164527421, "grad_norm": 0.041015625, "learning_rate": 5.963040435491125e-05, "loss": 0.0002, "step": 13547 }, { "epoch": 6.323453908984831, "grad_norm": 0.0859375, "learning_rate": 5.961698011992696e-05, "loss": 0.0005, "step": 13548 }, { "epoch": 6.32392065344224, "grad_norm": 0.10888671875, "learning_rate": 5.960355675443392e-05, "loss": 0.001, "step": 13549 }, { "epoch": 6.32438739789965, "grad_norm": 0.130859375, "learning_rate": 5.959013425872113e-05, "loss": 0.0027, "step": 13550 }, { "epoch": 6.32485414235706, "grad_norm": 0.2431640625, "learning_rate": 5.9576712633077556e-05, "loss": 0.001, "step": 13551 }, { "epoch": 6.3253208868144695, "grad_norm": 0.07080078125, "learning_rate": 5.956329187779224e-05, "loss": 0.0004, "step": 13552 }, { "epoch": 6.325787631271878, "grad_norm": 0.0849609375, "learning_rate": 5.954987199315412e-05, "loss": 0.0004, "step": 13553 }, { "epoch": 6.326254375729288, "grad_norm": 0.283203125, "learning_rate": 5.953645297945212e-05, "loss": 0.0045, "step": 13554 }, { "epoch": 6.326721120186698, "grad_norm": 0.039306640625, "learning_rate": 5.952303483697517e-05, "loss": 0.0003, "step": 13555 }, { "epoch": 6.327187864644108, "grad_norm": 0.1162109375, "learning_rate": 5.95096175660122e-05, "loss": 0.0005, "step": 13556 }, { "epoch": 6.327654609101517, "grad_norm": 0.05517578125, "learning_rate": 5.949620116685209e-05, "loss": 0.0006, "step": 13557 }, { "epoch": 6.3281213535589265, "grad_norm": 0.10595703125, "learning_rate": 5.94827856397837e-05, "loss": 0.0004, "step": 13558 }, { "epoch": 6.328588098016336, "grad_norm": 0.06787109375, "learning_rate": 5.946937098509595e-05, "loss": 0.0004, "step": 13559 }, { "epoch": 6.329054842473745, "grad_norm": 0.0986328125, "learning_rate": 5.945595720307755e-05, "loss": 0.0008, "step": 13560 }, { "epoch": 6.329521586931155, "grad_norm": 0.103515625, "learning_rate": 5.944254429401739e-05, "loss": 0.0037, "step": 13561 }, { "epoch": 6.329988331388565, "grad_norm": 0.353515625, "learning_rate": 5.9429132258204256e-05, "loss": 0.0057, "step": 13562 }, { "epoch": 6.3304550758459746, "grad_norm": 0.076171875, "learning_rate": 5.941572109592689e-05, "loss": 0.0005, "step": 13563 }, { "epoch": 6.330921820303384, "grad_norm": 0.09521484375, "learning_rate": 5.94023108074741e-05, "loss": 0.0005, "step": 13564 }, { "epoch": 6.331388564760793, "grad_norm": 0.037841796875, "learning_rate": 5.938890139313461e-05, "loss": 0.0004, "step": 13565 }, { "epoch": 6.331855309218203, "grad_norm": 0.08447265625, "learning_rate": 5.9375492853197124e-05, "loss": 0.0006, "step": 13566 }, { "epoch": 6.332322053675613, "grad_norm": 0.0693359375, "learning_rate": 5.936208518795035e-05, "loss": 0.0004, "step": 13567 }, { "epoch": 6.332788798133022, "grad_norm": 0.053466796875, "learning_rate": 5.934867839768297e-05, "loss": 0.0004, "step": 13568 }, { "epoch": 6.3332555425904316, "grad_norm": 0.052001953125, "learning_rate": 5.933527248268365e-05, "loss": 0.0003, "step": 13569 }, { "epoch": 6.333722287047841, "grad_norm": 0.027587890625, "learning_rate": 5.932186744324102e-05, "loss": 0.0003, "step": 13570 }, { "epoch": 6.334189031505251, "grad_norm": 0.07421875, "learning_rate": 5.930846327964372e-05, "loss": 0.0005, "step": 13571 }, { "epoch": 6.33465577596266, "grad_norm": 0.0203857421875, "learning_rate": 5.9295059992180366e-05, "loss": 0.0003, "step": 13572 }, { "epoch": 6.33512252042007, "grad_norm": 0.08349609375, "learning_rate": 5.9281657581139514e-05, "loss": 0.0004, "step": 13573 }, { "epoch": 6.33558926487748, "grad_norm": 0.1494140625, "learning_rate": 5.926825604680977e-05, "loss": 0.0009, "step": 13574 }, { "epoch": 6.336056009334889, "grad_norm": 0.25390625, "learning_rate": 5.925485538947967e-05, "loss": 0.001, "step": 13575 }, { "epoch": 6.336522753792298, "grad_norm": 0.032958984375, "learning_rate": 5.9241455609437734e-05, "loss": 0.0003, "step": 13576 }, { "epoch": 6.336989498249708, "grad_norm": 0.0234375, "learning_rate": 5.922805670697247e-05, "loss": 0.0002, "step": 13577 }, { "epoch": 6.337456242707118, "grad_norm": 0.05224609375, "learning_rate": 5.921465868237239e-05, "loss": 0.0032, "step": 13578 }, { "epoch": 6.337922987164528, "grad_norm": 0.1259765625, "learning_rate": 5.920126153592598e-05, "loss": 0.0021, "step": 13579 }, { "epoch": 6.338389731621937, "grad_norm": 0.0211181640625, "learning_rate": 5.918786526792164e-05, "loss": 0.0003, "step": 13580 }, { "epoch": 6.338856476079346, "grad_norm": 0.1435546875, "learning_rate": 5.917446987864785e-05, "loss": 0.0005, "step": 13581 }, { "epoch": 6.339323220536756, "grad_norm": 0.1796875, "learning_rate": 5.916107536839305e-05, "loss": 0.0011, "step": 13582 }, { "epoch": 6.339789964994166, "grad_norm": 0.0135498046875, "learning_rate": 5.914768173744557e-05, "loss": 0.0002, "step": 13583 }, { "epoch": 6.340256709451575, "grad_norm": 0.055419921875, "learning_rate": 5.913428898609386e-05, "loss": 0.0004, "step": 13584 }, { "epoch": 6.340723453908985, "grad_norm": 0.028564453125, "learning_rate": 5.912089711462624e-05, "loss": 0.0003, "step": 13585 }, { "epoch": 6.3411901983663945, "grad_norm": 0.216796875, "learning_rate": 5.9107506123331066e-05, "loss": 0.0005, "step": 13586 }, { "epoch": 6.341656942823804, "grad_norm": 0.036865234375, "learning_rate": 5.9094116012496634e-05, "loss": 0.0024, "step": 13587 }, { "epoch": 6.342123687281213, "grad_norm": 0.0263671875, "learning_rate": 5.908072678241129e-05, "loss": 0.0003, "step": 13588 }, { "epoch": 6.342590431738623, "grad_norm": 0.0277099609375, "learning_rate": 5.906733843336329e-05, "loss": 0.0003, "step": 13589 }, { "epoch": 6.343057176196033, "grad_norm": 0.052490234375, "learning_rate": 5.90539509656409e-05, "loss": 0.0004, "step": 13590 }, { "epoch": 6.343523920653443, "grad_norm": 0.126953125, "learning_rate": 5.9040564379532404e-05, "loss": 0.0023, "step": 13591 }, { "epoch": 6.3439906651108515, "grad_norm": 0.021484375, "learning_rate": 5.902717867532599e-05, "loss": 0.0003, "step": 13592 }, { "epoch": 6.344457409568261, "grad_norm": 0.027099609375, "learning_rate": 5.9013793853309876e-05, "loss": 0.0003, "step": 13593 }, { "epoch": 6.344924154025671, "grad_norm": 0.059814453125, "learning_rate": 5.900040991377225e-05, "loss": 0.0005, "step": 13594 }, { "epoch": 6.345390898483081, "grad_norm": 0.056396484375, "learning_rate": 5.8987026857001306e-05, "loss": 0.0004, "step": 13595 }, { "epoch": 6.34585764294049, "grad_norm": 0.0311279296875, "learning_rate": 5.897364468328517e-05, "loss": 0.0003, "step": 13596 }, { "epoch": 6.3463243873979, "grad_norm": 0.046142578125, "learning_rate": 5.8960263392911964e-05, "loss": 0.0004, "step": 13597 }, { "epoch": 6.346791131855309, "grad_norm": 0.0869140625, "learning_rate": 5.8946882986169846e-05, "loss": 0.0005, "step": 13598 }, { "epoch": 6.347257876312719, "grad_norm": 0.055908203125, "learning_rate": 5.8933503463346886e-05, "loss": 0.0037, "step": 13599 }, { "epoch": 6.347724620770128, "grad_norm": 0.06494140625, "learning_rate": 5.8920124824731136e-05, "loss": 0.0031, "step": 13600 }, { "epoch": 6.348191365227538, "grad_norm": 0.015869140625, "learning_rate": 5.8906747070610704e-05, "loss": 0.0002, "step": 13601 }, { "epoch": 6.348658109684948, "grad_norm": 0.0810546875, "learning_rate": 5.889337020127359e-05, "loss": 0.0003, "step": 13602 }, { "epoch": 6.349124854142357, "grad_norm": 0.039306640625, "learning_rate": 5.887999421700782e-05, "loss": 0.0003, "step": 13603 }, { "epoch": 6.349591598599766, "grad_norm": 0.1650390625, "learning_rate": 5.8866619118101406e-05, "loss": 0.0037, "step": 13604 }, { "epoch": 6.350058343057176, "grad_norm": 0.234375, "learning_rate": 5.885324490484232e-05, "loss": 0.0035, "step": 13605 }, { "epoch": 6.350525087514586, "grad_norm": 0.3125, "learning_rate": 5.8839871577518535e-05, "loss": 0.0018, "step": 13606 }, { "epoch": 6.350991831971996, "grad_norm": 0.10693359375, "learning_rate": 5.8826499136418e-05, "loss": 0.0036, "step": 13607 }, { "epoch": 6.351458576429405, "grad_norm": 0.1826171875, "learning_rate": 5.88131275818286e-05, "loss": 0.001, "step": 13608 }, { "epoch": 6.3519253208868145, "grad_norm": 0.04443359375, "learning_rate": 5.8799756914038226e-05, "loss": 0.0003, "step": 13609 }, { "epoch": 6.352392065344224, "grad_norm": 0.443359375, "learning_rate": 5.878638713333484e-05, "loss": 0.0133, "step": 13610 }, { "epoch": 6.352858809801633, "grad_norm": 0.1640625, "learning_rate": 5.877301824000625e-05, "loss": 0.0005, "step": 13611 }, { "epoch": 6.353325554259043, "grad_norm": 0.125, "learning_rate": 5.8759650234340314e-05, "loss": 0.0005, "step": 13612 }, { "epoch": 6.353792298716453, "grad_norm": 0.208984375, "learning_rate": 5.874628311662488e-05, "loss": 0.0037, "step": 13613 }, { "epoch": 6.354259043173863, "grad_norm": 0.01312255859375, "learning_rate": 5.8732916887147735e-05, "loss": 0.0002, "step": 13614 }, { "epoch": 6.3547257876312715, "grad_norm": 0.04296875, "learning_rate": 5.871955154619666e-05, "loss": 0.0004, "step": 13615 }, { "epoch": 6.355192532088681, "grad_norm": 0.146484375, "learning_rate": 5.870618709405946e-05, "loss": 0.0005, "step": 13616 }, { "epoch": 6.355659276546091, "grad_norm": 0.17578125, "learning_rate": 5.869282353102388e-05, "loss": 0.0009, "step": 13617 }, { "epoch": 6.356126021003501, "grad_norm": 0.045166015625, "learning_rate": 5.8679460857377635e-05, "loss": 0.0023, "step": 13618 }, { "epoch": 6.35659276546091, "grad_norm": 0.044921875, "learning_rate": 5.866609907340841e-05, "loss": 0.0035, "step": 13619 }, { "epoch": 6.35705950991832, "grad_norm": 0.0517578125, "learning_rate": 5.8652738179403976e-05, "loss": 0.0004, "step": 13620 }, { "epoch": 6.357526254375729, "grad_norm": 0.490234375, "learning_rate": 5.8639378175651946e-05, "loss": 0.0018, "step": 13621 }, { "epoch": 6.357992998833139, "grad_norm": 0.062255859375, "learning_rate": 5.8626019062439985e-05, "loss": 0.0006, "step": 13622 }, { "epoch": 6.358459743290548, "grad_norm": 0.1201171875, "learning_rate": 5.8612660840055746e-05, "loss": 0.0005, "step": 13623 }, { "epoch": 6.358926487747958, "grad_norm": 0.0390625, "learning_rate": 5.859930350878684e-05, "loss": 0.0004, "step": 13624 }, { "epoch": 6.359393232205368, "grad_norm": 0.10888671875, "learning_rate": 5.858594706892089e-05, "loss": 0.0047, "step": 13625 }, { "epoch": 6.3598599766627775, "grad_norm": 0.11328125, "learning_rate": 5.8572591520745405e-05, "loss": 0.0005, "step": 13626 }, { "epoch": 6.360326721120186, "grad_norm": 0.193359375, "learning_rate": 5.855923686454801e-05, "loss": 0.0006, "step": 13627 }, { "epoch": 6.360793465577596, "grad_norm": 0.1357421875, "learning_rate": 5.854588310061623e-05, "loss": 0.0006, "step": 13628 }, { "epoch": 6.361260210035006, "grad_norm": 0.22265625, "learning_rate": 5.853253022923756e-05, "loss": 0.0006, "step": 13629 }, { "epoch": 6.361726954492416, "grad_norm": 0.1337890625, "learning_rate": 5.851917825069955e-05, "loss": 0.0007, "step": 13630 }, { "epoch": 6.362193698949825, "grad_norm": 0.32421875, "learning_rate": 5.850582716528964e-05, "loss": 0.0041, "step": 13631 }, { "epoch": 6.3626604434072345, "grad_norm": 0.06494140625, "learning_rate": 5.8492476973295294e-05, "loss": 0.0004, "step": 13632 }, { "epoch": 6.363127187864644, "grad_norm": 0.043212890625, "learning_rate": 5.8479127675004006e-05, "loss": 0.0003, "step": 13633 }, { "epoch": 6.363593932322054, "grad_norm": 0.376953125, "learning_rate": 5.846577927070316e-05, "loss": 0.0041, "step": 13634 }, { "epoch": 6.364060676779463, "grad_norm": 0.1103515625, "learning_rate": 5.845243176068016e-05, "loss": 0.0005, "step": 13635 }, { "epoch": 6.364527421236873, "grad_norm": 0.0159912109375, "learning_rate": 5.8439085145222396e-05, "loss": 0.0003, "step": 13636 }, { "epoch": 6.364994165694283, "grad_norm": 0.032958984375, "learning_rate": 5.8425739424617256e-05, "loss": 0.0003, "step": 13637 }, { "epoch": 6.365460910151692, "grad_norm": 0.06005859375, "learning_rate": 5.841239459915206e-05, "loss": 0.0003, "step": 13638 }, { "epoch": 6.365927654609101, "grad_norm": 0.119140625, "learning_rate": 5.8399050669114156e-05, "loss": 0.004, "step": 13639 }, { "epoch": 6.366394399066511, "grad_norm": 0.07275390625, "learning_rate": 5.8385707634790856e-05, "loss": 0.0004, "step": 13640 }, { "epoch": 6.366861143523921, "grad_norm": 0.08984375, "learning_rate": 5.837236549646943e-05, "loss": 0.0005, "step": 13641 }, { "epoch": 6.367327887981331, "grad_norm": 0.055908203125, "learning_rate": 5.835902425443718e-05, "loss": 0.0003, "step": 13642 }, { "epoch": 6.3677946324387396, "grad_norm": 0.146484375, "learning_rate": 5.834568390898132e-05, "loss": 0.0037, "step": 13643 }, { "epoch": 6.368261376896149, "grad_norm": 0.1865234375, "learning_rate": 5.833234446038909e-05, "loss": 0.0007, "step": 13644 }, { "epoch": 6.368728121353559, "grad_norm": 0.048828125, "learning_rate": 5.8319005908947765e-05, "loss": 0.0003, "step": 13645 }, { "epoch": 6.369194865810968, "grad_norm": 0.01153564453125, "learning_rate": 5.8305668254944434e-05, "loss": 0.0002, "step": 13646 }, { "epoch": 6.369661610268378, "grad_norm": 0.1298828125, "learning_rate": 5.8292331498666344e-05, "loss": 0.0006, "step": 13647 }, { "epoch": 6.370128354725788, "grad_norm": 0.040771484375, "learning_rate": 5.827899564040066e-05, "loss": 0.0003, "step": 13648 }, { "epoch": 6.370595099183197, "grad_norm": 0.037109375, "learning_rate": 5.826566068043445e-05, "loss": 0.0003, "step": 13649 }, { "epoch": 6.371061843640607, "grad_norm": 0.1396484375, "learning_rate": 5.825232661905492e-05, "loss": 0.0008, "step": 13650 }, { "epoch": 6.371528588098016, "grad_norm": 0.05126953125, "learning_rate": 5.823899345654906e-05, "loss": 0.0003, "step": 13651 }, { "epoch": 6.371995332555426, "grad_norm": 0.0810546875, "learning_rate": 5.822566119320403e-05, "loss": 0.0039, "step": 13652 }, { "epoch": 6.372462077012836, "grad_norm": 0.0203857421875, "learning_rate": 5.821232982930689e-05, "loss": 0.0002, "step": 13653 }, { "epoch": 6.372928821470245, "grad_norm": 0.205078125, "learning_rate": 5.8198999365144615e-05, "loss": 0.0008, "step": 13654 }, { "epoch": 6.373395565927654, "grad_norm": 0.1962890625, "learning_rate": 5.8185669801004306e-05, "loss": 0.0009, "step": 13655 }, { "epoch": 6.373862310385064, "grad_norm": 0.06640625, "learning_rate": 5.817234113717287e-05, "loss": 0.0004, "step": 13656 }, { "epoch": 6.374329054842474, "grad_norm": 0.279296875, "learning_rate": 5.8159013373937384e-05, "loss": 0.0007, "step": 13657 }, { "epoch": 6.374795799299883, "grad_norm": 0.043701171875, "learning_rate": 5.814568651158472e-05, "loss": 0.0004, "step": 13658 }, { "epoch": 6.375262543757293, "grad_norm": 0.033935546875, "learning_rate": 5.813236055040188e-05, "loss": 0.0003, "step": 13659 }, { "epoch": 6.3757292882147025, "grad_norm": 0.2890625, "learning_rate": 5.811903549067579e-05, "loss": 0.0008, "step": 13660 }, { "epoch": 6.376196032672112, "grad_norm": 0.05859375, "learning_rate": 5.810571133269329e-05, "loss": 0.0004, "step": 13661 }, { "epoch": 6.376662777129521, "grad_norm": 0.0201416015625, "learning_rate": 5.809238807674137e-05, "loss": 0.0003, "step": 13662 }, { "epoch": 6.377129521586931, "grad_norm": 0.451171875, "learning_rate": 5.807906572310677e-05, "loss": 0.0039, "step": 13663 }, { "epoch": 6.377596266044341, "grad_norm": 0.08056640625, "learning_rate": 5.806574427207642e-05, "loss": 0.0025, "step": 13664 }, { "epoch": 6.378063010501751, "grad_norm": 0.1279296875, "learning_rate": 5.805242372393714e-05, "loss": 0.0005, "step": 13665 }, { "epoch": 6.3785297549591595, "grad_norm": 0.1474609375, "learning_rate": 5.8039104078975694e-05, "loss": 0.0006, "step": 13666 }, { "epoch": 6.378996499416569, "grad_norm": 0.2138671875, "learning_rate": 5.802578533747889e-05, "loss": 0.0008, "step": 13667 }, { "epoch": 6.379463243873979, "grad_norm": 0.046142578125, "learning_rate": 5.801246749973354e-05, "loss": 0.0004, "step": 13668 }, { "epoch": 6.379929988331389, "grad_norm": 0.032470703125, "learning_rate": 5.7999150566026306e-05, "loss": 0.0004, "step": 13669 }, { "epoch": 6.380396732788798, "grad_norm": 0.0615234375, "learning_rate": 5.798583453664397e-05, "loss": 0.0004, "step": 13670 }, { "epoch": 6.380863477246208, "grad_norm": 0.03955078125, "learning_rate": 5.7972519411873246e-05, "loss": 0.0004, "step": 13671 }, { "epoch": 6.381330221703617, "grad_norm": 0.140625, "learning_rate": 5.79592051920008e-05, "loss": 0.0005, "step": 13672 }, { "epoch": 6.381796966161027, "grad_norm": 0.095703125, "learning_rate": 5.79458918773133e-05, "loss": 0.0006, "step": 13673 }, { "epoch": 6.382263710618436, "grad_norm": 0.06396484375, "learning_rate": 5.793257946809745e-05, "loss": 0.0005, "step": 13674 }, { "epoch": 6.382730455075846, "grad_norm": 0.03515625, "learning_rate": 5.79192679646398e-05, "loss": 0.0004, "step": 13675 }, { "epoch": 6.383197199533256, "grad_norm": 0.04248046875, "learning_rate": 5.7905957367226994e-05, "loss": 0.0003, "step": 13676 }, { "epoch": 6.3836639439906655, "grad_norm": 0.0220947265625, "learning_rate": 5.789264767614566e-05, "loss": 0.0003, "step": 13677 }, { "epoch": 6.384130688448074, "grad_norm": 0.130859375, "learning_rate": 5.787933889168232e-05, "loss": 0.001, "step": 13678 }, { "epoch": 6.384597432905484, "grad_norm": 0.1640625, "learning_rate": 5.786603101412358e-05, "loss": 0.0006, "step": 13679 }, { "epoch": 6.385064177362894, "grad_norm": 0.765625, "learning_rate": 5.78527240437559e-05, "loss": 0.0079, "step": 13680 }, { "epoch": 6.385530921820304, "grad_norm": 0.1767578125, "learning_rate": 5.783941798086584e-05, "loss": 0.0043, "step": 13681 }, { "epoch": 6.385997666277713, "grad_norm": 0.0157470703125, "learning_rate": 5.782611282573992e-05, "loss": 0.0002, "step": 13682 }, { "epoch": 6.3864644107351225, "grad_norm": 0.06396484375, "learning_rate": 5.7812808578664556e-05, "loss": 0.0004, "step": 13683 }, { "epoch": 6.386931155192532, "grad_norm": 0.08203125, "learning_rate": 5.779950523992621e-05, "loss": 0.0007, "step": 13684 }, { "epoch": 6.387397899649942, "grad_norm": 0.11376953125, "learning_rate": 5.77862028098114e-05, "loss": 0.0029, "step": 13685 }, { "epoch": 6.387864644107351, "grad_norm": 0.283203125, "learning_rate": 5.777290128860643e-05, "loss": 0.0016, "step": 13686 }, { "epoch": 6.388331388564761, "grad_norm": 0.061279296875, "learning_rate": 5.775960067659775e-05, "loss": 0.0004, "step": 13687 }, { "epoch": 6.388798133022171, "grad_norm": 0.0242919921875, "learning_rate": 5.774630097407177e-05, "loss": 0.0003, "step": 13688 }, { "epoch": 6.3892648774795795, "grad_norm": 0.10791015625, "learning_rate": 5.773300218131478e-05, "loss": 0.0028, "step": 13689 }, { "epoch": 6.389731621936989, "grad_norm": 0.0247802734375, "learning_rate": 5.771970429861313e-05, "loss": 0.0003, "step": 13690 }, { "epoch": 6.390198366394399, "grad_norm": 0.052001953125, "learning_rate": 5.770640732625322e-05, "loss": 0.0024, "step": 13691 }, { "epoch": 6.390665110851809, "grad_norm": 0.443359375, "learning_rate": 5.7693111264521235e-05, "loss": 0.0051, "step": 13692 }, { "epoch": 6.391131855309218, "grad_norm": 0.04150390625, "learning_rate": 5.76798161137035e-05, "loss": 0.0021, "step": 13693 }, { "epoch": 6.391598599766628, "grad_norm": 0.01495361328125, "learning_rate": 5.766652187408633e-05, "loss": 0.0002, "step": 13694 }, { "epoch": 6.392065344224037, "grad_norm": 0.302734375, "learning_rate": 5.7653228545955874e-05, "loss": 0.0027, "step": 13695 }, { "epoch": 6.392532088681447, "grad_norm": 0.06494140625, "learning_rate": 5.763993612959838e-05, "loss": 0.0006, "step": 13696 }, { "epoch": 6.392998833138856, "grad_norm": 0.04443359375, "learning_rate": 5.7626644625300107e-05, "loss": 0.0003, "step": 13697 }, { "epoch": 6.393465577596266, "grad_norm": 0.2421875, "learning_rate": 5.761335403334715e-05, "loss": 0.0005, "step": 13698 }, { "epoch": 6.393932322053676, "grad_norm": 0.0213623046875, "learning_rate": 5.7600064354025754e-05, "loss": 0.0002, "step": 13699 }, { "epoch": 6.3943990665110855, "grad_norm": 0.31640625, "learning_rate": 5.7586775587621966e-05, "loss": 0.0009, "step": 13700 }, { "epoch": 6.394865810968494, "grad_norm": 0.06591796875, "learning_rate": 5.757348773442197e-05, "loss": 0.0004, "step": 13701 }, { "epoch": 6.395332555425904, "grad_norm": 0.39453125, "learning_rate": 5.756020079471188e-05, "loss": 0.008, "step": 13702 }, { "epoch": 6.395799299883314, "grad_norm": 0.0203857421875, "learning_rate": 5.754691476877776e-05, "loss": 0.0003, "step": 13703 }, { "epoch": 6.396266044340724, "grad_norm": 0.3046875, "learning_rate": 5.753362965690564e-05, "loss": 0.0015, "step": 13704 }, { "epoch": 6.396732788798133, "grad_norm": 0.029541015625, "learning_rate": 5.7520345459381585e-05, "loss": 0.0003, "step": 13705 }, { "epoch": 6.3971995332555425, "grad_norm": 0.10693359375, "learning_rate": 5.7507062176491654e-05, "loss": 0.0005, "step": 13706 }, { "epoch": 6.397666277712952, "grad_norm": 0.1416015625, "learning_rate": 5.74937798085218e-05, "loss": 0.0006, "step": 13707 }, { "epoch": 6.398133022170362, "grad_norm": 0.294921875, "learning_rate": 5.7480498355758006e-05, "loss": 0.0075, "step": 13708 }, { "epoch": 6.398599766627771, "grad_norm": 0.0859375, "learning_rate": 5.746721781848631e-05, "loss": 0.0026, "step": 13709 }, { "epoch": 6.399066511085181, "grad_norm": 0.1865234375, "learning_rate": 5.745393819699256e-05, "loss": 0.0008, "step": 13710 }, { "epoch": 6.399533255542591, "grad_norm": 0.087890625, "learning_rate": 5.744065949156278e-05, "loss": 0.0004, "step": 13711 }, { "epoch": 6.4, "grad_norm": 0.04443359375, "learning_rate": 5.742738170248276e-05, "loss": 0.0004, "step": 13712 }, { "epoch": 6.400466744457409, "grad_norm": 0.2119140625, "learning_rate": 5.7414104830038465e-05, "loss": 0.0012, "step": 13713 }, { "epoch": 6.400933488914819, "grad_norm": 0.0291748046875, "learning_rate": 5.7400828874515776e-05, "loss": 0.0003, "step": 13714 }, { "epoch": 6.401400233372229, "grad_norm": 0.158203125, "learning_rate": 5.738755383620047e-05, "loss": 0.0005, "step": 13715 }, { "epoch": 6.401866977829639, "grad_norm": 0.0966796875, "learning_rate": 5.737427971537841e-05, "loss": 0.0004, "step": 13716 }, { "epoch": 6.402333722287048, "grad_norm": 0.72265625, "learning_rate": 5.736100651233544e-05, "loss": 0.0019, "step": 13717 }, { "epoch": 6.402800466744457, "grad_norm": 0.0458984375, "learning_rate": 5.734773422735726e-05, "loss": 0.0003, "step": 13718 }, { "epoch": 6.403267211201867, "grad_norm": 0.03564453125, "learning_rate": 5.73344628607297e-05, "loss": 0.0003, "step": 13719 }, { "epoch": 6.403733955659277, "grad_norm": 0.11865234375, "learning_rate": 5.732119241273852e-05, "loss": 0.0039, "step": 13720 }, { "epoch": 6.404200700116686, "grad_norm": 0.0732421875, "learning_rate": 5.7307922883669386e-05, "loss": 0.0005, "step": 13721 }, { "epoch": 6.404667444574096, "grad_norm": 0.07373046875, "learning_rate": 5.7294654273808035e-05, "loss": 0.0003, "step": 13722 }, { "epoch": 6.405134189031505, "grad_norm": 0.032958984375, "learning_rate": 5.72813865834402e-05, "loss": 0.0003, "step": 13723 }, { "epoch": 6.405600933488914, "grad_norm": 0.14453125, "learning_rate": 5.7268119812851475e-05, "loss": 0.0035, "step": 13724 }, { "epoch": 6.406067677946324, "grad_norm": 0.1728515625, "learning_rate": 5.7254853962327546e-05, "loss": 0.0007, "step": 13725 }, { "epoch": 6.406534422403734, "grad_norm": 0.059814453125, "learning_rate": 5.724158903215406e-05, "loss": 0.0003, "step": 13726 }, { "epoch": 6.407001166861144, "grad_norm": 0.2470703125, "learning_rate": 5.722832502261659e-05, "loss": 0.0019, "step": 13727 }, { "epoch": 6.4074679113185535, "grad_norm": 0.49609375, "learning_rate": 5.721506193400073e-05, "loss": 0.0078, "step": 13728 }, { "epoch": 6.407934655775962, "grad_norm": 0.11328125, "learning_rate": 5.720179976659208e-05, "loss": 0.0041, "step": 13729 }, { "epoch": 6.408401400233372, "grad_norm": 0.171875, "learning_rate": 5.718853852067614e-05, "loss": 0.0008, "step": 13730 }, { "epoch": 6.408868144690782, "grad_norm": 0.08447265625, "learning_rate": 5.717527819653849e-05, "loss": 0.0024, "step": 13731 }, { "epoch": 6.409334889148191, "grad_norm": 0.048095703125, "learning_rate": 5.71620187944646e-05, "loss": 0.0024, "step": 13732 }, { "epoch": 6.409801633605601, "grad_norm": 0.22265625, "learning_rate": 5.714876031473997e-05, "loss": 0.0007, "step": 13733 }, { "epoch": 6.4102683780630105, "grad_norm": 0.04541015625, "learning_rate": 5.713550275765011e-05, "loss": 0.0005, "step": 13734 }, { "epoch": 6.41073512252042, "grad_norm": 0.0390625, "learning_rate": 5.712224612348038e-05, "loss": 0.0021, "step": 13735 }, { "epoch": 6.411201866977829, "grad_norm": 0.40234375, "learning_rate": 5.7108990412516285e-05, "loss": 0.0051, "step": 13736 }, { "epoch": 6.411668611435239, "grad_norm": 0.05029296875, "learning_rate": 5.709573562504324e-05, "loss": 0.0022, "step": 13737 }, { "epoch": 6.412135355892649, "grad_norm": 0.298828125, "learning_rate": 5.708248176134658e-05, "loss": 0.0018, "step": 13738 }, { "epoch": 6.412602100350059, "grad_norm": 0.236328125, "learning_rate": 5.7069228821711706e-05, "loss": 0.0045, "step": 13739 }, { "epoch": 6.4130688448074675, "grad_norm": 0.12158203125, "learning_rate": 5.7055976806424004e-05, "loss": 0.0007, "step": 13740 }, { "epoch": 6.413535589264877, "grad_norm": 0.1630859375, "learning_rate": 5.704272571576873e-05, "loss": 0.0005, "step": 13741 }, { "epoch": 6.414002333722287, "grad_norm": 0.466796875, "learning_rate": 5.7029475550031244e-05, "loss": 0.0031, "step": 13742 }, { "epoch": 6.414469078179697, "grad_norm": 0.02294921875, "learning_rate": 5.701622630949684e-05, "loss": 0.0003, "step": 13743 }, { "epoch": 6.414935822637106, "grad_norm": 0.17578125, "learning_rate": 5.700297799445076e-05, "loss": 0.0038, "step": 13744 }, { "epoch": 6.415402567094516, "grad_norm": 0.0595703125, "learning_rate": 5.698973060517825e-05, "loss": 0.0033, "step": 13745 }, { "epoch": 6.415869311551925, "grad_norm": 0.10888671875, "learning_rate": 5.6976484141964615e-05, "loss": 0.0006, "step": 13746 }, { "epoch": 6.416336056009335, "grad_norm": 0.054931640625, "learning_rate": 5.696323860509496e-05, "loss": 0.0004, "step": 13747 }, { "epoch": 6.416802800466744, "grad_norm": 0.09326171875, "learning_rate": 5.694999399485459e-05, "loss": 0.0005, "step": 13748 }, { "epoch": 6.417269544924154, "grad_norm": 0.22265625, "learning_rate": 5.693675031152855e-05, "loss": 0.0008, "step": 13749 }, { "epoch": 6.417736289381564, "grad_norm": 0.07958984375, "learning_rate": 5.69235075554021e-05, "loss": 0.0005, "step": 13750 }, { "epoch": 6.4182030338389735, "grad_norm": 0.2138671875, "learning_rate": 5.69102657267603e-05, "loss": 0.0012, "step": 13751 }, { "epoch": 6.418669778296382, "grad_norm": 0.1357421875, "learning_rate": 5.6897024825888315e-05, "loss": 0.0006, "step": 13752 }, { "epoch": 6.419136522753792, "grad_norm": 0.291015625, "learning_rate": 5.6883784853071176e-05, "loss": 0.0049, "step": 13753 }, { "epoch": 6.419603267211202, "grad_norm": 0.091796875, "learning_rate": 5.687054580859399e-05, "loss": 0.0007, "step": 13754 }, { "epoch": 6.420070011668612, "grad_norm": 0.40234375, "learning_rate": 5.6857307692741824e-05, "loss": 0.0049, "step": 13755 }, { "epoch": 6.420536756126021, "grad_norm": 0.2470703125, "learning_rate": 5.684407050579967e-05, "loss": 0.0013, "step": 13756 }, { "epoch": 6.4210035005834305, "grad_norm": 0.5703125, "learning_rate": 5.683083424805255e-05, "loss": 0.0116, "step": 13757 }, { "epoch": 6.42147024504084, "grad_norm": 0.333984375, "learning_rate": 5.681759891978553e-05, "loss": 0.001, "step": 13758 }, { "epoch": 6.42193698949825, "grad_norm": 0.04736328125, "learning_rate": 5.680436452128346e-05, "loss": 0.0004, "step": 13759 }, { "epoch": 6.422403733955659, "grad_norm": 0.404296875, "learning_rate": 5.679113105283138e-05, "loss": 0.0014, "step": 13760 }, { "epoch": 6.422870478413069, "grad_norm": 0.032470703125, "learning_rate": 5.677789851471416e-05, "loss": 0.0003, "step": 13761 }, { "epoch": 6.423337222870479, "grad_norm": 0.2412109375, "learning_rate": 5.6764666907216735e-05, "loss": 0.0019, "step": 13762 }, { "epoch": 6.423803967327888, "grad_norm": 0.322265625, "learning_rate": 5.6751436230624044e-05, "loss": 0.0018, "step": 13763 }, { "epoch": 6.424270711785297, "grad_norm": 0.51171875, "learning_rate": 5.673820648522088e-05, "loss": 0.006, "step": 13764 }, { "epoch": 6.424737456242707, "grad_norm": 0.345703125, "learning_rate": 5.6724977671292124e-05, "loss": 0.0045, "step": 13765 }, { "epoch": 6.425204200700117, "grad_norm": 0.1845703125, "learning_rate": 5.671174978912266e-05, "loss": 0.0029, "step": 13766 }, { "epoch": 6.425670945157526, "grad_norm": 0.25390625, "learning_rate": 5.669852283899721e-05, "loss": 0.0084, "step": 13767 }, { "epoch": 6.426137689614936, "grad_norm": 0.275390625, "learning_rate": 5.6685296821200604e-05, "loss": 0.0024, "step": 13768 }, { "epoch": 6.426604434072345, "grad_norm": 0.03759765625, "learning_rate": 5.667207173601766e-05, "loss": 0.0004, "step": 13769 }, { "epoch": 6.427071178529755, "grad_norm": 0.5625, "learning_rate": 5.665884758373304e-05, "loss": 0.0089, "step": 13770 }, { "epoch": 6.427537922987165, "grad_norm": 0.1845703125, "learning_rate": 5.664562436463152e-05, "loss": 0.0024, "step": 13771 }, { "epoch": 6.428004667444574, "grad_norm": 0.125, "learning_rate": 5.663240207899786e-05, "loss": 0.0005, "step": 13772 }, { "epoch": 6.428471411901984, "grad_norm": 0.322265625, "learning_rate": 5.661918072711665e-05, "loss": 0.0035, "step": 13773 }, { "epoch": 6.4289381563593935, "grad_norm": 0.51953125, "learning_rate": 5.660596030927261e-05, "loss": 0.0027, "step": 13774 }, { "epoch": 6.429404900816802, "grad_norm": 0.23046875, "learning_rate": 5.659274082575043e-05, "loss": 0.0031, "step": 13775 }, { "epoch": 6.429871645274212, "grad_norm": 0.2890625, "learning_rate": 5.6579522276834676e-05, "loss": 0.0018, "step": 13776 }, { "epoch": 6.430338389731622, "grad_norm": 0.291015625, "learning_rate": 5.656630466280997e-05, "loss": 0.0039, "step": 13777 }, { "epoch": 6.430805134189032, "grad_norm": 0.1689453125, "learning_rate": 5.655308798396095e-05, "loss": 0.0011, "step": 13778 }, { "epoch": 6.431271878646441, "grad_norm": 0.330078125, "learning_rate": 5.653987224057212e-05, "loss": 0.0027, "step": 13779 }, { "epoch": 6.4317386231038505, "grad_norm": 0.400390625, "learning_rate": 5.652665743292811e-05, "loss": 0.004, "step": 13780 }, { "epoch": 6.43220536756126, "grad_norm": 0.208984375, "learning_rate": 5.651344356131334e-05, "loss": 0.0016, "step": 13781 }, { "epoch": 6.43267211201867, "grad_norm": 0.0927734375, "learning_rate": 5.65002306260124e-05, "loss": 0.0006, "step": 13782 }, { "epoch": 6.433138856476079, "grad_norm": 0.2255859375, "learning_rate": 5.648701862730979e-05, "loss": 0.0027, "step": 13783 }, { "epoch": 6.433605600933489, "grad_norm": 0.28515625, "learning_rate": 5.64738075654899e-05, "loss": 0.0039, "step": 13784 }, { "epoch": 6.434072345390899, "grad_norm": 0.16796875, "learning_rate": 5.6460597440837225e-05, "loss": 0.0045, "step": 13785 }, { "epoch": 6.434539089848308, "grad_norm": 0.13671875, "learning_rate": 5.644738825363625e-05, "loss": 0.0005, "step": 13786 }, { "epoch": 6.435005834305717, "grad_norm": 0.09814453125, "learning_rate": 5.6434180004171266e-05, "loss": 0.0009, "step": 13787 }, { "epoch": 6.435472578763127, "grad_norm": 0.49609375, "learning_rate": 5.642097269272674e-05, "loss": 0.0019, "step": 13788 }, { "epoch": 6.435939323220537, "grad_norm": 0.30078125, "learning_rate": 5.640776631958706e-05, "loss": 0.0047, "step": 13789 }, { "epoch": 6.436406067677947, "grad_norm": 0.025146484375, "learning_rate": 5.63945608850365e-05, "loss": 0.0003, "step": 13790 }, { "epoch": 6.436872812135356, "grad_norm": 0.09912109375, "learning_rate": 5.638135638935942e-05, "loss": 0.0006, "step": 13791 }, { "epoch": 6.437339556592765, "grad_norm": 0.09716796875, "learning_rate": 5.636815283284018e-05, "loss": 0.0011, "step": 13792 }, { "epoch": 6.437806301050175, "grad_norm": 0.1455078125, "learning_rate": 5.635495021576297e-05, "loss": 0.0036, "step": 13793 }, { "epoch": 6.438273045507585, "grad_norm": 0.25, "learning_rate": 5.634174853841212e-05, "loss": 0.0041, "step": 13794 }, { "epoch": 6.438739789964994, "grad_norm": 0.20703125, "learning_rate": 5.6328547801071886e-05, "loss": 0.0025, "step": 13795 }, { "epoch": 6.439206534422404, "grad_norm": 0.33984375, "learning_rate": 5.6315348004026446e-05, "loss": 0.0064, "step": 13796 }, { "epoch": 6.4396732788798134, "grad_norm": 0.341796875, "learning_rate": 5.630214914756007e-05, "loss": 0.0128, "step": 13797 }, { "epoch": 6.440140023337223, "grad_norm": 0.263671875, "learning_rate": 5.62889512319569e-05, "loss": 0.0054, "step": 13798 }, { "epoch": 6.440606767794632, "grad_norm": 0.1552734375, "learning_rate": 5.627575425750107e-05, "loss": 0.0037, "step": 13799 }, { "epoch": 6.441073512252042, "grad_norm": 0.09326171875, "learning_rate": 5.6262558224476766e-05, "loss": 0.0008, "step": 13800 }, { "epoch": 6.441540256709452, "grad_norm": 0.1494140625, "learning_rate": 5.624936313316815e-05, "loss": 0.001, "step": 13801 }, { "epoch": 6.4420070011668615, "grad_norm": 0.380859375, "learning_rate": 5.623616898385926e-05, "loss": 0.006, "step": 13802 }, { "epoch": 6.44247374562427, "grad_norm": 0.03271484375, "learning_rate": 5.6222975776834196e-05, "loss": 0.0004, "step": 13803 }, { "epoch": 6.44294049008168, "grad_norm": 0.09130859375, "learning_rate": 5.620978351237708e-05, "loss": 0.0034, "step": 13804 }, { "epoch": 6.44340723453909, "grad_norm": 0.271484375, "learning_rate": 5.6196592190771865e-05, "loss": 0.0047, "step": 13805 }, { "epoch": 6.4438739789965, "grad_norm": 0.162109375, "learning_rate": 5.618340181230263e-05, "loss": 0.0026, "step": 13806 }, { "epoch": 6.444340723453909, "grad_norm": 0.265625, "learning_rate": 5.61702123772534e-05, "loss": 0.007, "step": 13807 }, { "epoch": 6.4448074679113185, "grad_norm": 0.177734375, "learning_rate": 5.615702388590809e-05, "loss": 0.0012, "step": 13808 }, { "epoch": 6.445274212368728, "grad_norm": 0.3046875, "learning_rate": 5.6143836338550735e-05, "loss": 0.0066, "step": 13809 }, { "epoch": 6.445740956826137, "grad_norm": 0.0712890625, "learning_rate": 5.6130649735465225e-05, "loss": 0.0008, "step": 13810 }, { "epoch": 6.446207701283547, "grad_norm": 0.11279296875, "learning_rate": 5.6117464076935475e-05, "loss": 0.0008, "step": 13811 }, { "epoch": 6.446674445740957, "grad_norm": 0.265625, "learning_rate": 5.610427936324546e-05, "loss": 0.0014, "step": 13812 }, { "epoch": 6.447141190198367, "grad_norm": 0.1533203125, "learning_rate": 5.609109559467898e-05, "loss": 0.0013, "step": 13813 }, { "epoch": 6.447607934655776, "grad_norm": 0.09423828125, "learning_rate": 5.6077912771519926e-05, "loss": 0.0007, "step": 13814 }, { "epoch": 6.448074679113185, "grad_norm": 0.10986328125, "learning_rate": 5.6064730894052174e-05, "loss": 0.0006, "step": 13815 }, { "epoch": 6.448541423570595, "grad_norm": 0.1552734375, "learning_rate": 5.605154996255948e-05, "loss": 0.0011, "step": 13816 }, { "epoch": 6.449008168028005, "grad_norm": 0.2431640625, "learning_rate": 5.603836997732568e-05, "loss": 0.0013, "step": 13817 }, { "epoch": 6.449474912485414, "grad_norm": 0.314453125, "learning_rate": 5.6025190938634586e-05, "loss": 0.0014, "step": 13818 }, { "epoch": 6.449941656942824, "grad_norm": 0.08740234375, "learning_rate": 5.601201284676989e-05, "loss": 0.0028, "step": 13819 }, { "epoch": 6.450408401400233, "grad_norm": 0.53125, "learning_rate": 5.599883570201535e-05, "loss": 0.0038, "step": 13820 }, { "epoch": 6.450875145857643, "grad_norm": 0.263671875, "learning_rate": 5.598565950465474e-05, "loss": 0.0015, "step": 13821 }, { "epoch": 6.451341890315052, "grad_norm": 0.1982421875, "learning_rate": 5.597248425497168e-05, "loss": 0.0008, "step": 13822 }, { "epoch": 6.451808634772462, "grad_norm": 0.0751953125, "learning_rate": 5.5959309953249885e-05, "loss": 0.0005, "step": 13823 }, { "epoch": 6.452275379229872, "grad_norm": 0.216796875, "learning_rate": 5.594613659977305e-05, "loss": 0.0023, "step": 13824 }, { "epoch": 6.4527421236872815, "grad_norm": 0.11865234375, "learning_rate": 5.593296419482473e-05, "loss": 0.0008, "step": 13825 }, { "epoch": 6.45320886814469, "grad_norm": 0.14453125, "learning_rate": 5.5919792738688595e-05, "loss": 0.0008, "step": 13826 }, { "epoch": 6.4536756126021, "grad_norm": 0.2080078125, "learning_rate": 5.5906622231648256e-05, "loss": 0.0008, "step": 13827 }, { "epoch": 6.45414235705951, "grad_norm": 0.2275390625, "learning_rate": 5.5893452673987225e-05, "loss": 0.0012, "step": 13828 }, { "epoch": 6.45460910151692, "grad_norm": 0.11083984375, "learning_rate": 5.588028406598913e-05, "loss": 0.0027, "step": 13829 }, { "epoch": 6.455075845974329, "grad_norm": 0.39453125, "learning_rate": 5.586711640793745e-05, "loss": 0.0055, "step": 13830 }, { "epoch": 6.4555425904317385, "grad_norm": 0.55078125, "learning_rate": 5.5853949700115704e-05, "loss": 0.002, "step": 13831 }, { "epoch": 6.456009334889148, "grad_norm": 0.107421875, "learning_rate": 5.584078394280744e-05, "loss": 0.0039, "step": 13832 }, { "epoch": 6.456476079346558, "grad_norm": 0.2001953125, "learning_rate": 5.5827619136296064e-05, "loss": 0.0043, "step": 13833 }, { "epoch": 6.456942823803967, "grad_norm": 0.55859375, "learning_rate": 5.5814455280865056e-05, "loss": 0.0026, "step": 13834 }, { "epoch": 6.457409568261377, "grad_norm": 0.126953125, "learning_rate": 5.580129237679788e-05, "loss": 0.0031, "step": 13835 }, { "epoch": 6.457876312718787, "grad_norm": 0.06787109375, "learning_rate": 5.57881304243779e-05, "loss": 0.0005, "step": 13836 }, { "epoch": 6.458343057176196, "grad_norm": 0.0859375, "learning_rate": 5.57749694238885e-05, "loss": 0.0043, "step": 13837 }, { "epoch": 6.458809801633605, "grad_norm": 0.035888671875, "learning_rate": 5.5761809375613125e-05, "loss": 0.0004, "step": 13838 }, { "epoch": 6.459276546091015, "grad_norm": 0.1796875, "learning_rate": 5.574865027983505e-05, "loss": 0.0014, "step": 13839 }, { "epoch": 6.459743290548425, "grad_norm": 0.267578125, "learning_rate": 5.573549213683762e-05, "loss": 0.0011, "step": 13840 }, { "epoch": 6.460210035005835, "grad_norm": 0.12890625, "learning_rate": 5.5722334946904196e-05, "loss": 0.0039, "step": 13841 }, { "epoch": 6.460676779463244, "grad_norm": 0.302734375, "learning_rate": 5.5709178710318e-05, "loss": 0.0016, "step": 13842 }, { "epoch": 6.461143523920653, "grad_norm": 0.1826171875, "learning_rate": 5.569602342736231e-05, "loss": 0.0013, "step": 13843 }, { "epoch": 6.461610268378063, "grad_norm": 0.1953125, "learning_rate": 5.568286909832043e-05, "loss": 0.0024, "step": 13844 }, { "epoch": 6.462077012835473, "grad_norm": 0.1025390625, "learning_rate": 5.566971572347553e-05, "loss": 0.0006, "step": 13845 }, { "epoch": 6.462543757292882, "grad_norm": 0.051025390625, "learning_rate": 5.5656563303110866e-05, "loss": 0.0005, "step": 13846 }, { "epoch": 6.463010501750292, "grad_norm": 0.32421875, "learning_rate": 5.564341183750959e-05, "loss": 0.0019, "step": 13847 }, { "epoch": 6.4634772462077015, "grad_norm": 0.1474609375, "learning_rate": 5.5630261326954834e-05, "loss": 0.0009, "step": 13848 }, { "epoch": 6.463943990665111, "grad_norm": 0.353515625, "learning_rate": 5.561711177172979e-05, "loss": 0.0056, "step": 13849 }, { "epoch": 6.46441073512252, "grad_norm": 0.037353515625, "learning_rate": 5.56039631721176e-05, "loss": 0.0006, "step": 13850 }, { "epoch": 6.46487747957993, "grad_norm": 0.173828125, "learning_rate": 5.559081552840131e-05, "loss": 0.0013, "step": 13851 }, { "epoch": 6.46534422403734, "grad_norm": 0.08251953125, "learning_rate": 5.557766884086403e-05, "loss": 0.0009, "step": 13852 }, { "epoch": 6.465810968494749, "grad_norm": 0.240234375, "learning_rate": 5.556452310978887e-05, "loss": 0.0012, "step": 13853 }, { "epoch": 6.4662777129521585, "grad_norm": 0.04150390625, "learning_rate": 5.5551378335458784e-05, "loss": 0.0006, "step": 13854 }, { "epoch": 6.466744457409568, "grad_norm": 0.255859375, "learning_rate": 5.553823451815685e-05, "loss": 0.0012, "step": 13855 }, { "epoch": 6.467211201866978, "grad_norm": 0.28125, "learning_rate": 5.5525091658166084e-05, "loss": 0.0015, "step": 13856 }, { "epoch": 6.467677946324388, "grad_norm": 0.0830078125, "learning_rate": 5.551194975576941e-05, "loss": 0.0008, "step": 13857 }, { "epoch": 6.468144690781797, "grad_norm": 0.1103515625, "learning_rate": 5.549880881124985e-05, "loss": 0.0032, "step": 13858 }, { "epoch": 6.468611435239207, "grad_norm": 0.03662109375, "learning_rate": 5.548566882489028e-05, "loss": 0.0004, "step": 13859 }, { "epoch": 6.469078179696616, "grad_norm": 0.1826171875, "learning_rate": 5.547252979697364e-05, "loss": 0.0025, "step": 13860 }, { "epoch": 6.469544924154025, "grad_norm": 0.443359375, "learning_rate": 5.5459391727782894e-05, "loss": 0.0097, "step": 13861 }, { "epoch": 6.470011668611435, "grad_norm": 0.11962890625, "learning_rate": 5.5446254617600804e-05, "loss": 0.0078, "step": 13862 }, { "epoch": 6.470478413068845, "grad_norm": 0.228515625, "learning_rate": 5.54331184667103e-05, "loss": 0.0016, "step": 13863 }, { "epoch": 6.470945157526255, "grad_norm": 0.0732421875, "learning_rate": 5.541998327539424e-05, "loss": 0.0006, "step": 13864 }, { "epoch": 6.471411901983664, "grad_norm": 0.1611328125, "learning_rate": 5.540684904393536e-05, "loss": 0.0008, "step": 13865 }, { "epoch": 6.471878646441073, "grad_norm": 0.06591796875, "learning_rate": 5.5393715772616494e-05, "loss": 0.0007, "step": 13866 }, { "epoch": 6.472345390898483, "grad_norm": 0.033447265625, "learning_rate": 5.538058346172046e-05, "loss": 0.0003, "step": 13867 }, { "epoch": 6.472812135355893, "grad_norm": 0.220703125, "learning_rate": 5.536745211152993e-05, "loss": 0.001, "step": 13868 }, { "epoch": 6.473278879813302, "grad_norm": 0.353515625, "learning_rate": 5.5354321722327685e-05, "loss": 0.0011, "step": 13869 }, { "epoch": 6.473745624270712, "grad_norm": 0.099609375, "learning_rate": 5.534119229439645e-05, "loss": 0.0033, "step": 13870 }, { "epoch": 6.4742123687281214, "grad_norm": 0.255859375, "learning_rate": 5.5328063828018875e-05, "loss": 0.0012, "step": 13871 }, { "epoch": 6.474679113185531, "grad_norm": 0.0703125, "learning_rate": 5.531493632347764e-05, "loss": 0.0007, "step": 13872 }, { "epoch": 6.47514585764294, "grad_norm": 0.0859375, "learning_rate": 5.5301809781055434e-05, "loss": 0.0051, "step": 13873 }, { "epoch": 6.47561260210035, "grad_norm": 0.1640625, "learning_rate": 5.5288684201034835e-05, "loss": 0.0012, "step": 13874 }, { "epoch": 6.47607934655776, "grad_norm": 0.16015625, "learning_rate": 5.527555958369848e-05, "loss": 0.001, "step": 13875 }, { "epoch": 6.4765460910151695, "grad_norm": 0.07373046875, "learning_rate": 5.526243592932897e-05, "loss": 0.0037, "step": 13876 }, { "epoch": 6.4770128354725784, "grad_norm": 0.1455078125, "learning_rate": 5.524931323820881e-05, "loss": 0.0006, "step": 13877 }, { "epoch": 6.477479579929988, "grad_norm": 0.1494140625, "learning_rate": 5.523619151062065e-05, "loss": 0.0007, "step": 13878 }, { "epoch": 6.477946324387398, "grad_norm": 0.06689453125, "learning_rate": 5.5223070746846894e-05, "loss": 0.0041, "step": 13879 }, { "epoch": 6.478413068844808, "grad_norm": 0.049560546875, "learning_rate": 5.520995094717012e-05, "loss": 0.0005, "step": 13880 }, { "epoch": 6.478879813302217, "grad_norm": 0.3125, "learning_rate": 5.519683211187282e-05, "loss": 0.0017, "step": 13881 }, { "epoch": 6.4793465577596265, "grad_norm": 0.072265625, "learning_rate": 5.5183714241237406e-05, "loss": 0.0005, "step": 13882 }, { "epoch": 6.479813302217036, "grad_norm": 0.173828125, "learning_rate": 5.517059733554635e-05, "loss": 0.004, "step": 13883 }, { "epoch": 6.480280046674446, "grad_norm": 0.1748046875, "learning_rate": 5.51574813950821e-05, "loss": 0.0008, "step": 13884 }, { "epoch": 6.480746791131855, "grad_norm": 0.12890625, "learning_rate": 5.5144366420127e-05, "loss": 0.001, "step": 13885 }, { "epoch": 6.481213535589265, "grad_norm": 0.185546875, "learning_rate": 5.5131252410963466e-05, "loss": 0.003, "step": 13886 }, { "epoch": 6.481680280046675, "grad_norm": 0.138671875, "learning_rate": 5.5118139367873866e-05, "loss": 0.0007, "step": 13887 }, { "epoch": 6.482147024504084, "grad_norm": 0.078125, "learning_rate": 5.510502729114051e-05, "loss": 0.0006, "step": 13888 }, { "epoch": 6.482613768961493, "grad_norm": 0.2392578125, "learning_rate": 5.509191618104572e-05, "loss": 0.0012, "step": 13889 }, { "epoch": 6.483080513418903, "grad_norm": 0.12060546875, "learning_rate": 5.5078806037871834e-05, "loss": 0.0008, "step": 13890 }, { "epoch": 6.483547257876313, "grad_norm": 0.07763671875, "learning_rate": 5.506569686190106e-05, "loss": 0.0006, "step": 13891 }, { "epoch": 6.484014002333723, "grad_norm": 0.220703125, "learning_rate": 5.505258865341568e-05, "loss": 0.001, "step": 13892 }, { "epoch": 6.484480746791132, "grad_norm": 0.42578125, "learning_rate": 5.503948141269799e-05, "loss": 0.0045, "step": 13893 }, { "epoch": 6.484947491248541, "grad_norm": 0.1943359375, "learning_rate": 5.502637514003014e-05, "loss": 0.0035, "step": 13894 }, { "epoch": 6.485414235705951, "grad_norm": 0.2236328125, "learning_rate": 5.501326983569429e-05, "loss": 0.0009, "step": 13895 }, { "epoch": 6.48588098016336, "grad_norm": 0.1435546875, "learning_rate": 5.50001654999727e-05, "loss": 0.0008, "step": 13896 }, { "epoch": 6.48634772462077, "grad_norm": 0.058837890625, "learning_rate": 5.498706213314744e-05, "loss": 0.0004, "step": 13897 }, { "epoch": 6.48681446907818, "grad_norm": 0.09814453125, "learning_rate": 5.497395973550066e-05, "loss": 0.0007, "step": 13898 }, { "epoch": 6.4872812135355895, "grad_norm": 0.0810546875, "learning_rate": 5.496085830731455e-05, "loss": 0.0006, "step": 13899 }, { "epoch": 6.487747957992999, "grad_norm": 0.03466796875, "learning_rate": 5.494775784887106e-05, "loss": 0.0004, "step": 13900 }, { "epoch": 6.488214702450408, "grad_norm": 0.056640625, "learning_rate": 5.493465836045235e-05, "loss": 0.0005, "step": 13901 }, { "epoch": 6.488681446907818, "grad_norm": 0.022705078125, "learning_rate": 5.492155984234049e-05, "loss": 0.0003, "step": 13902 }, { "epoch": 6.489148191365228, "grad_norm": 0.26953125, "learning_rate": 5.490846229481741e-05, "loss": 0.0017, "step": 13903 }, { "epoch": 6.489614935822637, "grad_norm": 0.07763671875, "learning_rate": 5.489536571816516e-05, "loss": 0.0005, "step": 13904 }, { "epoch": 6.4900816802800465, "grad_norm": 0.09619140625, "learning_rate": 5.4882270112665776e-05, "loss": 0.0007, "step": 13905 }, { "epoch": 6.490548424737456, "grad_norm": 0.2265625, "learning_rate": 5.486917547860115e-05, "loss": 0.001, "step": 13906 }, { "epoch": 6.491015169194866, "grad_norm": 0.05029296875, "learning_rate": 5.485608181625323e-05, "loss": 0.0004, "step": 13907 }, { "epoch": 6.491481913652275, "grad_norm": 0.08837890625, "learning_rate": 5.484298912590402e-05, "loss": 0.0005, "step": 13908 }, { "epoch": 6.491948658109685, "grad_norm": 0.06787109375, "learning_rate": 5.4829897407835305e-05, "loss": 0.0005, "step": 13909 }, { "epoch": 6.492415402567095, "grad_norm": 0.236328125, "learning_rate": 5.481680666232905e-05, "loss": 0.001, "step": 13910 }, { "epoch": 6.492882147024504, "grad_norm": 0.22265625, "learning_rate": 5.4803716889667045e-05, "loss": 0.0009, "step": 13911 }, { "epoch": 6.493348891481913, "grad_norm": 0.138671875, "learning_rate": 5.4790628090131157e-05, "loss": 0.0009, "step": 13912 }, { "epoch": 6.493815635939323, "grad_norm": 0.046875, "learning_rate": 5.477754026400326e-05, "loss": 0.0027, "step": 13913 }, { "epoch": 6.494282380396733, "grad_norm": 0.3203125, "learning_rate": 5.476445341156504e-05, "loss": 0.0018, "step": 13914 }, { "epoch": 6.494749124854143, "grad_norm": 0.177734375, "learning_rate": 5.475136753309833e-05, "loss": 0.0041, "step": 13915 }, { "epoch": 6.495215869311552, "grad_norm": 0.412109375, "learning_rate": 5.473828262888492e-05, "loss": 0.0019, "step": 13916 }, { "epoch": 6.495682613768961, "grad_norm": 0.0277099609375, "learning_rate": 5.472519869920647e-05, "loss": 0.0004, "step": 13917 }, { "epoch": 6.496149358226371, "grad_norm": 0.2255859375, "learning_rate": 5.4712115744344726e-05, "loss": 0.002, "step": 13918 }, { "epoch": 6.496616102683781, "grad_norm": 0.04345703125, "learning_rate": 5.469903376458142e-05, "loss": 0.0004, "step": 13919 }, { "epoch": 6.49708284714119, "grad_norm": 0.275390625, "learning_rate": 5.468595276019813e-05, "loss": 0.0049, "step": 13920 }, { "epoch": 6.4975495915986, "grad_norm": 0.048828125, "learning_rate": 5.4672872731476565e-05, "loss": 0.0004, "step": 13921 }, { "epoch": 6.4980163360560095, "grad_norm": 0.05517578125, "learning_rate": 5.465979367869838e-05, "loss": 0.0005, "step": 13922 }, { "epoch": 6.498483080513419, "grad_norm": 0.030517578125, "learning_rate": 5.464671560214509e-05, "loss": 0.0003, "step": 13923 }, { "epoch": 6.498949824970828, "grad_norm": 0.09033203125, "learning_rate": 5.463363850209835e-05, "loss": 0.0005, "step": 13924 }, { "epoch": 6.499416569428238, "grad_norm": 0.03173828125, "learning_rate": 5.4620562378839734e-05, "loss": 0.0004, "step": 13925 }, { "epoch": 6.499883313885648, "grad_norm": 0.263671875, "learning_rate": 5.4607487232650724e-05, "loss": 0.0013, "step": 13926 }, { "epoch": 6.500350058343058, "grad_norm": 0.80078125, "learning_rate": 5.4594413063812924e-05, "loss": 0.0066, "step": 13927 }, { "epoch": 6.5008168028004665, "grad_norm": 0.06005859375, "learning_rate": 5.458133987260774e-05, "loss": 0.0005, "step": 13928 }, { "epoch": 6.501283547257876, "grad_norm": 0.111328125, "learning_rate": 5.4568267659316704e-05, "loss": 0.0006, "step": 13929 }, { "epoch": 6.501750291715286, "grad_norm": 0.341796875, "learning_rate": 5.455519642422131e-05, "loss": 0.0015, "step": 13930 }, { "epoch": 6.502217036172695, "grad_norm": 0.076171875, "learning_rate": 5.454212616760292e-05, "loss": 0.0004, "step": 13931 }, { "epoch": 6.502683780630105, "grad_norm": 0.0299072265625, "learning_rate": 5.452905688974299e-05, "loss": 0.0004, "step": 13932 }, { "epoch": 6.503150525087515, "grad_norm": 0.162109375, "learning_rate": 5.451598859092297e-05, "loss": 0.0038, "step": 13933 }, { "epoch": 6.503617269544924, "grad_norm": 0.171875, "learning_rate": 5.450292127142413e-05, "loss": 0.001, "step": 13934 }, { "epoch": 6.504084014002334, "grad_norm": 0.04345703125, "learning_rate": 5.4489854931527874e-05, "loss": 0.0004, "step": 13935 }, { "epoch": 6.504550758459743, "grad_norm": 0.07421875, "learning_rate": 5.4476789571515576e-05, "loss": 0.0005, "step": 13936 }, { "epoch": 6.504550758459743, "eval_loss": 2.296372175216675, "eval_runtime": 84.5427, "eval_samples_per_second": 21.338, "eval_steps_per_second": 2.673, "step": 13936 }, { "epoch": 6.505017502917153, "grad_norm": 0.21484375, "learning_rate": 5.446372519166847e-05, "loss": 0.0012, "step": 13937 }, { "epoch": 6.505484247374563, "grad_norm": 0.0986328125, "learning_rate": 5.445066179226787e-05, "loss": 0.0008, "step": 13938 }, { "epoch": 6.505950991831972, "grad_norm": 0.08544921875, "learning_rate": 5.4437599373595116e-05, "loss": 0.0006, "step": 13939 }, { "epoch": 6.506417736289381, "grad_norm": 0.09375, "learning_rate": 5.442453793593136e-05, "loss": 0.0007, "step": 13940 }, { "epoch": 6.506884480746791, "grad_norm": 0.055419921875, "learning_rate": 5.4411477479557904e-05, "loss": 0.0004, "step": 13941 }, { "epoch": 6.507351225204201, "grad_norm": 0.2099609375, "learning_rate": 5.4398418004755914e-05, "loss": 0.0011, "step": 13942 }, { "epoch": 6.507817969661611, "grad_norm": 0.318359375, "learning_rate": 5.4385359511806544e-05, "loss": 0.0017, "step": 13943 }, { "epoch": 6.50828471411902, "grad_norm": 0.15625, "learning_rate": 5.4372302000990995e-05, "loss": 0.0026, "step": 13944 }, { "epoch": 6.5087514585764294, "grad_norm": 0.3125, "learning_rate": 5.435924547259046e-05, "loss": 0.0013, "step": 13945 }, { "epoch": 6.509218203033839, "grad_norm": 0.21875, "learning_rate": 5.4346189926885957e-05, "loss": 0.0014, "step": 13946 }, { "epoch": 6.509684947491248, "grad_norm": 0.2734375, "learning_rate": 5.433313536415864e-05, "loss": 0.0013, "step": 13947 }, { "epoch": 6.510151691948658, "grad_norm": 0.087890625, "learning_rate": 5.4320081784689625e-05, "loss": 0.0004, "step": 13948 }, { "epoch": 6.510618436406068, "grad_norm": 0.12890625, "learning_rate": 5.4307029188759896e-05, "loss": 0.0009, "step": 13949 }, { "epoch": 6.5110851808634775, "grad_norm": 0.080078125, "learning_rate": 5.4293977576650526e-05, "loss": 0.0006, "step": 13950 }, { "epoch": 6.5115519253208864, "grad_norm": 0.205078125, "learning_rate": 5.428092694864256e-05, "loss": 0.0031, "step": 13951 }, { "epoch": 6.512018669778296, "grad_norm": 0.1044921875, "learning_rate": 5.426787730501694e-05, "loss": 0.0006, "step": 13952 }, { "epoch": 6.512485414235706, "grad_norm": 0.17578125, "learning_rate": 5.4254828646054646e-05, "loss": 0.0044, "step": 13953 }, { "epoch": 6.512952158693116, "grad_norm": 0.2060546875, "learning_rate": 5.424178097203669e-05, "loss": 0.001, "step": 13954 }, { "epoch": 6.513418903150525, "grad_norm": 0.0830078125, "learning_rate": 5.422873428324393e-05, "loss": 0.0006, "step": 13955 }, { "epoch": 6.5138856476079345, "grad_norm": 0.08740234375, "learning_rate": 5.421568857995729e-05, "loss": 0.002, "step": 13956 }, { "epoch": 6.514352392065344, "grad_norm": 0.045654296875, "learning_rate": 5.420264386245772e-05, "loss": 0.0003, "step": 13957 }, { "epoch": 6.514819136522754, "grad_norm": 0.020263671875, "learning_rate": 5.418960013102601e-05, "loss": 0.0002, "step": 13958 }, { "epoch": 6.515285880980163, "grad_norm": 0.220703125, "learning_rate": 5.417655738594307e-05, "loss": 0.003, "step": 13959 }, { "epoch": 6.515752625437573, "grad_norm": 0.026611328125, "learning_rate": 5.416351562748965e-05, "loss": 0.0002, "step": 13960 }, { "epoch": 6.516219369894983, "grad_norm": 0.041748046875, "learning_rate": 5.41504748559466e-05, "loss": 0.0004, "step": 13961 }, { "epoch": 6.516686114352392, "grad_norm": 0.208984375, "learning_rate": 5.413743507159473e-05, "loss": 0.0026, "step": 13962 }, { "epoch": 6.517152858809801, "grad_norm": 0.11767578125, "learning_rate": 5.412439627471473e-05, "loss": 0.0054, "step": 13963 }, { "epoch": 6.517619603267211, "grad_norm": 0.201171875, "learning_rate": 5.4111358465587384e-05, "loss": 0.0009, "step": 13964 }, { "epoch": 6.518086347724621, "grad_norm": 0.1337890625, "learning_rate": 5.4098321644493445e-05, "loss": 0.0007, "step": 13965 }, { "epoch": 6.518553092182031, "grad_norm": 0.24609375, "learning_rate": 5.408528581171352e-05, "loss": 0.001, "step": 13966 }, { "epoch": 6.51901983663944, "grad_norm": 0.2255859375, "learning_rate": 5.4072250967528335e-05, "loss": 0.0018, "step": 13967 }, { "epoch": 6.519486581096849, "grad_norm": 0.046630859375, "learning_rate": 5.4059217112218596e-05, "loss": 0.0004, "step": 13968 }, { "epoch": 6.519953325554259, "grad_norm": 0.4296875, "learning_rate": 5.404618424606483e-05, "loss": 0.0043, "step": 13969 }, { "epoch": 6.520420070011669, "grad_norm": 0.12451171875, "learning_rate": 5.403315236934772e-05, "loss": 0.0007, "step": 13970 }, { "epoch": 6.520886814469078, "grad_norm": 0.0380859375, "learning_rate": 5.4020121482347864e-05, "loss": 0.0004, "step": 13971 }, { "epoch": 6.521353558926488, "grad_norm": 0.283203125, "learning_rate": 5.400709158534578e-05, "loss": 0.0012, "step": 13972 }, { "epoch": 6.5218203033838975, "grad_norm": 0.049560546875, "learning_rate": 5.3994062678622036e-05, "loss": 0.0004, "step": 13973 }, { "epoch": 6.522287047841306, "grad_norm": 0.048828125, "learning_rate": 5.398103476245722e-05, "loss": 0.0004, "step": 13974 }, { "epoch": 6.522753792298716, "grad_norm": 0.11767578125, "learning_rate": 5.396800783713173e-05, "loss": 0.0009, "step": 13975 }, { "epoch": 6.523220536756126, "grad_norm": 0.07763671875, "learning_rate": 5.3954981902926147e-05, "loss": 0.0004, "step": 13976 }, { "epoch": 6.523687281213536, "grad_norm": 0.068359375, "learning_rate": 5.394195696012086e-05, "loss": 0.0006, "step": 13977 }, { "epoch": 6.524154025670946, "grad_norm": 0.056396484375, "learning_rate": 5.392893300899633e-05, "loss": 0.0028, "step": 13978 }, { "epoch": 6.5246207701283545, "grad_norm": 0.1396484375, "learning_rate": 5.391591004983304e-05, "loss": 0.0008, "step": 13979 }, { "epoch": 6.525087514585764, "grad_norm": 0.12890625, "learning_rate": 5.390288808291129e-05, "loss": 0.0008, "step": 13980 }, { "epoch": 6.525554259043174, "grad_norm": 0.37890625, "learning_rate": 5.388986710851152e-05, "loss": 0.0034, "step": 13981 }, { "epoch": 6.526021003500583, "grad_norm": 0.1630859375, "learning_rate": 5.3876847126914085e-05, "loss": 0.0009, "step": 13982 }, { "epoch": 6.526487747957993, "grad_norm": 0.234375, "learning_rate": 5.386382813839929e-05, "loss": 0.0018, "step": 13983 }, { "epoch": 6.526954492415403, "grad_norm": 0.0322265625, "learning_rate": 5.3850810143247444e-05, "loss": 0.0003, "step": 13984 }, { "epoch": 6.527421236872812, "grad_norm": 0.06201171875, "learning_rate": 5.383779314173891e-05, "loss": 0.0035, "step": 13985 }, { "epoch": 6.527887981330222, "grad_norm": 0.056640625, "learning_rate": 5.382477713415388e-05, "loss": 0.0015, "step": 13986 }, { "epoch": 6.528354725787631, "grad_norm": 0.431640625, "learning_rate": 5.3811762120772615e-05, "loss": 0.0027, "step": 13987 }, { "epoch": 6.528821470245041, "grad_norm": 0.054931640625, "learning_rate": 5.37987481018754e-05, "loss": 0.0004, "step": 13988 }, { "epoch": 6.529288214702451, "grad_norm": 0.034912109375, "learning_rate": 5.37857350777424e-05, "loss": 0.0004, "step": 13989 }, { "epoch": 6.52975495915986, "grad_norm": 0.310546875, "learning_rate": 5.377272304865376e-05, "loss": 0.0007, "step": 13990 }, { "epoch": 6.530221703617269, "grad_norm": 0.2265625, "learning_rate": 5.375971201488972e-05, "loss": 0.0014, "step": 13991 }, { "epoch": 6.530688448074679, "grad_norm": 0.1328125, "learning_rate": 5.374670197673035e-05, "loss": 0.0028, "step": 13992 }, { "epoch": 6.531155192532089, "grad_norm": 0.107421875, "learning_rate": 5.37336929344558e-05, "loss": 0.0007, "step": 13993 }, { "epoch": 6.531621936989498, "grad_norm": 0.040771484375, "learning_rate": 5.3720684888346225e-05, "loss": 0.0004, "step": 13994 }, { "epoch": 6.532088681446908, "grad_norm": 0.32421875, "learning_rate": 5.37076778386816e-05, "loss": 0.0012, "step": 13995 }, { "epoch": 6.5325554259043175, "grad_norm": 0.2734375, "learning_rate": 5.3694671785742036e-05, "loss": 0.0013, "step": 13996 }, { "epoch": 6.533022170361727, "grad_norm": 0.2109375, "learning_rate": 5.368166672980761e-05, "loss": 0.0014, "step": 13997 }, { "epoch": 6.533488914819136, "grad_norm": 0.068359375, "learning_rate": 5.3668662671158244e-05, "loss": 0.0005, "step": 13998 }, { "epoch": 6.533955659276546, "grad_norm": 0.042724609375, "learning_rate": 5.3655659610073986e-05, "loss": 0.0024, "step": 13999 }, { "epoch": 6.534422403733956, "grad_norm": 0.11865234375, "learning_rate": 5.364265754683483e-05, "loss": 0.0005, "step": 14000 }, { "epoch": 6.534889148191366, "grad_norm": 0.1650390625, "learning_rate": 5.3629656481720646e-05, "loss": 0.001, "step": 14001 }, { "epoch": 6.5353558926487745, "grad_norm": 0.1630859375, "learning_rate": 5.361665641501141e-05, "loss": 0.005, "step": 14002 }, { "epoch": 6.535822637106184, "grad_norm": 0.0791015625, "learning_rate": 5.3603657346987066e-05, "loss": 0.0005, "step": 14003 }, { "epoch": 6.536289381563594, "grad_norm": 0.0791015625, "learning_rate": 5.3590659277927424e-05, "loss": 0.0006, "step": 14004 }, { "epoch": 6.536756126021004, "grad_norm": 0.039794921875, "learning_rate": 5.357766220811238e-05, "loss": 0.0004, "step": 14005 }, { "epoch": 6.537222870478413, "grad_norm": 0.0242919921875, "learning_rate": 5.356466613782181e-05, "loss": 0.0002, "step": 14006 }, { "epoch": 6.537689614935823, "grad_norm": 0.265625, "learning_rate": 5.355167106733545e-05, "loss": 0.0016, "step": 14007 }, { "epoch": 6.538156359393232, "grad_norm": 0.054443359375, "learning_rate": 5.35386769969332e-05, "loss": 0.0004, "step": 14008 }, { "epoch": 6.538623103850642, "grad_norm": 0.34375, "learning_rate": 5.3525683926894746e-05, "loss": 0.002, "step": 14009 }, { "epoch": 6.539089848308051, "grad_norm": 0.283203125, "learning_rate": 5.351269185749988e-05, "loss": 0.0009, "step": 14010 }, { "epoch": 6.539556592765461, "grad_norm": 0.10400390625, "learning_rate": 5.349970078902835e-05, "loss": 0.0007, "step": 14011 }, { "epoch": 6.540023337222871, "grad_norm": 0.349609375, "learning_rate": 5.3486710721759835e-05, "loss": 0.0015, "step": 14012 }, { "epoch": 6.5404900816802805, "grad_norm": 0.15625, "learning_rate": 5.347372165597406e-05, "loss": 0.0018, "step": 14013 }, { "epoch": 6.540956826137689, "grad_norm": 0.2890625, "learning_rate": 5.3460733591950695e-05, "loss": 0.0009, "step": 14014 }, { "epoch": 6.541423570595099, "grad_norm": 0.08837890625, "learning_rate": 5.344774652996936e-05, "loss": 0.0006, "step": 14015 }, { "epoch": 6.541890315052509, "grad_norm": 0.033447265625, "learning_rate": 5.3434760470309666e-05, "loss": 0.0004, "step": 14016 }, { "epoch": 6.542357059509918, "grad_norm": 0.4375, "learning_rate": 5.34217754132513e-05, "loss": 0.0018, "step": 14017 }, { "epoch": 6.542823803967328, "grad_norm": 0.02880859375, "learning_rate": 5.340879135907374e-05, "loss": 0.0004, "step": 14018 }, { "epoch": 6.5432905484247375, "grad_norm": 0.036376953125, "learning_rate": 5.3395808308056596e-05, "loss": 0.0003, "step": 14019 }, { "epoch": 6.543757292882147, "grad_norm": 0.1650390625, "learning_rate": 5.3382826260479454e-05, "loss": 0.001, "step": 14020 }, { "epoch": 6.544224037339557, "grad_norm": 0.06298828125, "learning_rate": 5.336984521662174e-05, "loss": 0.0004, "step": 14021 }, { "epoch": 6.544690781796966, "grad_norm": 0.1591796875, "learning_rate": 5.335686517676299e-05, "loss": 0.0041, "step": 14022 }, { "epoch": 6.545157526254376, "grad_norm": 0.271484375, "learning_rate": 5.334388614118273e-05, "loss": 0.0039, "step": 14023 }, { "epoch": 6.5456242707117855, "grad_norm": 0.0556640625, "learning_rate": 5.333090811016034e-05, "loss": 0.0005, "step": 14024 }, { "epoch": 6.5460910151691944, "grad_norm": 0.11279296875, "learning_rate": 5.3317931083975305e-05, "loss": 0.0007, "step": 14025 }, { "epoch": 6.546557759626604, "grad_norm": 0.0634765625, "learning_rate": 5.330495506290697e-05, "loss": 0.0005, "step": 14026 }, { "epoch": 6.547024504084014, "grad_norm": 0.12353515625, "learning_rate": 5.3291980047234754e-05, "loss": 0.0016, "step": 14027 }, { "epoch": 6.547491248541424, "grad_norm": 0.1552734375, "learning_rate": 5.3279006037238075e-05, "loss": 0.0008, "step": 14028 }, { "epoch": 6.547957992998834, "grad_norm": 0.19140625, "learning_rate": 5.326603303319621e-05, "loss": 0.0034, "step": 14029 }, { "epoch": 6.5484247374562425, "grad_norm": 0.10546875, "learning_rate": 5.3253061035388495e-05, "loss": 0.0028, "step": 14030 }, { "epoch": 6.548891481913652, "grad_norm": 0.22265625, "learning_rate": 5.324009004409427e-05, "loss": 0.001, "step": 14031 }, { "epoch": 6.549358226371062, "grad_norm": 0.1396484375, "learning_rate": 5.322712005959276e-05, "loss": 0.0006, "step": 14032 }, { "epoch": 6.549824970828471, "grad_norm": 0.24609375, "learning_rate": 5.3214151082163255e-05, "loss": 0.001, "step": 14033 }, { "epoch": 6.550291715285881, "grad_norm": 0.036865234375, "learning_rate": 5.320118311208502e-05, "loss": 0.0025, "step": 14034 }, { "epoch": 6.550758459743291, "grad_norm": 0.048095703125, "learning_rate": 5.3188216149637204e-05, "loss": 0.0004, "step": 14035 }, { "epoch": 6.5512252042007, "grad_norm": 0.169921875, "learning_rate": 5.3175250195099025e-05, "loss": 0.0007, "step": 14036 }, { "epoch": 6.551691948658109, "grad_norm": 0.018310546875, "learning_rate": 5.316228524874974e-05, "loss": 0.0003, "step": 14037 }, { "epoch": 6.552158693115519, "grad_norm": 0.033447265625, "learning_rate": 5.314932131086835e-05, "loss": 0.0004, "step": 14038 }, { "epoch": 6.552625437572929, "grad_norm": 0.306640625, "learning_rate": 5.313635838173405e-05, "loss": 0.0038, "step": 14039 }, { "epoch": 6.553092182030339, "grad_norm": 0.08203125, "learning_rate": 5.3123396461625986e-05, "loss": 0.001, "step": 14040 }, { "epoch": 6.553558926487748, "grad_norm": 0.091796875, "learning_rate": 5.3110435550823176e-05, "loss": 0.0033, "step": 14041 }, { "epoch": 6.554025670945157, "grad_norm": 0.0260009765625, "learning_rate": 5.30974756496047e-05, "loss": 0.0003, "step": 14042 }, { "epoch": 6.554492415402567, "grad_norm": 0.134765625, "learning_rate": 5.308451675824966e-05, "loss": 0.0026, "step": 14043 }, { "epoch": 6.554959159859977, "grad_norm": 0.259765625, "learning_rate": 5.307155887703699e-05, "loss": 0.0036, "step": 14044 }, { "epoch": 6.555425904317386, "grad_norm": 0.251953125, "learning_rate": 5.305860200624573e-05, "loss": 0.0008, "step": 14045 }, { "epoch": 6.555892648774796, "grad_norm": 0.166015625, "learning_rate": 5.304564614615488e-05, "loss": 0.0008, "step": 14046 }, { "epoch": 6.5563593932322055, "grad_norm": 0.171875, "learning_rate": 5.3032691297043335e-05, "loss": 0.004, "step": 14047 }, { "epoch": 6.556826137689615, "grad_norm": 0.01513671875, "learning_rate": 5.301973745919004e-05, "loss": 0.0002, "step": 14048 }, { "epoch": 6.557292882147024, "grad_norm": 0.357421875, "learning_rate": 5.300678463287399e-05, "loss": 0.002, "step": 14049 }, { "epoch": 6.557759626604434, "grad_norm": 0.0849609375, "learning_rate": 5.2993832818373935e-05, "loss": 0.0005, "step": 14050 }, { "epoch": 6.558226371061844, "grad_norm": 0.0625, "learning_rate": 5.298088201596884e-05, "loss": 0.0006, "step": 14051 }, { "epoch": 6.558693115519253, "grad_norm": 0.185546875, "learning_rate": 5.296793222593755e-05, "loss": 0.001, "step": 14052 }, { "epoch": 6.5591598599766625, "grad_norm": 0.14453125, "learning_rate": 5.2954983448558824e-05, "loss": 0.0008, "step": 14053 }, { "epoch": 6.559626604434072, "grad_norm": 0.06494140625, "learning_rate": 5.294203568411151e-05, "loss": 0.0032, "step": 14054 }, { "epoch": 6.560093348891482, "grad_norm": 0.2001953125, "learning_rate": 5.2929088932874405e-05, "loss": 0.0011, "step": 14055 }, { "epoch": 6.560560093348892, "grad_norm": 0.0673828125, "learning_rate": 5.2916143195126214e-05, "loss": 0.0006, "step": 14056 }, { "epoch": 6.561026837806301, "grad_norm": 0.28125, "learning_rate": 5.290319847114573e-05, "loss": 0.0019, "step": 14057 }, { "epoch": 6.561493582263711, "grad_norm": 0.1337890625, "learning_rate": 5.289025476121161e-05, "loss": 0.0009, "step": 14058 }, { "epoch": 6.56196032672112, "grad_norm": 0.08740234375, "learning_rate": 5.287731206560257e-05, "loss": 0.003, "step": 14059 }, { "epoch": 6.562427071178529, "grad_norm": 0.1455078125, "learning_rate": 5.286437038459733e-05, "loss": 0.0006, "step": 14060 }, { "epoch": 6.562893815635939, "grad_norm": 0.13671875, "learning_rate": 5.285142971847445e-05, "loss": 0.0008, "step": 14061 }, { "epoch": 6.563360560093349, "grad_norm": 0.408203125, "learning_rate": 5.283849006751258e-05, "loss": 0.0017, "step": 14062 }, { "epoch": 6.563827304550759, "grad_norm": 0.08447265625, "learning_rate": 5.282555143199042e-05, "loss": 0.0005, "step": 14063 }, { "epoch": 6.5642940490081685, "grad_norm": 0.0810546875, "learning_rate": 5.2812613812186404e-05, "loss": 0.0006, "step": 14064 }, { "epoch": 6.564760793465577, "grad_norm": 0.0311279296875, "learning_rate": 5.27996772083792e-05, "loss": 0.0004, "step": 14065 }, { "epoch": 6.565227537922987, "grad_norm": 0.0634765625, "learning_rate": 5.278674162084733e-05, "loss": 0.0005, "step": 14066 }, { "epoch": 6.565694282380397, "grad_norm": 0.1708984375, "learning_rate": 5.2773807049869273e-05, "loss": 0.0029, "step": 14067 }, { "epoch": 6.566161026837806, "grad_norm": 0.06982421875, "learning_rate": 5.2760873495723536e-05, "loss": 0.0053, "step": 14068 }, { "epoch": 6.566627771295216, "grad_norm": 0.28125, "learning_rate": 5.274794095868866e-05, "loss": 0.002, "step": 14069 }, { "epoch": 6.5670945157526255, "grad_norm": 0.10400390625, "learning_rate": 5.2735009439042995e-05, "loss": 0.0007, "step": 14070 }, { "epoch": 6.567561260210035, "grad_norm": 0.0751953125, "learning_rate": 5.272207893706501e-05, "loss": 0.0006, "step": 14071 }, { "epoch": 6.568028004667444, "grad_norm": 0.2314453125, "learning_rate": 5.270914945303317e-05, "loss": 0.0008, "step": 14072 }, { "epoch": 6.568494749124854, "grad_norm": 0.8125, "learning_rate": 5.269622098722578e-05, "loss": 0.0086, "step": 14073 }, { "epoch": 6.568961493582264, "grad_norm": 0.140625, "learning_rate": 5.268329353992122e-05, "loss": 0.0049, "step": 14074 }, { "epoch": 6.569428238039674, "grad_norm": 0.1435546875, "learning_rate": 5.2670367111397903e-05, "loss": 0.0027, "step": 14075 }, { "epoch": 6.5698949824970825, "grad_norm": 0.058349609375, "learning_rate": 5.265744170193404e-05, "loss": 0.0004, "step": 14076 }, { "epoch": 6.570361726954492, "grad_norm": 0.09130859375, "learning_rate": 5.264451731180804e-05, "loss": 0.0006, "step": 14077 }, { "epoch": 6.570828471411902, "grad_norm": 0.037841796875, "learning_rate": 5.263159394129807e-05, "loss": 0.0004, "step": 14078 }, { "epoch": 6.571295215869312, "grad_norm": 0.078125, "learning_rate": 5.2618671590682457e-05, "loss": 0.0004, "step": 14079 }, { "epoch": 6.571761960326721, "grad_norm": 0.35546875, "learning_rate": 5.260575026023944e-05, "loss": 0.0012, "step": 14080 }, { "epoch": 6.572228704784131, "grad_norm": 0.11474609375, "learning_rate": 5.2592829950247166e-05, "loss": 0.0006, "step": 14081 }, { "epoch": 6.57269544924154, "grad_norm": 0.0830078125, "learning_rate": 5.257991066098387e-05, "loss": 0.0006, "step": 14082 }, { "epoch": 6.57316219369895, "grad_norm": 0.0595703125, "learning_rate": 5.2566992392727746e-05, "loss": 0.0003, "step": 14083 }, { "epoch": 6.573628938156359, "grad_norm": 0.375, "learning_rate": 5.255407514575685e-05, "loss": 0.0035, "step": 14084 }, { "epoch": 6.574095682613769, "grad_norm": 0.14453125, "learning_rate": 5.254115892034942e-05, "loss": 0.0027, "step": 14085 }, { "epoch": 6.574562427071179, "grad_norm": 0.0230712890625, "learning_rate": 5.2528243716783445e-05, "loss": 0.0003, "step": 14086 }, { "epoch": 6.5750291715285885, "grad_norm": 0.2216796875, "learning_rate": 5.2515329535337096e-05, "loss": 0.0034, "step": 14087 }, { "epoch": 6.575495915985997, "grad_norm": 0.166015625, "learning_rate": 5.250241637628835e-05, "loss": 0.001, "step": 14088 }, { "epoch": 6.575962660443407, "grad_norm": 0.0712890625, "learning_rate": 5.2489504239915323e-05, "loss": 0.0006, "step": 14089 }, { "epoch": 6.576429404900817, "grad_norm": 0.181640625, "learning_rate": 5.2476593126495933e-05, "loss": 0.0009, "step": 14090 }, { "epoch": 6.576896149358227, "grad_norm": 0.041748046875, "learning_rate": 5.246368303630822e-05, "loss": 0.0003, "step": 14091 }, { "epoch": 6.577362893815636, "grad_norm": 0.04541015625, "learning_rate": 5.245077396963021e-05, "loss": 0.0004, "step": 14092 }, { "epoch": 6.5778296382730455, "grad_norm": 0.10693359375, "learning_rate": 5.2437865926739736e-05, "loss": 0.0006, "step": 14093 }, { "epoch": 6.578296382730455, "grad_norm": 0.048095703125, "learning_rate": 5.242495890791478e-05, "loss": 0.0004, "step": 14094 }, { "epoch": 6.578763127187864, "grad_norm": 0.07666015625, "learning_rate": 5.24120529134333e-05, "loss": 0.0057, "step": 14095 }, { "epoch": 6.579229871645274, "grad_norm": 0.052490234375, "learning_rate": 5.239914794357306e-05, "loss": 0.0003, "step": 14096 }, { "epoch": 6.579696616102684, "grad_norm": 0.146484375, "learning_rate": 5.238624399861198e-05, "loss": 0.0006, "step": 14097 }, { "epoch": 6.5801633605600935, "grad_norm": 0.251953125, "learning_rate": 5.237334107882795e-05, "loss": 0.0015, "step": 14098 }, { "epoch": 6.580630105017503, "grad_norm": 0.150390625, "learning_rate": 5.236043918449869e-05, "loss": 0.0005, "step": 14099 }, { "epoch": 6.581096849474912, "grad_norm": 0.107421875, "learning_rate": 5.2347538315902026e-05, "loss": 0.0006, "step": 14100 }, { "epoch": 6.581563593932322, "grad_norm": 0.21484375, "learning_rate": 5.2334638473315766e-05, "loss": 0.002, "step": 14101 }, { "epoch": 6.582030338389732, "grad_norm": 0.080078125, "learning_rate": 5.232173965701759e-05, "loss": 0.0036, "step": 14102 }, { "epoch": 6.582497082847141, "grad_norm": 0.251953125, "learning_rate": 5.230884186728526e-05, "loss": 0.0009, "step": 14103 }, { "epoch": 6.5829638273045505, "grad_norm": 0.12890625, "learning_rate": 5.2295945104396524e-05, "loss": 0.0007, "step": 14104 }, { "epoch": 6.58343057176196, "grad_norm": 0.11572265625, "learning_rate": 5.2283049368628964e-05, "loss": 0.0006, "step": 14105 }, { "epoch": 6.58389731621937, "grad_norm": 0.2734375, "learning_rate": 5.227015466026035e-05, "loss": 0.0011, "step": 14106 }, { "epoch": 6.58436406067678, "grad_norm": 0.18359375, "learning_rate": 5.225726097956822e-05, "loss": 0.0009, "step": 14107 }, { "epoch": 6.584830805134189, "grad_norm": 0.376953125, "learning_rate": 5.224436832683022e-05, "loss": 0.0019, "step": 14108 }, { "epoch": 6.585297549591599, "grad_norm": 0.09619140625, "learning_rate": 5.223147670232401e-05, "loss": 0.0005, "step": 14109 }, { "epoch": 6.585764294049008, "grad_norm": 0.12060546875, "learning_rate": 5.221858610632706e-05, "loss": 0.0006, "step": 14110 }, { "epoch": 6.586231038506417, "grad_norm": 0.07568359375, "learning_rate": 5.220569653911698e-05, "loss": 0.0005, "step": 14111 }, { "epoch": 6.586697782963827, "grad_norm": 0.099609375, "learning_rate": 5.2192808000971304e-05, "loss": 0.0004, "step": 14112 }, { "epoch": 6.587164527421237, "grad_norm": 0.1806640625, "learning_rate": 5.2179920492167464e-05, "loss": 0.0007, "step": 14113 }, { "epoch": 6.587631271878647, "grad_norm": 0.19140625, "learning_rate": 5.216703401298302e-05, "loss": 0.0011, "step": 14114 }, { "epoch": 6.588098016336056, "grad_norm": 0.248046875, "learning_rate": 5.2154148563695426e-05, "loss": 0.0014, "step": 14115 }, { "epoch": 6.588564760793465, "grad_norm": 0.1806640625, "learning_rate": 5.214126414458207e-05, "loss": 0.0009, "step": 14116 }, { "epoch": 6.589031505250875, "grad_norm": 0.0186767578125, "learning_rate": 5.2128380755920404e-05, "loss": 0.0003, "step": 14117 }, { "epoch": 6.589498249708285, "grad_norm": 0.138671875, "learning_rate": 5.2115498397987836e-05, "loss": 0.0006, "step": 14118 }, { "epoch": 6.589964994165694, "grad_norm": 0.1474609375, "learning_rate": 5.21026170710617e-05, "loss": 0.0007, "step": 14119 }, { "epoch": 6.590431738623104, "grad_norm": 0.205078125, "learning_rate": 5.208973677541934e-05, "loss": 0.001, "step": 14120 }, { "epoch": 6.5908984830805135, "grad_norm": 0.109375, "learning_rate": 5.207685751133815e-05, "loss": 0.0004, "step": 14121 }, { "epoch": 6.591365227537923, "grad_norm": 0.08203125, "learning_rate": 5.2063979279095355e-05, "loss": 0.0006, "step": 14122 }, { "epoch": 6.591831971995332, "grad_norm": 0.283203125, "learning_rate": 5.205110207896827e-05, "loss": 0.002, "step": 14123 }, { "epoch": 6.592298716452742, "grad_norm": 0.08740234375, "learning_rate": 5.20382259112342e-05, "loss": 0.0005, "step": 14124 }, { "epoch": 6.592765460910152, "grad_norm": 0.1962890625, "learning_rate": 5.20253507761703e-05, "loss": 0.001, "step": 14125 }, { "epoch": 6.593232205367562, "grad_norm": 0.037353515625, "learning_rate": 5.2012476674053856e-05, "loss": 0.0004, "step": 14126 }, { "epoch": 6.5936989498249705, "grad_norm": 0.255859375, "learning_rate": 5.199960360516201e-05, "loss": 0.0011, "step": 14127 }, { "epoch": 6.59416569428238, "grad_norm": 0.06787109375, "learning_rate": 5.198673156977194e-05, "loss": 0.0004, "step": 14128 }, { "epoch": 6.59463243873979, "grad_norm": 0.02880859375, "learning_rate": 5.1973860568160845e-05, "loss": 0.0019, "step": 14129 }, { "epoch": 6.5950991831972, "grad_norm": 0.1875, "learning_rate": 5.19609906006058e-05, "loss": 0.001, "step": 14130 }, { "epoch": 6.595565927654609, "grad_norm": 0.515625, "learning_rate": 5.194812166738392e-05, "loss": 0.0022, "step": 14131 }, { "epoch": 6.596032672112019, "grad_norm": 0.0478515625, "learning_rate": 5.193525376877233e-05, "loss": 0.0004, "step": 14132 }, { "epoch": 6.596499416569428, "grad_norm": 0.064453125, "learning_rate": 5.192238690504805e-05, "loss": 0.0023, "step": 14133 }, { "epoch": 6.596966161026838, "grad_norm": 0.03173828125, "learning_rate": 5.19095210764881e-05, "loss": 0.0003, "step": 14134 }, { "epoch": 6.597432905484247, "grad_norm": 0.1572265625, "learning_rate": 5.1896656283369506e-05, "loss": 0.0004, "step": 14135 }, { "epoch": 6.597899649941657, "grad_norm": 0.103515625, "learning_rate": 5.188379252596931e-05, "loss": 0.0041, "step": 14136 }, { "epoch": 6.598366394399067, "grad_norm": 0.0712890625, "learning_rate": 5.187092980456441e-05, "loss": 0.0005, "step": 14137 }, { "epoch": 6.598833138856476, "grad_norm": 0.0703125, "learning_rate": 5.185806811943184e-05, "loss": 0.0042, "step": 14138 }, { "epoch": 6.599299883313885, "grad_norm": 0.0966796875, "learning_rate": 5.184520747084842e-05, "loss": 0.0004, "step": 14139 }, { "epoch": 6.599766627771295, "grad_norm": 0.1748046875, "learning_rate": 5.1832347859091136e-05, "loss": 0.007, "step": 14140 }, { "epoch": 6.600233372228705, "grad_norm": 0.255859375, "learning_rate": 5.181948928443687e-05, "loss": 0.0013, "step": 14141 }, { "epoch": 6.600700116686115, "grad_norm": 0.193359375, "learning_rate": 5.180663174716243e-05, "loss": 0.0033, "step": 14142 }, { "epoch": 6.601166861143524, "grad_norm": 0.255859375, "learning_rate": 5.1793775247544664e-05, "loss": 0.0034, "step": 14143 }, { "epoch": 6.6016336056009335, "grad_norm": 0.30078125, "learning_rate": 5.178091978586045e-05, "loss": 0.0017, "step": 14144 }, { "epoch": 6.602100350058343, "grad_norm": 0.057373046875, "learning_rate": 5.17680653623865e-05, "loss": 0.0021, "step": 14145 }, { "epoch": 6.602567094515752, "grad_norm": 0.06103515625, "learning_rate": 5.17552119773996e-05, "loss": 0.0004, "step": 14146 }, { "epoch": 6.603033838973162, "grad_norm": 0.011474609375, "learning_rate": 5.1742359631176564e-05, "loss": 0.0002, "step": 14147 }, { "epoch": 6.603500583430572, "grad_norm": 0.185546875, "learning_rate": 5.172950832399403e-05, "loss": 0.0006, "step": 14148 }, { "epoch": 6.603967327887982, "grad_norm": 0.2578125, "learning_rate": 5.171665805612873e-05, "loss": 0.0014, "step": 14149 }, { "epoch": 6.604434072345391, "grad_norm": 0.09375, "learning_rate": 5.17038088278574e-05, "loss": 0.0048, "step": 14150 }, { "epoch": 6.6049008168028, "grad_norm": 0.08056640625, "learning_rate": 5.169096063945662e-05, "loss": 0.0053, "step": 14151 }, { "epoch": 6.60536756126021, "grad_norm": 0.21484375, "learning_rate": 5.167811349120304e-05, "loss": 0.0021, "step": 14152 }, { "epoch": 6.60583430571762, "grad_norm": 0.07958984375, "learning_rate": 5.166526738337334e-05, "loss": 0.003, "step": 14153 }, { "epoch": 6.606301050175029, "grad_norm": 0.1103515625, "learning_rate": 5.165242231624402e-05, "loss": 0.0007, "step": 14154 }, { "epoch": 6.606767794632439, "grad_norm": 0.2060546875, "learning_rate": 5.163957829009174e-05, "loss": 0.0006, "step": 14155 }, { "epoch": 6.607234539089848, "grad_norm": 0.050537109375, "learning_rate": 5.1626735305192944e-05, "loss": 0.0004, "step": 14156 }, { "epoch": 6.607701283547258, "grad_norm": 0.123046875, "learning_rate": 5.161389336182421e-05, "loss": 0.0006, "step": 14157 }, { "epoch": 6.608168028004667, "grad_norm": 0.1884765625, "learning_rate": 5.1601052460262076e-05, "loss": 0.0016, "step": 14158 }, { "epoch": 6.608634772462077, "grad_norm": 0.036865234375, "learning_rate": 5.158821260078295e-05, "loss": 0.0004, "step": 14159 }, { "epoch": 6.609101516919487, "grad_norm": 0.047119140625, "learning_rate": 5.15753737836633e-05, "loss": 0.0003, "step": 14160 }, { "epoch": 6.6095682613768965, "grad_norm": 0.07275390625, "learning_rate": 5.156253600917964e-05, "loss": 0.0006, "step": 14161 }, { "epoch": 6.610035005834305, "grad_norm": 0.1455078125, "learning_rate": 5.154969927760829e-05, "loss": 0.0005, "step": 14162 }, { "epoch": 6.610501750291715, "grad_norm": 0.045166015625, "learning_rate": 5.1536863589225646e-05, "loss": 0.0028, "step": 14163 }, { "epoch": 6.610968494749125, "grad_norm": 0.0235595703125, "learning_rate": 5.1524028944308156e-05, "loss": 0.0003, "step": 14164 }, { "epoch": 6.611435239206535, "grad_norm": 0.03076171875, "learning_rate": 5.1511195343132066e-05, "loss": 0.0004, "step": 14165 }, { "epoch": 6.611901983663944, "grad_norm": 0.041015625, "learning_rate": 5.149836278597373e-05, "loss": 0.0004, "step": 14166 }, { "epoch": 6.6123687281213535, "grad_norm": 0.10205078125, "learning_rate": 5.148553127310951e-05, "loss": 0.0007, "step": 14167 }, { "epoch": 6.612835472578763, "grad_norm": 0.115234375, "learning_rate": 5.147270080481559e-05, "loss": 0.0009, "step": 14168 }, { "epoch": 6.613302217036173, "grad_norm": 0.026123046875, "learning_rate": 5.145987138136825e-05, "loss": 0.0002, "step": 14169 }, { "epoch": 6.613768961493582, "grad_norm": 0.0279541015625, "learning_rate": 5.1447043003043784e-05, "loss": 0.0003, "step": 14170 }, { "epoch": 6.614235705950992, "grad_norm": 0.103515625, "learning_rate": 5.1434215670118316e-05, "loss": 0.0006, "step": 14171 }, { "epoch": 6.6147024504084015, "grad_norm": 0.0703125, "learning_rate": 5.142138938286806e-05, "loss": 0.0004, "step": 14172 }, { "epoch": 6.615169194865811, "grad_norm": 0.0732421875, "learning_rate": 5.140856414156923e-05, "loss": 0.0003, "step": 14173 }, { "epoch": 6.61563593932322, "grad_norm": 0.1787109375, "learning_rate": 5.139573994649789e-05, "loss": 0.0008, "step": 14174 }, { "epoch": 6.61610268378063, "grad_norm": 0.2255859375, "learning_rate": 5.138291679793023e-05, "loss": 0.0019, "step": 14175 }, { "epoch": 6.61656942823804, "grad_norm": 0.490234375, "learning_rate": 5.137009469614229e-05, "loss": 0.0027, "step": 14176 }, { "epoch": 6.61703617269545, "grad_norm": 0.232421875, "learning_rate": 5.1357273641410166e-05, "loss": 0.0013, "step": 14177 }, { "epoch": 6.6175029171528585, "grad_norm": 0.049560546875, "learning_rate": 5.1344453634009935e-05, "loss": 0.0019, "step": 14178 }, { "epoch": 6.617969661610268, "grad_norm": 0.019287109375, "learning_rate": 5.133163467421758e-05, "loss": 0.0003, "step": 14179 }, { "epoch": 6.618436406067678, "grad_norm": 0.251953125, "learning_rate": 5.131881676230914e-05, "loss": 0.0038, "step": 14180 }, { "epoch": 6.618903150525087, "grad_norm": 0.06005859375, "learning_rate": 5.1305999898560574e-05, "loss": 0.0005, "step": 14181 }, { "epoch": 6.619369894982497, "grad_norm": 0.0791015625, "learning_rate": 5.1293184083247884e-05, "loss": 0.0005, "step": 14182 }, { "epoch": 6.619836639439907, "grad_norm": 0.2060546875, "learning_rate": 5.128036931664695e-05, "loss": 0.0046, "step": 14183 }, { "epoch": 6.620303383897316, "grad_norm": 0.03857421875, "learning_rate": 5.1267555599033715e-05, "loss": 0.0004, "step": 14184 }, { "epoch": 6.620770128354726, "grad_norm": 0.059814453125, "learning_rate": 5.1254742930684116e-05, "loss": 0.0026, "step": 14185 }, { "epoch": 6.621236872812135, "grad_norm": 0.06689453125, "learning_rate": 5.1241931311873946e-05, "loss": 0.0004, "step": 14186 }, { "epoch": 6.621703617269545, "grad_norm": 0.06640625, "learning_rate": 5.1229120742879135e-05, "loss": 0.0004, "step": 14187 }, { "epoch": 6.622170361726955, "grad_norm": 0.038818359375, "learning_rate": 5.121631122397543e-05, "loss": 0.0003, "step": 14188 }, { "epoch": 6.622637106184364, "grad_norm": 0.12109375, "learning_rate": 5.120350275543866e-05, "loss": 0.0006, "step": 14189 }, { "epoch": 6.623103850641773, "grad_norm": 0.01385498046875, "learning_rate": 5.1190695337544666e-05, "loss": 0.0003, "step": 14190 }, { "epoch": 6.623570595099183, "grad_norm": 0.2412109375, "learning_rate": 5.117788897056912e-05, "loss": 0.0012, "step": 14191 }, { "epoch": 6.624037339556593, "grad_norm": 0.0272216796875, "learning_rate": 5.1165083654787784e-05, "loss": 0.0003, "step": 14192 }, { "epoch": 6.624504084014003, "grad_norm": 0.037109375, "learning_rate": 5.115227939047642e-05, "loss": 0.0003, "step": 14193 }, { "epoch": 6.624970828471412, "grad_norm": 0.1826171875, "learning_rate": 5.113947617791065e-05, "loss": 0.0008, "step": 14194 }, { "epoch": 6.6254375729288215, "grad_norm": 0.111328125, "learning_rate": 5.112667401736616e-05, "loss": 0.0005, "step": 14195 }, { "epoch": 6.625904317386231, "grad_norm": 0.046142578125, "learning_rate": 5.111387290911864e-05, "loss": 0.0003, "step": 14196 }, { "epoch": 6.62637106184364, "grad_norm": 0.373046875, "learning_rate": 5.110107285344365e-05, "loss": 0.003, "step": 14197 }, { "epoch": 6.62683780630105, "grad_norm": 0.068359375, "learning_rate": 5.108827385061681e-05, "loss": 0.0003, "step": 14198 }, { "epoch": 6.62730455075846, "grad_norm": 0.0498046875, "learning_rate": 5.107547590091373e-05, "loss": 0.0004, "step": 14199 }, { "epoch": 6.62777129521587, "grad_norm": 0.09716796875, "learning_rate": 5.1062679004609917e-05, "loss": 0.0022, "step": 14200 }, { "epoch": 6.6282380396732785, "grad_norm": 0.0732421875, "learning_rate": 5.104988316198092e-05, "loss": 0.0046, "step": 14201 }, { "epoch": 6.628704784130688, "grad_norm": 0.09326171875, "learning_rate": 5.103708837330227e-05, "loss": 0.0006, "step": 14202 }, { "epoch": 6.629171528588098, "grad_norm": 0.1650390625, "learning_rate": 5.10242946388494e-05, "loss": 0.0033, "step": 14203 }, { "epoch": 6.629638273045508, "grad_norm": 0.54296875, "learning_rate": 5.101150195889781e-05, "loss": 0.0009, "step": 14204 }, { "epoch": 6.630105017502917, "grad_norm": 0.033447265625, "learning_rate": 5.099871033372299e-05, "loss": 0.0003, "step": 14205 }, { "epoch": 6.630571761960327, "grad_norm": 0.1220703125, "learning_rate": 5.0985919763600244e-05, "loss": 0.0005, "step": 14206 }, { "epoch": 6.631038506417736, "grad_norm": 0.20703125, "learning_rate": 5.0973130248805076e-05, "loss": 0.0093, "step": 14207 }, { "epoch": 6.631505250875146, "grad_norm": 0.3125, "learning_rate": 5.0960341789612766e-05, "loss": 0.0019, "step": 14208 }, { "epoch": 6.631971995332555, "grad_norm": 0.294921875, "learning_rate": 5.09475543862987e-05, "loss": 0.001, "step": 14209 }, { "epoch": 6.632438739789965, "grad_norm": 0.1689453125, "learning_rate": 5.093476803913827e-05, "loss": 0.0006, "step": 14210 }, { "epoch": 6.632905484247375, "grad_norm": 0.1494140625, "learning_rate": 5.092198274840667e-05, "loss": 0.0051, "step": 14211 }, { "epoch": 6.6333722287047845, "grad_norm": 0.244140625, "learning_rate": 5.0909198514379255e-05, "loss": 0.0007, "step": 14212 }, { "epoch": 6.633838973162193, "grad_norm": 0.076171875, "learning_rate": 5.089641533733128e-05, "loss": 0.0005, "step": 14213 }, { "epoch": 6.634305717619603, "grad_norm": 0.058837890625, "learning_rate": 5.088363321753793e-05, "loss": 0.0004, "step": 14214 }, { "epoch": 6.634772462077013, "grad_norm": 0.03662109375, "learning_rate": 5.087085215527445e-05, "loss": 0.0003, "step": 14215 }, { "epoch": 6.635239206534423, "grad_norm": 0.041259765625, "learning_rate": 5.085807215081608e-05, "loss": 0.002, "step": 14216 }, { "epoch": 6.635705950991832, "grad_norm": 0.1318359375, "learning_rate": 5.084529320443791e-05, "loss": 0.0009, "step": 14217 }, { "epoch": 6.6361726954492415, "grad_norm": 0.0849609375, "learning_rate": 5.08325153164151e-05, "loss": 0.0004, "step": 14218 }, { "epoch": 6.636639439906651, "grad_norm": 0.09033203125, "learning_rate": 5.0819738487022824e-05, "loss": 0.0005, "step": 14219 }, { "epoch": 6.637106184364061, "grad_norm": 0.056640625, "learning_rate": 5.080696271653611e-05, "loss": 0.0004, "step": 14220 }, { "epoch": 6.63757292882147, "grad_norm": 0.029052734375, "learning_rate": 5.079418800523008e-05, "loss": 0.0004, "step": 14221 }, { "epoch": 6.63803967327888, "grad_norm": 0.45703125, "learning_rate": 5.078141435337981e-05, "loss": 0.0014, "step": 14222 }, { "epoch": 6.63850641773629, "grad_norm": 0.07177734375, "learning_rate": 5.076864176126026e-05, "loss": 0.0005, "step": 14223 }, { "epoch": 6.6389731621936985, "grad_norm": 0.5, "learning_rate": 5.0755870229146506e-05, "loss": 0.0093, "step": 14224 }, { "epoch": 6.639439906651108, "grad_norm": 0.047607421875, "learning_rate": 5.074309975731346e-05, "loss": 0.0005, "step": 14225 }, { "epoch": 6.639906651108518, "grad_norm": 0.08154296875, "learning_rate": 5.073033034603615e-05, "loss": 0.0004, "step": 14226 }, { "epoch": 6.640373395565928, "grad_norm": 0.154296875, "learning_rate": 5.0717561995589526e-05, "loss": 0.0031, "step": 14227 }, { "epoch": 6.640840140023338, "grad_norm": 0.1455078125, "learning_rate": 5.070479470624845e-05, "loss": 0.0006, "step": 14228 }, { "epoch": 6.641306884480747, "grad_norm": 0.19921875, "learning_rate": 5.0692028478287825e-05, "loss": 0.0017, "step": 14229 }, { "epoch": 6.641773628938156, "grad_norm": 0.166015625, "learning_rate": 5.0679263311982516e-05, "loss": 0.0007, "step": 14230 }, { "epoch": 6.642240373395566, "grad_norm": 0.384765625, "learning_rate": 5.066649920760744e-05, "loss": 0.0035, "step": 14231 }, { "epoch": 6.642707117852975, "grad_norm": 0.058349609375, "learning_rate": 5.0653736165437337e-05, "loss": 0.0004, "step": 14232 }, { "epoch": 6.643173862310385, "grad_norm": 0.2001953125, "learning_rate": 5.0640974185747035e-05, "loss": 0.0008, "step": 14233 }, { "epoch": 6.643640606767795, "grad_norm": 0.060546875, "learning_rate": 5.0628213268811374e-05, "loss": 0.0005, "step": 14234 }, { "epoch": 6.6441073512252045, "grad_norm": 0.13671875, "learning_rate": 5.061545341490501e-05, "loss": 0.0008, "step": 14235 }, { "epoch": 6.644574095682614, "grad_norm": 0.0179443359375, "learning_rate": 5.0602694624302774e-05, "loss": 0.0002, "step": 14236 }, { "epoch": 6.645040840140023, "grad_norm": 0.1865234375, "learning_rate": 5.058993689727929e-05, "loss": 0.0007, "step": 14237 }, { "epoch": 6.645507584597433, "grad_norm": 0.0703125, "learning_rate": 5.057718023410928e-05, "loss": 0.0003, "step": 14238 }, { "epoch": 6.645974329054843, "grad_norm": 0.049560546875, "learning_rate": 5.056442463506745e-05, "loss": 0.0004, "step": 14239 }, { "epoch": 6.646441073512252, "grad_norm": 0.0257568359375, "learning_rate": 5.0551670100428384e-05, "loss": 0.0003, "step": 14240 }, { "epoch": 6.6469078179696615, "grad_norm": 0.061279296875, "learning_rate": 5.0538916630466716e-05, "loss": 0.0044, "step": 14241 }, { "epoch": 6.647374562427071, "grad_norm": 0.376953125, "learning_rate": 5.052616422545708e-05, "loss": 0.0026, "step": 14242 }, { "epoch": 6.647841306884481, "grad_norm": 0.11376953125, "learning_rate": 5.0513412885674e-05, "loss": 0.0035, "step": 14243 }, { "epoch": 6.64830805134189, "grad_norm": 0.30859375, "learning_rate": 5.050066261139203e-05, "loss": 0.0017, "step": 14244 }, { "epoch": 6.6487747957993, "grad_norm": 0.0703125, "learning_rate": 5.048791340288577e-05, "loss": 0.0003, "step": 14245 }, { "epoch": 6.6492415402567095, "grad_norm": 0.04296875, "learning_rate": 5.047516526042962e-05, "loss": 0.0004, "step": 14246 }, { "epoch": 6.649708284714119, "grad_norm": 0.018798828125, "learning_rate": 5.046241818429811e-05, "loss": 0.0003, "step": 14247 }, { "epoch": 6.650175029171528, "grad_norm": 0.0947265625, "learning_rate": 5.044967217476575e-05, "loss": 0.0005, "step": 14248 }, { "epoch": 6.650641773628938, "grad_norm": 0.220703125, "learning_rate": 5.043692723210687e-05, "loss": 0.0033, "step": 14249 }, { "epoch": 6.651108518086348, "grad_norm": 0.022216796875, "learning_rate": 5.0424183356595956e-05, "loss": 0.0003, "step": 14250 }, { "epoch": 6.651575262543758, "grad_norm": 0.058837890625, "learning_rate": 5.0411440548507406e-05, "loss": 0.0004, "step": 14251 }, { "epoch": 6.6520420070011665, "grad_norm": 0.0810546875, "learning_rate": 5.039869880811552e-05, "loss": 0.0005, "step": 14252 }, { "epoch": 6.652508751458576, "grad_norm": 0.1064453125, "learning_rate": 5.038595813569469e-05, "loss": 0.003, "step": 14253 }, { "epoch": 6.652975495915986, "grad_norm": 0.1796875, "learning_rate": 5.037321853151926e-05, "loss": 0.001, "step": 14254 }, { "epoch": 6.653442240373396, "grad_norm": 0.146484375, "learning_rate": 5.036047999586346e-05, "loss": 0.008, "step": 14255 }, { "epoch": 6.653908984830805, "grad_norm": 0.048828125, "learning_rate": 5.0347742529001655e-05, "loss": 0.0004, "step": 14256 }, { "epoch": 6.654375729288215, "grad_norm": 0.034423828125, "learning_rate": 5.033500613120799e-05, "loss": 0.0003, "step": 14257 }, { "epoch": 6.654842473745624, "grad_norm": 0.042236328125, "learning_rate": 5.0322270802756735e-05, "loss": 0.0003, "step": 14258 }, { "epoch": 6.655309218203034, "grad_norm": 0.478515625, "learning_rate": 5.030953654392216e-05, "loss": 0.0039, "step": 14259 }, { "epoch": 6.655775962660443, "grad_norm": 0.1318359375, "learning_rate": 5.029680335497837e-05, "loss": 0.0006, "step": 14260 }, { "epoch": 6.656242707117853, "grad_norm": 0.140625, "learning_rate": 5.028407123619954e-05, "loss": 0.0016, "step": 14261 }, { "epoch": 6.656709451575263, "grad_norm": 0.259765625, "learning_rate": 5.0271340187859836e-05, "loss": 0.0059, "step": 14262 }, { "epoch": 6.6571761960326725, "grad_norm": 0.10400390625, "learning_rate": 5.025861021023335e-05, "loss": 0.0005, "step": 14263 }, { "epoch": 6.657642940490081, "grad_norm": 0.11181640625, "learning_rate": 5.024588130359415e-05, "loss": 0.0004, "step": 14264 }, { "epoch": 6.658109684947491, "grad_norm": 0.03369140625, "learning_rate": 5.023315346821638e-05, "loss": 0.0003, "step": 14265 }, { "epoch": 6.658576429404901, "grad_norm": 0.06298828125, "learning_rate": 5.0220426704373994e-05, "loss": 0.0005, "step": 14266 }, { "epoch": 6.65904317386231, "grad_norm": 0.345703125, "learning_rate": 5.020770101234103e-05, "loss": 0.0011, "step": 14267 }, { "epoch": 6.65950991831972, "grad_norm": 0.337890625, "learning_rate": 5.019497639239157e-05, "loss": 0.005, "step": 14268 }, { "epoch": 6.6599766627771295, "grad_norm": 0.0927734375, "learning_rate": 5.0182252844799495e-05, "loss": 0.0023, "step": 14269 }, { "epoch": 6.660443407234539, "grad_norm": 0.045166015625, "learning_rate": 5.0169530369838766e-05, "loss": 0.0005, "step": 14270 }, { "epoch": 6.660910151691949, "grad_norm": 0.11181640625, "learning_rate": 5.0156808967783385e-05, "loss": 0.0006, "step": 14271 }, { "epoch": 6.661376896149358, "grad_norm": 0.07373046875, "learning_rate": 5.014408863890718e-05, "loss": 0.002, "step": 14272 }, { "epoch": 6.661843640606768, "grad_norm": 0.1064453125, "learning_rate": 5.013136938348409e-05, "loss": 0.003, "step": 14273 }, { "epoch": 6.662310385064178, "grad_norm": 0.412109375, "learning_rate": 5.011865120178791e-05, "loss": 0.0048, "step": 14274 }, { "epoch": 6.6627771295215865, "grad_norm": 0.03173828125, "learning_rate": 5.010593409409252e-05, "loss": 0.0004, "step": 14275 }, { "epoch": 6.663243873978996, "grad_norm": 0.0947265625, "learning_rate": 5.009321806067178e-05, "loss": 0.0058, "step": 14276 }, { "epoch": 6.663710618436406, "grad_norm": 0.12353515625, "learning_rate": 5.008050310179939e-05, "loss": 0.0006, "step": 14277 }, { "epoch": 6.664177362893816, "grad_norm": 0.0673828125, "learning_rate": 5.006778921774915e-05, "loss": 0.0005, "step": 14278 }, { "epoch": 6.664644107351226, "grad_norm": 0.0400390625, "learning_rate": 5.0055076408794786e-05, "loss": 0.0004, "step": 14279 }, { "epoch": 6.665110851808635, "grad_norm": 0.045166015625, "learning_rate": 5.004236467521009e-05, "loss": 0.0003, "step": 14280 }, { "epoch": 6.665577596266044, "grad_norm": 0.275390625, "learning_rate": 5.002965401726868e-05, "loss": 0.0013, "step": 14281 }, { "epoch": 6.666044340723454, "grad_norm": 0.046142578125, "learning_rate": 5.0016944435244254e-05, "loss": 0.0004, "step": 14282 }, { "epoch": 6.666511085180863, "grad_norm": 0.07373046875, "learning_rate": 5.0004235929410514e-05, "loss": 0.0004, "step": 14283 }, { "epoch": 6.666977829638273, "grad_norm": 0.064453125, "learning_rate": 4.999152850004101e-05, "loss": 0.0035, "step": 14284 }, { "epoch": 6.667444574095683, "grad_norm": 0.21875, "learning_rate": 4.997882214740941e-05, "loss": 0.0032, "step": 14285 }, { "epoch": 6.6679113185530925, "grad_norm": 0.040771484375, "learning_rate": 4.996611687178924e-05, "loss": 0.0004, "step": 14286 }, { "epoch": 6.668378063010501, "grad_norm": 0.1982421875, "learning_rate": 4.995341267345408e-05, "loss": 0.001, "step": 14287 }, { "epoch": 6.668844807467911, "grad_norm": 0.038818359375, "learning_rate": 4.994070955267751e-05, "loss": 0.0003, "step": 14288 }, { "epoch": 6.669311551925321, "grad_norm": 0.05419921875, "learning_rate": 4.9928007509732975e-05, "loss": 0.0004, "step": 14289 }, { "epoch": 6.669778296382731, "grad_norm": 0.0888671875, "learning_rate": 4.9915306544893994e-05, "loss": 0.0005, "step": 14290 }, { "epoch": 6.67024504084014, "grad_norm": 0.11669921875, "learning_rate": 4.990260665843406e-05, "loss": 0.0028, "step": 14291 }, { "epoch": 6.6707117852975495, "grad_norm": 0.0260009765625, "learning_rate": 4.988990785062656e-05, "loss": 0.0003, "step": 14292 }, { "epoch": 6.671178529754959, "grad_norm": 0.158203125, "learning_rate": 4.9877210121744945e-05, "loss": 0.0005, "step": 14293 }, { "epoch": 6.671645274212369, "grad_norm": 0.0546875, "learning_rate": 4.9864513472062634e-05, "loss": 0.0005, "step": 14294 }, { "epoch": 6.672112018669778, "grad_norm": 0.018310546875, "learning_rate": 4.985181790185295e-05, "loss": 0.0003, "step": 14295 }, { "epoch": 6.672578763127188, "grad_norm": 0.0947265625, "learning_rate": 4.983912341138926e-05, "loss": 0.0004, "step": 14296 }, { "epoch": 6.673045507584598, "grad_norm": 0.055908203125, "learning_rate": 4.982643000094493e-05, "loss": 0.0004, "step": 14297 }, { "epoch": 6.673512252042007, "grad_norm": 0.04443359375, "learning_rate": 4.98137376707932e-05, "loss": 0.0003, "step": 14298 }, { "epoch": 6.673978996499416, "grad_norm": 0.08056640625, "learning_rate": 4.980104642120737e-05, "loss": 0.0005, "step": 14299 }, { "epoch": 6.674445740956826, "grad_norm": 0.040771484375, "learning_rate": 4.9788356252460764e-05, "loss": 0.0004, "step": 14300 }, { "epoch": 6.674912485414236, "grad_norm": 0.060546875, "learning_rate": 4.9775667164826514e-05, "loss": 0.0004, "step": 14301 }, { "epoch": 6.675379229871645, "grad_norm": 0.07958984375, "learning_rate": 4.976297915857787e-05, "loss": 0.0045, "step": 14302 }, { "epoch": 6.675845974329055, "grad_norm": 0.02685546875, "learning_rate": 4.975029223398806e-05, "loss": 0.0004, "step": 14303 }, { "epoch": 6.676312718786464, "grad_norm": 0.1875, "learning_rate": 4.973760639133018e-05, "loss": 0.0009, "step": 14304 }, { "epoch": 6.676779463243874, "grad_norm": 0.05810546875, "learning_rate": 4.972492163087743e-05, "loss": 0.0004, "step": 14305 }, { "epoch": 6.677246207701284, "grad_norm": 0.0264892578125, "learning_rate": 4.971223795290287e-05, "loss": 0.0004, "step": 14306 }, { "epoch": 6.677712952158693, "grad_norm": 0.1708984375, "learning_rate": 4.9699555357679616e-05, "loss": 0.001, "step": 14307 }, { "epoch": 6.678179696616103, "grad_norm": 0.0169677734375, "learning_rate": 4.968687384548078e-05, "loss": 0.0003, "step": 14308 }, { "epoch": 6.6786464410735125, "grad_norm": 0.052001953125, "learning_rate": 4.967419341657935e-05, "loss": 0.001, "step": 14309 }, { "epoch": 6.679113185530921, "grad_norm": 0.38671875, "learning_rate": 4.966151407124836e-05, "loss": 0.0032, "step": 14310 }, { "epoch": 6.679579929988331, "grad_norm": 0.1708984375, "learning_rate": 4.9648835809760876e-05, "loss": 0.0029, "step": 14311 }, { "epoch": 6.680046674445741, "grad_norm": 0.064453125, "learning_rate": 4.963615863238976e-05, "loss": 0.0004, "step": 14312 }, { "epoch": 6.680513418903151, "grad_norm": 0.0244140625, "learning_rate": 4.962348253940805e-05, "loss": 0.0003, "step": 14313 }, { "epoch": 6.6809801633605606, "grad_norm": 0.029296875, "learning_rate": 4.961080753108869e-05, "loss": 0.0004, "step": 14314 }, { "epoch": 6.6814469078179695, "grad_norm": 0.275390625, "learning_rate": 4.95981336077045e-05, "loss": 0.0011, "step": 14315 }, { "epoch": 6.681913652275379, "grad_norm": 0.013916015625, "learning_rate": 4.958546076952842e-05, "loss": 0.0002, "step": 14316 }, { "epoch": 6.682380396732789, "grad_norm": 0.087890625, "learning_rate": 4.9572789016833354e-05, "loss": 0.0004, "step": 14317 }, { "epoch": 6.682847141190198, "grad_norm": 0.72265625, "learning_rate": 4.956011834989205e-05, "loss": 0.0018, "step": 14318 }, { "epoch": 6.683313885647608, "grad_norm": 0.09326171875, "learning_rate": 4.9547448768977354e-05, "loss": 0.0075, "step": 14319 }, { "epoch": 6.6837806301050176, "grad_norm": 0.06005859375, "learning_rate": 4.95347802743621e-05, "loss": 0.0004, "step": 14320 }, { "epoch": 6.684247374562427, "grad_norm": 0.055419921875, "learning_rate": 4.952211286631899e-05, "loss": 0.0005, "step": 14321 }, { "epoch": 6.684714119019836, "grad_norm": 0.0247802734375, "learning_rate": 4.950944654512083e-05, "loss": 0.0003, "step": 14322 }, { "epoch": 6.685180863477246, "grad_norm": 0.146484375, "learning_rate": 4.9496781311040275e-05, "loss": 0.0006, "step": 14323 }, { "epoch": 6.685647607934656, "grad_norm": 0.5625, "learning_rate": 4.948411716435008e-05, "loss": 0.0027, "step": 14324 }, { "epoch": 6.686114352392066, "grad_norm": 0.169921875, "learning_rate": 4.947145410532286e-05, "loss": 0.0024, "step": 14325 }, { "epoch": 6.6865810968494745, "grad_norm": 0.0732421875, "learning_rate": 4.945879213423132e-05, "loss": 0.0032, "step": 14326 }, { "epoch": 6.687047841306884, "grad_norm": 0.27734375, "learning_rate": 4.944613125134805e-05, "loss": 0.0013, "step": 14327 }, { "epoch": 6.687514585764294, "grad_norm": 0.1181640625, "learning_rate": 4.9433471456945646e-05, "loss": 0.0006, "step": 14328 }, { "epoch": 6.687981330221704, "grad_norm": 0.0361328125, "learning_rate": 4.942081275129674e-05, "loss": 0.0003, "step": 14329 }, { "epoch": 6.688448074679113, "grad_norm": 0.1181640625, "learning_rate": 4.940815513467383e-05, "loss": 0.0005, "step": 14330 }, { "epoch": 6.688914819136523, "grad_norm": 0.017333984375, "learning_rate": 4.9395498607349463e-05, "loss": 0.0003, "step": 14331 }, { "epoch": 6.689381563593932, "grad_norm": 0.020751953125, "learning_rate": 4.9382843169596205e-05, "loss": 0.0003, "step": 14332 }, { "epoch": 6.689848308051342, "grad_norm": 0.09619140625, "learning_rate": 4.9370188821686446e-05, "loss": 0.0005, "step": 14333 }, { "epoch": 6.690315052508751, "grad_norm": 0.1640625, "learning_rate": 4.935753556389273e-05, "loss": 0.0007, "step": 14334 }, { "epoch": 6.690781796966161, "grad_norm": 0.041015625, "learning_rate": 4.934488339648744e-05, "loss": 0.0004, "step": 14335 }, { "epoch": 6.691248541423571, "grad_norm": 0.208984375, "learning_rate": 4.933223231974301e-05, "loss": 0.003, "step": 14336 }, { "epoch": 6.6917152858809805, "grad_norm": 0.16015625, "learning_rate": 4.931958233393187e-05, "loss": 0.0007, "step": 14337 }, { "epoch": 6.692182030338389, "grad_norm": 0.07666015625, "learning_rate": 4.930693343932631e-05, "loss": 0.0005, "step": 14338 }, { "epoch": 6.692648774795799, "grad_norm": 0.053955078125, "learning_rate": 4.929428563619872e-05, "loss": 0.0028, "step": 14339 }, { "epoch": 6.693115519253209, "grad_norm": 0.1220703125, "learning_rate": 4.928163892482146e-05, "loss": 0.0006, "step": 14340 }, { "epoch": 6.693582263710619, "grad_norm": 0.01275634765625, "learning_rate": 4.926899330546675e-05, "loss": 0.0002, "step": 14341 }, { "epoch": 6.694049008168028, "grad_norm": 0.21484375, "learning_rate": 4.925634877840689e-05, "loss": 0.0009, "step": 14342 }, { "epoch": 6.6945157526254375, "grad_norm": 0.146484375, "learning_rate": 4.924370534391419e-05, "loss": 0.0033, "step": 14343 }, { "epoch": 6.694982497082847, "grad_norm": 0.06494140625, "learning_rate": 4.9231063002260794e-05, "loss": 0.0006, "step": 14344 }, { "epoch": 6.695449241540256, "grad_norm": 0.216796875, "learning_rate": 4.921842175371893e-05, "loss": 0.0013, "step": 14345 }, { "epoch": 6.695915985997666, "grad_norm": 0.1328125, "learning_rate": 4.9205781598560844e-05, "loss": 0.0031, "step": 14346 }, { "epoch": 6.696382730455076, "grad_norm": 0.08984375, "learning_rate": 4.919314253705859e-05, "loss": 0.0005, "step": 14347 }, { "epoch": 6.696849474912486, "grad_norm": 0.265625, "learning_rate": 4.9180504569484346e-05, "loss": 0.001, "step": 14348 }, { "epoch": 6.697316219369895, "grad_norm": 0.08935546875, "learning_rate": 4.916786769611026e-05, "loss": 0.0004, "step": 14349 }, { "epoch": 6.697782963827304, "grad_norm": 0.07763671875, "learning_rate": 4.9155231917208344e-05, "loss": 0.0005, "step": 14350 }, { "epoch": 6.698249708284714, "grad_norm": 0.212890625, "learning_rate": 4.9142597233050705e-05, "loss": 0.0007, "step": 14351 }, { "epoch": 6.698716452742124, "grad_norm": 0.2119140625, "learning_rate": 4.91299636439094e-05, "loss": 0.001, "step": 14352 }, { "epoch": 6.699183197199533, "grad_norm": 0.0703125, "learning_rate": 4.91173311500564e-05, "loss": 0.0032, "step": 14353 }, { "epoch": 6.699649941656943, "grad_norm": 0.083984375, "learning_rate": 4.910469975176374e-05, "loss": 0.002, "step": 14354 }, { "epoch": 6.700116686114352, "grad_norm": 0.0208740234375, "learning_rate": 4.9092069449303334e-05, "loss": 0.0003, "step": 14355 }, { "epoch": 6.700583430571762, "grad_norm": 0.08984375, "learning_rate": 4.907944024294715e-05, "loss": 0.0005, "step": 14356 }, { "epoch": 6.701050175029172, "grad_norm": 0.060546875, "learning_rate": 4.906681213296715e-05, "loss": 0.0038, "step": 14357 }, { "epoch": 6.701516919486581, "grad_norm": 0.34765625, "learning_rate": 4.9054185119635165e-05, "loss": 0.003, "step": 14358 }, { "epoch": 6.701983663943991, "grad_norm": 0.21484375, "learning_rate": 4.9041559203223095e-05, "loss": 0.001, "step": 14359 }, { "epoch": 6.7024504084014005, "grad_norm": 0.158203125, "learning_rate": 4.9028934384002834e-05, "loss": 0.0005, "step": 14360 }, { "epoch": 6.702917152858809, "grad_norm": 0.078125, "learning_rate": 4.901631066224613e-05, "loss": 0.0005, "step": 14361 }, { "epoch": 6.703383897316219, "grad_norm": 0.158203125, "learning_rate": 4.900368803822483e-05, "loss": 0.0006, "step": 14362 }, { "epoch": 6.703850641773629, "grad_norm": 0.34765625, "learning_rate": 4.8991066512210736e-05, "loss": 0.0098, "step": 14363 }, { "epoch": 6.704317386231039, "grad_norm": 0.0732421875, "learning_rate": 4.897844608447555e-05, "loss": 0.0009, "step": 14364 }, { "epoch": 6.704784130688448, "grad_norm": 0.3203125, "learning_rate": 4.896582675529102e-05, "loss": 0.0027, "step": 14365 }, { "epoch": 6.7052508751458575, "grad_norm": 0.27734375, "learning_rate": 4.895320852492891e-05, "loss": 0.0053, "step": 14366 }, { "epoch": 6.705717619603267, "grad_norm": 0.0556640625, "learning_rate": 4.8940591393660804e-05, "loss": 0.0004, "step": 14367 }, { "epoch": 6.706184364060677, "grad_norm": 0.06787109375, "learning_rate": 4.8927975361758436e-05, "loss": 0.0022, "step": 14368 }, { "epoch": 6.706651108518086, "grad_norm": 0.2373046875, "learning_rate": 4.891536042949345e-05, "loss": 0.0011, "step": 14369 }, { "epoch": 6.707117852975496, "grad_norm": 0.06396484375, "learning_rate": 4.890274659713739e-05, "loss": 0.0004, "step": 14370 }, { "epoch": 6.707584597432906, "grad_norm": 0.053955078125, "learning_rate": 4.889013386496194e-05, "loss": 0.0004, "step": 14371 }, { "epoch": 6.708051341890315, "grad_norm": 0.08154296875, "learning_rate": 4.8877522233238614e-05, "loss": 0.0006, "step": 14372 }, { "epoch": 6.708518086347724, "grad_norm": 0.12451171875, "learning_rate": 4.8864911702238916e-05, "loss": 0.0006, "step": 14373 }, { "epoch": 6.708984830805134, "grad_norm": 0.1787109375, "learning_rate": 4.885230227223441e-05, "loss": 0.0007, "step": 14374 }, { "epoch": 6.709451575262544, "grad_norm": 0.115234375, "learning_rate": 4.883969394349661e-05, "loss": 0.0029, "step": 14375 }, { "epoch": 6.709918319719954, "grad_norm": 0.046142578125, "learning_rate": 4.882708671629693e-05, "loss": 0.0004, "step": 14376 }, { "epoch": 6.710385064177363, "grad_norm": 0.10888671875, "learning_rate": 4.881448059090685e-05, "loss": 0.0008, "step": 14377 }, { "epoch": 6.710851808634772, "grad_norm": 0.035888671875, "learning_rate": 4.880187556759783e-05, "loss": 0.0027, "step": 14378 }, { "epoch": 6.711318553092182, "grad_norm": 0.267578125, "learning_rate": 4.87892716466412e-05, "loss": 0.0096, "step": 14379 }, { "epoch": 6.711785297549592, "grad_norm": 0.0615234375, "learning_rate": 4.877666882830837e-05, "loss": 0.0004, "step": 14380 }, { "epoch": 6.712252042007001, "grad_norm": 0.0230712890625, "learning_rate": 4.876406711287074e-05, "loss": 0.0004, "step": 14381 }, { "epoch": 6.712718786464411, "grad_norm": 0.052490234375, "learning_rate": 4.875146650059954e-05, "loss": 0.0035, "step": 14382 }, { "epoch": 6.7131855309218205, "grad_norm": 0.09619140625, "learning_rate": 4.873886699176613e-05, "loss": 0.0005, "step": 14383 }, { "epoch": 6.71365227537923, "grad_norm": 0.390625, "learning_rate": 4.872626858664183e-05, "loss": 0.0016, "step": 14384 }, { "epoch": 6.714119019836639, "grad_norm": 0.08056640625, "learning_rate": 4.8713671285497816e-05, "loss": 0.0005, "step": 14385 }, { "epoch": 6.714585764294049, "grad_norm": 0.02978515625, "learning_rate": 4.8701075088605405e-05, "loss": 0.0003, "step": 14386 }, { "epoch": 6.715052508751459, "grad_norm": 0.0196533203125, "learning_rate": 4.8688479996235737e-05, "loss": 0.0003, "step": 14387 }, { "epoch": 6.715519253208868, "grad_norm": 0.06005859375, "learning_rate": 4.867588600866001e-05, "loss": 0.0005, "step": 14388 }, { "epoch": 6.7159859976662775, "grad_norm": 0.01361083984375, "learning_rate": 4.866329312614944e-05, "loss": 0.0002, "step": 14389 }, { "epoch": 6.716452742123687, "grad_norm": 0.267578125, "learning_rate": 4.8650701348975095e-05, "loss": 0.0032, "step": 14390 }, { "epoch": 6.716919486581097, "grad_norm": 0.036376953125, "learning_rate": 4.863811067740812e-05, "loss": 0.0003, "step": 14391 }, { "epoch": 6.717386231038507, "grad_norm": 0.1611328125, "learning_rate": 4.862552111171965e-05, "loss": 0.0005, "step": 14392 }, { "epoch": 6.717852975495916, "grad_norm": 0.1083984375, "learning_rate": 4.861293265218068e-05, "loss": 0.0004, "step": 14393 }, { "epoch": 6.7183197199533256, "grad_norm": 0.033935546875, "learning_rate": 4.860034529906227e-05, "loss": 0.0003, "step": 14394 }, { "epoch": 6.718786464410735, "grad_norm": 0.1259765625, "learning_rate": 4.8587759052635505e-05, "loss": 0.0005, "step": 14395 }, { "epoch": 6.719253208868144, "grad_norm": 0.043212890625, "learning_rate": 4.857517391317128e-05, "loss": 0.0004, "step": 14396 }, { "epoch": 6.719719953325554, "grad_norm": 0.037353515625, "learning_rate": 4.8562589880940636e-05, "loss": 0.0003, "step": 14397 }, { "epoch": 6.720186697782964, "grad_norm": 0.134765625, "learning_rate": 4.855000695621452e-05, "loss": 0.0018, "step": 14398 }, { "epoch": 6.720653442240374, "grad_norm": 0.025390625, "learning_rate": 4.853742513926381e-05, "loss": 0.0003, "step": 14399 }, { "epoch": 6.721120186697783, "grad_norm": 0.333984375, "learning_rate": 4.8524844430359436e-05, "loss": 0.0042, "step": 14400 }, { "epoch": 6.721586931155192, "grad_norm": 0.1728515625, "learning_rate": 4.8512264829772314e-05, "loss": 0.0007, "step": 14401 }, { "epoch": 6.722053675612602, "grad_norm": 0.095703125, "learning_rate": 4.849968633777322e-05, "loss": 0.0006, "step": 14402 }, { "epoch": 6.722520420070012, "grad_norm": 0.0240478515625, "learning_rate": 4.8487108954633054e-05, "loss": 0.0004, "step": 14403 }, { "epoch": 6.722987164527421, "grad_norm": 0.0185546875, "learning_rate": 4.8474532680622556e-05, "loss": 0.0002, "step": 14404 }, { "epoch": 6.723453908984831, "grad_norm": 0.051513671875, "learning_rate": 4.8461957516012526e-05, "loss": 0.0022, "step": 14405 }, { "epoch": 6.72392065344224, "grad_norm": 0.072265625, "learning_rate": 4.844938346107377e-05, "loss": 0.0012, "step": 14406 }, { "epoch": 6.72438739789965, "grad_norm": 0.0869140625, "learning_rate": 4.843681051607696e-05, "loss": 0.0004, "step": 14407 }, { "epoch": 6.724854142357059, "grad_norm": 0.11376953125, "learning_rate": 4.8424238681292825e-05, "loss": 0.001, "step": 14408 }, { "epoch": 6.725320886814469, "grad_norm": 0.431640625, "learning_rate": 4.8411667956992094e-05, "loss": 0.0038, "step": 14409 }, { "epoch": 6.725787631271879, "grad_norm": 0.0927734375, "learning_rate": 4.839909834344535e-05, "loss": 0.0005, "step": 14410 }, { "epoch": 6.7262543757292885, "grad_norm": 0.05615234375, "learning_rate": 4.8386529840923266e-05, "loss": 0.0005, "step": 14411 }, { "epoch": 6.726721120186697, "grad_norm": 0.041259765625, "learning_rate": 4.83739624496965e-05, "loss": 0.0024, "step": 14412 }, { "epoch": 6.727187864644107, "grad_norm": 0.03369140625, "learning_rate": 4.8361396170035566e-05, "loss": 0.0003, "step": 14413 }, { "epoch": 6.727654609101517, "grad_norm": 0.0135498046875, "learning_rate": 4.834883100221107e-05, "loss": 0.0002, "step": 14414 }, { "epoch": 6.728121353558927, "grad_norm": 0.283203125, "learning_rate": 4.833626694649359e-05, "loss": 0.0033, "step": 14415 }, { "epoch": 6.728588098016336, "grad_norm": 0.037109375, "learning_rate": 4.8323704003153555e-05, "loss": 0.0004, "step": 14416 }, { "epoch": 6.7290548424737455, "grad_norm": 0.052978515625, "learning_rate": 4.831114217246152e-05, "loss": 0.0004, "step": 14417 }, { "epoch": 6.729521586931155, "grad_norm": 0.0281982421875, "learning_rate": 4.8298581454687984e-05, "loss": 0.0003, "step": 14418 }, { "epoch": 6.729988331388565, "grad_norm": 0.11669921875, "learning_rate": 4.828602185010335e-05, "loss": 0.0006, "step": 14419 }, { "epoch": 6.730455075845974, "grad_norm": 0.142578125, "learning_rate": 4.8273463358978e-05, "loss": 0.0005, "step": 14420 }, { "epoch": 6.730921820303384, "grad_norm": 0.03564453125, "learning_rate": 4.8260905981582414e-05, "loss": 0.0003, "step": 14421 }, { "epoch": 6.731388564760794, "grad_norm": 0.392578125, "learning_rate": 4.8248349718186894e-05, "loss": 0.003, "step": 14422 }, { "epoch": 6.731855309218203, "grad_norm": 0.1865234375, "learning_rate": 4.823579456906182e-05, "loss": 0.0007, "step": 14423 }, { "epoch": 6.732322053675612, "grad_norm": 0.054443359375, "learning_rate": 4.8223240534477565e-05, "loss": 0.0003, "step": 14424 }, { "epoch": 6.732788798133022, "grad_norm": 0.026123046875, "learning_rate": 4.821068761470434e-05, "loss": 0.0003, "step": 14425 }, { "epoch": 6.733255542590432, "grad_norm": 0.042236328125, "learning_rate": 4.819813581001248e-05, "loss": 0.0003, "step": 14426 }, { "epoch": 6.733722287047842, "grad_norm": 0.052490234375, "learning_rate": 4.818558512067227e-05, "loss": 0.0027, "step": 14427 }, { "epoch": 6.734189031505251, "grad_norm": 0.203125, "learning_rate": 4.817303554695386e-05, "loss": 0.0007, "step": 14428 }, { "epoch": 6.73465577596266, "grad_norm": 0.1103515625, "learning_rate": 4.81604870891275e-05, "loss": 0.0024, "step": 14429 }, { "epoch": 6.73512252042007, "grad_norm": 0.060791015625, "learning_rate": 4.814793974746341e-05, "loss": 0.0003, "step": 14430 }, { "epoch": 6.735589264877479, "grad_norm": 0.3203125, "learning_rate": 4.813539352223167e-05, "loss": 0.0013, "step": 14431 }, { "epoch": 6.736056009334889, "grad_norm": 0.23046875, "learning_rate": 4.812284841370245e-05, "loss": 0.001, "step": 14432 }, { "epoch": 6.736522753792299, "grad_norm": 0.0380859375, "learning_rate": 4.81103044221459e-05, "loss": 0.0022, "step": 14433 }, { "epoch": 6.7369894982497085, "grad_norm": 0.057861328125, "learning_rate": 4.809776154783203e-05, "loss": 0.0004, "step": 14434 }, { "epoch": 6.737456242707118, "grad_norm": 0.051025390625, "learning_rate": 4.8085219791030975e-05, "loss": 0.0023, "step": 14435 }, { "epoch": 6.737922987164527, "grad_norm": 0.020263671875, "learning_rate": 4.8072679152012714e-05, "loss": 0.0002, "step": 14436 }, { "epoch": 6.738389731621937, "grad_norm": 0.06982421875, "learning_rate": 4.8060139631047275e-05, "loss": 0.0005, "step": 14437 }, { "epoch": 6.738856476079347, "grad_norm": 0.421875, "learning_rate": 4.804760122840469e-05, "loss": 0.0048, "step": 14438 }, { "epoch": 6.739323220536756, "grad_norm": 0.0302734375, "learning_rate": 4.803506394435486e-05, "loss": 0.0003, "step": 14439 }, { "epoch": 6.7397899649941655, "grad_norm": 0.1044921875, "learning_rate": 4.802252777916777e-05, "loss": 0.0031, "step": 14440 }, { "epoch": 6.740256709451575, "grad_norm": 0.03125, "learning_rate": 4.800999273311335e-05, "loss": 0.0003, "step": 14441 }, { "epoch": 6.740723453908985, "grad_norm": 0.049072265625, "learning_rate": 4.7997458806461446e-05, "loss": 0.0004, "step": 14442 }, { "epoch": 6.741190198366395, "grad_norm": 0.19921875, "learning_rate": 4.798492599948195e-05, "loss": 0.0009, "step": 14443 }, { "epoch": 6.741656942823804, "grad_norm": 0.2158203125, "learning_rate": 4.797239431244474e-05, "loss": 0.0011, "step": 14444 }, { "epoch": 6.742123687281214, "grad_norm": 0.0888671875, "learning_rate": 4.795986374561956e-05, "loss": 0.0008, "step": 14445 }, { "epoch": 6.742590431738623, "grad_norm": 0.0279541015625, "learning_rate": 4.794733429927627e-05, "loss": 0.0003, "step": 14446 }, { "epoch": 6.743057176196032, "grad_norm": 0.0634765625, "learning_rate": 4.793480597368465e-05, "loss": 0.0003, "step": 14447 }, { "epoch": 6.743523920653442, "grad_norm": 0.0260009765625, "learning_rate": 4.792227876911439e-05, "loss": 0.0003, "step": 14448 }, { "epoch": 6.743990665110852, "grad_norm": 0.1279296875, "learning_rate": 4.7909752685835254e-05, "loss": 0.0007, "step": 14449 }, { "epoch": 6.744457409568262, "grad_norm": 0.07470703125, "learning_rate": 4.789722772411698e-05, "loss": 0.0004, "step": 14450 }, { "epoch": 6.744924154025671, "grad_norm": 0.046875, "learning_rate": 4.788470388422915e-05, "loss": 0.0028, "step": 14451 }, { "epoch": 6.74539089848308, "grad_norm": 0.330078125, "learning_rate": 4.787218116644152e-05, "loss": 0.0019, "step": 14452 }, { "epoch": 6.74585764294049, "grad_norm": 0.26953125, "learning_rate": 4.7859659571023615e-05, "loss": 0.0019, "step": 14453 }, { "epoch": 6.7463243873979, "grad_norm": 0.5, "learning_rate": 4.784713909824511e-05, "loss": 0.0015, "step": 14454 }, { "epoch": 6.746791131855309, "grad_norm": 0.134765625, "learning_rate": 4.783461974837559e-05, "loss": 0.0029, "step": 14455 }, { "epoch": 6.747257876312719, "grad_norm": 0.2470703125, "learning_rate": 4.782210152168455e-05, "loss": 0.0011, "step": 14456 }, { "epoch": 6.7477246207701285, "grad_norm": 0.036865234375, "learning_rate": 4.780958441844155e-05, "loss": 0.0004, "step": 14457 }, { "epoch": 6.748191365227538, "grad_norm": 0.05908203125, "learning_rate": 4.7797068438916145e-05, "loss": 0.0019, "step": 14458 }, { "epoch": 6.748658109684947, "grad_norm": 0.018310546875, "learning_rate": 4.778455358337774e-05, "loss": 0.0003, "step": 14459 }, { "epoch": 6.749124854142357, "grad_norm": 0.04052734375, "learning_rate": 4.777203985209583e-05, "loss": 0.0004, "step": 14460 }, { "epoch": 6.749591598599767, "grad_norm": 0.265625, "learning_rate": 4.7759527245339884e-05, "loss": 0.0019, "step": 14461 }, { "epoch": 6.750058343057177, "grad_norm": 0.15234375, "learning_rate": 4.7747015763379245e-05, "loss": 0.0123, "step": 14462 }, { "epoch": 6.7505250875145855, "grad_norm": 0.04248046875, "learning_rate": 4.7734505406483334e-05, "loss": 0.0004, "step": 14463 }, { "epoch": 6.750991831971995, "grad_norm": 0.0301513671875, "learning_rate": 4.7721996174921544e-05, "loss": 0.0003, "step": 14464 }, { "epoch": 6.751458576429405, "grad_norm": 0.34375, "learning_rate": 4.770948806896315e-05, "loss": 0.0025, "step": 14465 }, { "epoch": 6.751925320886815, "grad_norm": 0.2177734375, "learning_rate": 4.769698108887749e-05, "loss": 0.0009, "step": 14466 }, { "epoch": 6.752392065344224, "grad_norm": 0.26953125, "learning_rate": 4.768447523493393e-05, "loss": 0.0035, "step": 14467 }, { "epoch": 6.7528588098016336, "grad_norm": 0.05078125, "learning_rate": 4.76719705074016e-05, "loss": 0.0024, "step": 14468 }, { "epoch": 6.753325554259043, "grad_norm": 0.05126953125, "learning_rate": 4.7659466906549785e-05, "loss": 0.0004, "step": 14469 }, { "epoch": 6.753792298716453, "grad_norm": 0.16015625, "learning_rate": 4.764696443264778e-05, "loss": 0.0007, "step": 14470 }, { "epoch": 6.754259043173862, "grad_norm": 0.126953125, "learning_rate": 4.763446308596467e-05, "loss": 0.0006, "step": 14471 }, { "epoch": 6.754725787631272, "grad_norm": 0.0771484375, "learning_rate": 4.7621962866769664e-05, "loss": 0.0004, "step": 14472 }, { "epoch": 6.754725787631272, "eval_loss": 2.3073253631591797, "eval_runtime": 85.1391, "eval_samples_per_second": 21.189, "eval_steps_per_second": 2.654, "step": 14472 }, { "epoch": 6.755192532088682, "grad_norm": 0.134765625, "learning_rate": 4.760946377533195e-05, "loss": 0.0005, "step": 14473 }, { "epoch": 6.7556592765460906, "grad_norm": 0.0693359375, "learning_rate": 4.759696581192058e-05, "loss": 0.0004, "step": 14474 }, { "epoch": 6.7561260210035, "grad_norm": 0.046142578125, "learning_rate": 4.7584468976804674e-05, "loss": 0.0004, "step": 14475 }, { "epoch": 6.75659276546091, "grad_norm": 0.0712890625, "learning_rate": 4.757197327025335e-05, "loss": 0.0006, "step": 14476 }, { "epoch": 6.75705950991832, "grad_norm": 0.0157470703125, "learning_rate": 4.7559478692535565e-05, "loss": 0.0002, "step": 14477 }, { "epoch": 6.75752625437573, "grad_norm": 0.033935546875, "learning_rate": 4.7546985243920386e-05, "loss": 0.0004, "step": 14478 }, { "epoch": 6.757992998833139, "grad_norm": 0.11181640625, "learning_rate": 4.7534492924676854e-05, "loss": 0.0033, "step": 14479 }, { "epoch": 6.758459743290548, "grad_norm": 0.306640625, "learning_rate": 4.752200173507387e-05, "loss": 0.0012, "step": 14480 }, { "epoch": 6.758926487747958, "grad_norm": 0.029296875, "learning_rate": 4.7509511675380406e-05, "loss": 0.0023, "step": 14481 }, { "epoch": 6.759393232205367, "grad_norm": 0.2197265625, "learning_rate": 4.7497022745865436e-05, "loss": 0.0021, "step": 14482 }, { "epoch": 6.759859976662777, "grad_norm": 0.072265625, "learning_rate": 4.748453494679778e-05, "loss": 0.0003, "step": 14483 }, { "epoch": 6.760326721120187, "grad_norm": 0.3203125, "learning_rate": 4.747204827844639e-05, "loss": 0.002, "step": 14484 }, { "epoch": 6.7607934655775965, "grad_norm": 0.056396484375, "learning_rate": 4.745956274108005e-05, "loss": 0.0003, "step": 14485 }, { "epoch": 6.761260210035006, "grad_norm": 0.0537109375, "learning_rate": 4.744707833496761e-05, "loss": 0.0003, "step": 14486 }, { "epoch": 6.761726954492415, "grad_norm": 0.08447265625, "learning_rate": 4.7434595060377926e-05, "loss": 0.0026, "step": 14487 }, { "epoch": 6.762193698949825, "grad_norm": 0.07763671875, "learning_rate": 4.7422112917579686e-05, "loss": 0.0006, "step": 14488 }, { "epoch": 6.762660443407235, "grad_norm": 0.0223388671875, "learning_rate": 4.74096319068417e-05, "loss": 0.0002, "step": 14489 }, { "epoch": 6.763127187864644, "grad_norm": 0.275390625, "learning_rate": 4.7397152028432724e-05, "loss": 0.0045, "step": 14490 }, { "epoch": 6.7635939323220535, "grad_norm": 0.0291748046875, "learning_rate": 4.738467328262139e-05, "loss": 0.0024, "step": 14491 }, { "epoch": 6.764060676779463, "grad_norm": 0.059814453125, "learning_rate": 4.7372195669676425e-05, "loss": 0.0005, "step": 14492 }, { "epoch": 6.764527421236873, "grad_norm": 0.0634765625, "learning_rate": 4.735971918986651e-05, "loss": 0.0005, "step": 14493 }, { "epoch": 6.764994165694282, "grad_norm": 0.0186767578125, "learning_rate": 4.734724384346021e-05, "loss": 0.0003, "step": 14494 }, { "epoch": 6.765460910151692, "grad_norm": 0.056396484375, "learning_rate": 4.733476963072617e-05, "loss": 0.0019, "step": 14495 }, { "epoch": 6.765927654609102, "grad_norm": 0.1376953125, "learning_rate": 4.732229655193301e-05, "loss": 0.0008, "step": 14496 }, { "epoch": 6.766394399066511, "grad_norm": 0.056640625, "learning_rate": 4.7309824607349204e-05, "loss": 0.0032, "step": 14497 }, { "epoch": 6.76686114352392, "grad_norm": 0.03857421875, "learning_rate": 4.729735379724335e-05, "loss": 0.0003, "step": 14498 }, { "epoch": 6.76732788798133, "grad_norm": 0.03564453125, "learning_rate": 4.728488412188398e-05, "loss": 0.0003, "step": 14499 }, { "epoch": 6.76779463243874, "grad_norm": 0.197265625, "learning_rate": 4.7272415581539495e-05, "loss": 0.0039, "step": 14500 }, { "epoch": 6.76826137689615, "grad_norm": 0.0169677734375, "learning_rate": 4.725994817647845e-05, "loss": 0.0003, "step": 14501 }, { "epoch": 6.768728121353559, "grad_norm": 0.130859375, "learning_rate": 4.72474819069692e-05, "loss": 0.0038, "step": 14502 }, { "epoch": 6.769194865810968, "grad_norm": 0.028076171875, "learning_rate": 4.723501677328019e-05, "loss": 0.0003, "step": 14503 }, { "epoch": 6.769661610268378, "grad_norm": 0.146484375, "learning_rate": 4.7222552775679854e-05, "loss": 0.0007, "step": 14504 }, { "epoch": 6.770128354725788, "grad_norm": 0.0498046875, "learning_rate": 4.7210089914436475e-05, "loss": 0.0004, "step": 14505 }, { "epoch": 6.770595099183197, "grad_norm": 0.10498046875, "learning_rate": 4.719762818981843e-05, "loss": 0.0049, "step": 14506 }, { "epoch": 6.771061843640607, "grad_norm": 0.01007080078125, "learning_rate": 4.7185167602094085e-05, "loss": 0.0002, "step": 14507 }, { "epoch": 6.7715285880980165, "grad_norm": 0.095703125, "learning_rate": 4.717270815153163e-05, "loss": 0.0025, "step": 14508 }, { "epoch": 6.771995332555425, "grad_norm": 0.0751953125, "learning_rate": 4.716024983839938e-05, "loss": 0.0006, "step": 14509 }, { "epoch": 6.772462077012835, "grad_norm": 0.08740234375, "learning_rate": 4.7147792662965614e-05, "loss": 0.0004, "step": 14510 }, { "epoch": 6.772928821470245, "grad_norm": 0.0306396484375, "learning_rate": 4.713533662549848e-05, "loss": 0.0004, "step": 14511 }, { "epoch": 6.773395565927655, "grad_norm": 0.02587890625, "learning_rate": 4.712288172626621e-05, "loss": 0.0003, "step": 14512 }, { "epoch": 6.773862310385065, "grad_norm": 0.017578125, "learning_rate": 4.7110427965537e-05, "loss": 0.0003, "step": 14513 }, { "epoch": 6.7743290548424735, "grad_norm": 0.173828125, "learning_rate": 4.709797534357891e-05, "loss": 0.0005, "step": 14514 }, { "epoch": 6.774795799299883, "grad_norm": 0.1142578125, "learning_rate": 4.7085523860660154e-05, "loss": 0.0026, "step": 14515 }, { "epoch": 6.775262543757293, "grad_norm": 0.04736328125, "learning_rate": 4.707307351704877e-05, "loss": 0.0005, "step": 14516 }, { "epoch": 6.775729288214702, "grad_norm": 0.0206298828125, "learning_rate": 4.706062431301281e-05, "loss": 0.0003, "step": 14517 }, { "epoch": 6.776196032672112, "grad_norm": 0.12890625, "learning_rate": 4.704817624882034e-05, "loss": 0.0005, "step": 14518 }, { "epoch": 6.776662777129522, "grad_norm": 0.046630859375, "learning_rate": 4.703572932473942e-05, "loss": 0.0003, "step": 14519 }, { "epoch": 6.777129521586931, "grad_norm": 0.02099609375, "learning_rate": 4.702328354103798e-05, "loss": 0.0002, "step": 14520 }, { "epoch": 6.777596266044341, "grad_norm": 0.0299072265625, "learning_rate": 4.701083889798402e-05, "loss": 0.0003, "step": 14521 }, { "epoch": 6.77806301050175, "grad_norm": 0.1474609375, "learning_rate": 4.699839539584553e-05, "loss": 0.0045, "step": 14522 }, { "epoch": 6.77852975495916, "grad_norm": 0.302734375, "learning_rate": 4.698595303489036e-05, "loss": 0.0019, "step": 14523 }, { "epoch": 6.77899649941657, "grad_norm": 0.00885009765625, "learning_rate": 4.697351181538644e-05, "loss": 0.0002, "step": 14524 }, { "epoch": 6.779463243873979, "grad_norm": 0.0233154296875, "learning_rate": 4.696107173760168e-05, "loss": 0.0003, "step": 14525 }, { "epoch": 6.779929988331388, "grad_norm": 0.01446533203125, "learning_rate": 4.694863280180386e-05, "loss": 0.0003, "step": 14526 }, { "epoch": 6.780396732788798, "grad_norm": 0.034912109375, "learning_rate": 4.693619500826083e-05, "loss": 0.0003, "step": 14527 }, { "epoch": 6.780863477246208, "grad_norm": 0.0546875, "learning_rate": 4.6923758357240446e-05, "loss": 0.0004, "step": 14528 }, { "epoch": 6.781330221703618, "grad_norm": 0.04150390625, "learning_rate": 4.691132284901039e-05, "loss": 0.0003, "step": 14529 }, { "epoch": 6.781796966161027, "grad_norm": 0.06298828125, "learning_rate": 4.6898888483838446e-05, "loss": 0.0004, "step": 14530 }, { "epoch": 6.7822637106184365, "grad_norm": 0.07421875, "learning_rate": 4.688645526199239e-05, "loss": 0.0033, "step": 14531 }, { "epoch": 6.782730455075846, "grad_norm": 0.05029296875, "learning_rate": 4.687402318373986e-05, "loss": 0.0003, "step": 14532 }, { "epoch": 6.783197199533255, "grad_norm": 0.061767578125, "learning_rate": 4.686159224934857e-05, "loss": 0.0025, "step": 14533 }, { "epoch": 6.783663943990665, "grad_norm": 0.02587890625, "learning_rate": 4.684916245908614e-05, "loss": 0.0002, "step": 14534 }, { "epoch": 6.784130688448075, "grad_norm": 0.024169921875, "learning_rate": 4.68367338132202e-05, "loss": 0.0004, "step": 14535 }, { "epoch": 6.784597432905485, "grad_norm": 0.08837890625, "learning_rate": 4.682430631201842e-05, "loss": 0.0052, "step": 14536 }, { "epoch": 6.7850641773628935, "grad_norm": 0.025390625, "learning_rate": 4.681187995574827e-05, "loss": 0.0003, "step": 14537 }, { "epoch": 6.785530921820303, "grad_norm": 0.0654296875, "learning_rate": 4.6799454744677376e-05, "loss": 0.0004, "step": 14538 }, { "epoch": 6.785997666277713, "grad_norm": 0.076171875, "learning_rate": 4.678703067907327e-05, "loss": 0.0023, "step": 14539 }, { "epoch": 6.786464410735123, "grad_norm": 0.1845703125, "learning_rate": 4.6774607759203407e-05, "loss": 0.0005, "step": 14540 }, { "epoch": 6.786931155192532, "grad_norm": 0.022216796875, "learning_rate": 4.6762185985335284e-05, "loss": 0.0002, "step": 14541 }, { "epoch": 6.787397899649942, "grad_norm": 0.046875, "learning_rate": 4.6749765357736405e-05, "loss": 0.0004, "step": 14542 }, { "epoch": 6.787864644107351, "grad_norm": 0.142578125, "learning_rate": 4.6737345876674134e-05, "loss": 0.0007, "step": 14543 }, { "epoch": 6.788331388564761, "grad_norm": 0.033203125, "learning_rate": 4.672492754241589e-05, "loss": 0.0004, "step": 14544 }, { "epoch": 6.78879813302217, "grad_norm": 0.0888671875, "learning_rate": 4.67125103552291e-05, "loss": 0.0025, "step": 14545 }, { "epoch": 6.78926487747958, "grad_norm": 0.040771484375, "learning_rate": 4.670009431538106e-05, "loss": 0.0004, "step": 14546 }, { "epoch": 6.78973162193699, "grad_norm": 0.0302734375, "learning_rate": 4.6687679423139117e-05, "loss": 0.0003, "step": 14547 }, { "epoch": 6.790198366394399, "grad_norm": 0.0166015625, "learning_rate": 4.667526567877063e-05, "loss": 0.0002, "step": 14548 }, { "epoch": 6.790665110851808, "grad_norm": 0.345703125, "learning_rate": 4.66628530825428e-05, "loss": 0.0008, "step": 14549 }, { "epoch": 6.791131855309218, "grad_norm": 0.0216064453125, "learning_rate": 4.665044163472292e-05, "loss": 0.0003, "step": 14550 }, { "epoch": 6.791598599766628, "grad_norm": 0.057861328125, "learning_rate": 4.663803133557826e-05, "loss": 0.0003, "step": 14551 }, { "epoch": 6.792065344224037, "grad_norm": 0.0859375, "learning_rate": 4.662562218537596e-05, "loss": 0.0004, "step": 14552 }, { "epoch": 6.792532088681447, "grad_norm": 0.037353515625, "learning_rate": 4.661321418438327e-05, "loss": 0.0003, "step": 14553 }, { "epoch": 6.792998833138856, "grad_norm": 0.1650390625, "learning_rate": 4.660080733286727e-05, "loss": 0.0005, "step": 14554 }, { "epoch": 6.793465577596266, "grad_norm": 0.03564453125, "learning_rate": 4.658840163109514e-05, "loss": 0.0003, "step": 14555 }, { "epoch": 6.793932322053676, "grad_norm": 0.0625, "learning_rate": 4.657599707933401e-05, "loss": 0.0005, "step": 14556 }, { "epoch": 6.794399066511085, "grad_norm": 0.018310546875, "learning_rate": 4.656359367785091e-05, "loss": 0.0003, "step": 14557 }, { "epoch": 6.794865810968495, "grad_norm": 0.11474609375, "learning_rate": 4.655119142691293e-05, "loss": 0.0005, "step": 14558 }, { "epoch": 6.7953325554259045, "grad_norm": 0.025146484375, "learning_rate": 4.653879032678714e-05, "loss": 0.0003, "step": 14559 }, { "epoch": 6.795799299883313, "grad_norm": 0.0247802734375, "learning_rate": 4.652639037774047e-05, "loss": 0.0003, "step": 14560 }, { "epoch": 6.796266044340723, "grad_norm": 0.08544921875, "learning_rate": 4.651399158003994e-05, "loss": 0.0003, "step": 14561 }, { "epoch": 6.796732788798133, "grad_norm": 0.01513671875, "learning_rate": 4.650159393395256e-05, "loss": 0.0002, "step": 14562 }, { "epoch": 6.797199533255543, "grad_norm": 0.2333984375, "learning_rate": 4.648919743974521e-05, "loss": 0.0009, "step": 14563 }, { "epoch": 6.797666277712953, "grad_norm": 0.04296875, "learning_rate": 4.647680209768478e-05, "loss": 0.0003, "step": 14564 }, { "epoch": 6.7981330221703615, "grad_norm": 0.060302734375, "learning_rate": 4.646440790803822e-05, "loss": 0.0005, "step": 14565 }, { "epoch": 6.798599766627771, "grad_norm": 0.09814453125, "learning_rate": 4.6452014871072325e-05, "loss": 0.0003, "step": 14566 }, { "epoch": 6.799066511085181, "grad_norm": 0.06787109375, "learning_rate": 4.643962298705395e-05, "loss": 0.0033, "step": 14567 }, { "epoch": 6.79953325554259, "grad_norm": 0.1611328125, "learning_rate": 4.642723225624997e-05, "loss": 0.0009, "step": 14568 }, { "epoch": 6.8, "grad_norm": 0.033447265625, "learning_rate": 4.6414842678927085e-05, "loss": 0.0003, "step": 14569 }, { "epoch": 6.80046674445741, "grad_norm": 0.0341796875, "learning_rate": 4.640245425535208e-05, "loss": 0.0004, "step": 14570 }, { "epoch": 6.800933488914819, "grad_norm": 0.0693359375, "learning_rate": 4.639006698579174e-05, "loss": 0.0004, "step": 14571 }, { "epoch": 6.801400233372228, "grad_norm": 0.031005859375, "learning_rate": 4.637768087051271e-05, "loss": 0.0003, "step": 14572 }, { "epoch": 6.801866977829638, "grad_norm": 0.10498046875, "learning_rate": 4.636529590978169e-05, "loss": 0.0005, "step": 14573 }, { "epoch": 6.802333722287048, "grad_norm": 0.03759765625, "learning_rate": 4.635291210386541e-05, "loss": 0.0004, "step": 14574 }, { "epoch": 6.802800466744458, "grad_norm": 0.014404296875, "learning_rate": 4.6340529453030416e-05, "loss": 0.0002, "step": 14575 }, { "epoch": 6.803267211201867, "grad_norm": 0.0184326171875, "learning_rate": 4.6328147957543354e-05, "loss": 0.0002, "step": 14576 }, { "epoch": 6.803733955659276, "grad_norm": 0.016357421875, "learning_rate": 4.631576761767085e-05, "loss": 0.0002, "step": 14577 }, { "epoch": 6.804200700116686, "grad_norm": 0.0152587890625, "learning_rate": 4.630338843367941e-05, "loss": 0.0002, "step": 14578 }, { "epoch": 6.804667444574096, "grad_norm": 0.0859375, "learning_rate": 4.629101040583558e-05, "loss": 0.0004, "step": 14579 }, { "epoch": 6.805134189031505, "grad_norm": 0.0732421875, "learning_rate": 4.6278633534405925e-05, "loss": 0.0004, "step": 14580 }, { "epoch": 6.805600933488915, "grad_norm": 0.32421875, "learning_rate": 4.6266257819656866e-05, "loss": 0.0042, "step": 14581 }, { "epoch": 6.8060676779463245, "grad_norm": 0.041015625, "learning_rate": 4.6253883261854924e-05, "loss": 0.0003, "step": 14582 }, { "epoch": 6.806534422403734, "grad_norm": 0.16015625, "learning_rate": 4.6241509861266474e-05, "loss": 0.0008, "step": 14583 }, { "epoch": 6.807001166861143, "grad_norm": 0.2333984375, "learning_rate": 4.622913761815797e-05, "loss": 0.0009, "step": 14584 }, { "epoch": 6.807467911318553, "grad_norm": 0.09814453125, "learning_rate": 4.621676653279582e-05, "loss": 0.0005, "step": 14585 }, { "epoch": 6.807934655775963, "grad_norm": 0.08984375, "learning_rate": 4.6204396605446333e-05, "loss": 0.0004, "step": 14586 }, { "epoch": 6.808401400233373, "grad_norm": 0.0184326171875, "learning_rate": 4.6192027836375865e-05, "loss": 0.0003, "step": 14587 }, { "epoch": 6.8088681446907815, "grad_norm": 0.36328125, "learning_rate": 4.6179660225850776e-05, "loss": 0.002, "step": 14588 }, { "epoch": 6.809334889148191, "grad_norm": 0.048095703125, "learning_rate": 4.616729377413729e-05, "loss": 0.0003, "step": 14589 }, { "epoch": 6.809801633605601, "grad_norm": 0.01312255859375, "learning_rate": 4.6154928481501684e-05, "loss": 0.0002, "step": 14590 }, { "epoch": 6.810268378063011, "grad_norm": 0.017822265625, "learning_rate": 4.6142564348210246e-05, "loss": 0.0002, "step": 14591 }, { "epoch": 6.81073512252042, "grad_norm": 0.037109375, "learning_rate": 4.6130201374529126e-05, "loss": 0.0003, "step": 14592 }, { "epoch": 6.81120186697783, "grad_norm": 0.01373291015625, "learning_rate": 4.611783956072454e-05, "loss": 0.0002, "step": 14593 }, { "epoch": 6.811668611435239, "grad_norm": 0.02880859375, "learning_rate": 4.610547890706268e-05, "loss": 0.0003, "step": 14594 }, { "epoch": 6.812135355892648, "grad_norm": 0.02392578125, "learning_rate": 4.609311941380964e-05, "loss": 0.0003, "step": 14595 }, { "epoch": 6.812602100350058, "grad_norm": 0.03955078125, "learning_rate": 4.6080761081231525e-05, "loss": 0.0024, "step": 14596 }, { "epoch": 6.813068844807468, "grad_norm": 0.09716796875, "learning_rate": 4.606840390959449e-05, "loss": 0.0004, "step": 14597 }, { "epoch": 6.813535589264878, "grad_norm": 0.0556640625, "learning_rate": 4.6056047899164535e-05, "loss": 0.0021, "step": 14598 }, { "epoch": 6.8140023337222875, "grad_norm": 0.060791015625, "learning_rate": 4.6043693050207704e-05, "loss": 0.0004, "step": 14599 }, { "epoch": 6.814469078179696, "grad_norm": 0.0157470703125, "learning_rate": 4.6031339362990075e-05, "loss": 0.0001, "step": 14600 }, { "epoch": 6.814935822637106, "grad_norm": 0.014892578125, "learning_rate": 4.601898683777754e-05, "loss": 0.0002, "step": 14601 }, { "epoch": 6.815402567094516, "grad_norm": 0.11083984375, "learning_rate": 4.600663547483617e-05, "loss": 0.0005, "step": 14602 }, { "epoch": 6.815869311551925, "grad_norm": 0.1787109375, "learning_rate": 4.599428527443179e-05, "loss": 0.002, "step": 14603 }, { "epoch": 6.816336056009335, "grad_norm": 0.020263671875, "learning_rate": 4.598193623683037e-05, "loss": 0.0003, "step": 14604 }, { "epoch": 6.8168028004667445, "grad_norm": 0.197265625, "learning_rate": 4.596958836229783e-05, "loss": 0.0005, "step": 14605 }, { "epoch": 6.817269544924154, "grad_norm": 0.037353515625, "learning_rate": 4.595724165109998e-05, "loss": 0.0003, "step": 14606 }, { "epoch": 6.817736289381564, "grad_norm": 0.05126953125, "learning_rate": 4.5944896103502666e-05, "loss": 0.0004, "step": 14607 }, { "epoch": 6.818203033838973, "grad_norm": 0.07470703125, "learning_rate": 4.593255171977176e-05, "loss": 0.0005, "step": 14608 }, { "epoch": 6.818669778296383, "grad_norm": 0.158203125, "learning_rate": 4.592020850017296e-05, "loss": 0.0006, "step": 14609 }, { "epoch": 6.819136522753793, "grad_norm": 0.169921875, "learning_rate": 4.5907866444972104e-05, "loss": 0.0005, "step": 14610 }, { "epoch": 6.8196032672112015, "grad_norm": 0.049072265625, "learning_rate": 4.589552555443487e-05, "loss": 0.0004, "step": 14611 }, { "epoch": 6.820070011668611, "grad_norm": 0.07421875, "learning_rate": 4.588318582882704e-05, "loss": 0.0038, "step": 14612 }, { "epoch": 6.820536756126021, "grad_norm": 0.034423828125, "learning_rate": 4.587084726841422e-05, "loss": 0.0018, "step": 14613 }, { "epoch": 6.821003500583431, "grad_norm": 0.0255126953125, "learning_rate": 4.5858509873462164e-05, "loss": 0.0002, "step": 14614 }, { "epoch": 6.82147024504084, "grad_norm": 0.08447265625, "learning_rate": 4.5846173644236415e-05, "loss": 0.0004, "step": 14615 }, { "epoch": 6.82193698949825, "grad_norm": 0.453125, "learning_rate": 4.583383858100263e-05, "loss": 0.0021, "step": 14616 }, { "epoch": 6.822403733955659, "grad_norm": 0.1748046875, "learning_rate": 4.5821504684026436e-05, "loss": 0.001, "step": 14617 }, { "epoch": 6.822870478413069, "grad_norm": 0.01080322265625, "learning_rate": 4.580917195357333e-05, "loss": 0.0002, "step": 14618 }, { "epoch": 6.823337222870478, "grad_norm": 0.05615234375, "learning_rate": 4.579684038990887e-05, "loss": 0.0004, "step": 14619 }, { "epoch": 6.823803967327888, "grad_norm": 0.05908203125, "learning_rate": 4.578450999329862e-05, "loss": 0.0004, "step": 14620 }, { "epoch": 6.824270711785298, "grad_norm": 0.06201171875, "learning_rate": 4.577218076400798e-05, "loss": 0.0003, "step": 14621 }, { "epoch": 6.8247374562427074, "grad_norm": 0.0810546875, "learning_rate": 4.575985270230245e-05, "loss": 0.0045, "step": 14622 }, { "epoch": 6.825204200700116, "grad_norm": 0.0888671875, "learning_rate": 4.5747525808447524e-05, "loss": 0.0003, "step": 14623 }, { "epoch": 6.825670945157526, "grad_norm": 0.0203857421875, "learning_rate": 4.573520008270851e-05, "loss": 0.0002, "step": 14624 }, { "epoch": 6.826137689614936, "grad_norm": 0.09423828125, "learning_rate": 4.572287552535085e-05, "loss": 0.0005, "step": 14625 }, { "epoch": 6.826604434072346, "grad_norm": 0.484375, "learning_rate": 4.571055213663994e-05, "loss": 0.0019, "step": 14626 }, { "epoch": 6.827071178529755, "grad_norm": 0.68359375, "learning_rate": 4.569822991684104e-05, "loss": 0.0021, "step": 14627 }, { "epoch": 6.827537922987164, "grad_norm": 0.150390625, "learning_rate": 4.568590886621951e-05, "loss": 0.0005, "step": 14628 }, { "epoch": 6.828004667444574, "grad_norm": 0.07275390625, "learning_rate": 4.567358898504065e-05, "loss": 0.0046, "step": 14629 }, { "epoch": 6.828471411901984, "grad_norm": 0.2578125, "learning_rate": 4.5661270273569654e-05, "loss": 0.0026, "step": 14630 }, { "epoch": 6.828938156359393, "grad_norm": 0.1767578125, "learning_rate": 4.564895273207184e-05, "loss": 0.0007, "step": 14631 }, { "epoch": 6.829404900816803, "grad_norm": 0.146484375, "learning_rate": 4.563663636081235e-05, "loss": 0.0008, "step": 14632 }, { "epoch": 6.8298716452742125, "grad_norm": 0.09375, "learning_rate": 4.562432116005638e-05, "loss": 0.0005, "step": 14633 }, { "epoch": 6.830338389731622, "grad_norm": 0.0264892578125, "learning_rate": 4.5612007130069155e-05, "loss": 0.0002, "step": 14634 }, { "epoch": 6.830805134189031, "grad_norm": 0.045166015625, "learning_rate": 4.559969427111571e-05, "loss": 0.0003, "step": 14635 }, { "epoch": 6.831271878646441, "grad_norm": 0.08544921875, "learning_rate": 4.5587382583461225e-05, "loss": 0.0006, "step": 14636 }, { "epoch": 6.831738623103851, "grad_norm": 0.04345703125, "learning_rate": 4.557507206737079e-05, "loss": 0.0021, "step": 14637 }, { "epoch": 6.83220536756126, "grad_norm": 0.3046875, "learning_rate": 4.556276272310941e-05, "loss": 0.0013, "step": 14638 }, { "epoch": 6.8326721120186695, "grad_norm": 0.0849609375, "learning_rate": 4.5550454550942136e-05, "loss": 0.0005, "step": 14639 }, { "epoch": 6.833138856476079, "grad_norm": 0.040771484375, "learning_rate": 4.5538147551134034e-05, "loss": 0.0003, "step": 14640 }, { "epoch": 6.833605600933489, "grad_norm": 0.0189208984375, "learning_rate": 4.552584172395e-05, "loss": 0.0002, "step": 14641 }, { "epoch": 6.834072345390899, "grad_norm": 0.0196533203125, "learning_rate": 4.5513537069655035e-05, "loss": 0.0003, "step": 14642 }, { "epoch": 6.834539089848308, "grad_norm": 0.08984375, "learning_rate": 4.5501233588514115e-05, "loss": 0.0006, "step": 14643 }, { "epoch": 6.835005834305718, "grad_norm": 0.0341796875, "learning_rate": 4.5488931280792055e-05, "loss": 0.0003, "step": 14644 }, { "epoch": 6.835472578763127, "grad_norm": 0.11767578125, "learning_rate": 4.5476630146753784e-05, "loss": 0.0011, "step": 14645 }, { "epoch": 6.835939323220536, "grad_norm": 0.04296875, "learning_rate": 4.546433018666421e-05, "loss": 0.0005, "step": 14646 }, { "epoch": 6.836406067677946, "grad_norm": 0.25, "learning_rate": 4.5452031400788075e-05, "loss": 0.0018, "step": 14647 }, { "epoch": 6.836872812135356, "grad_norm": 0.059326171875, "learning_rate": 4.5439733789390216e-05, "loss": 0.0004, "step": 14648 }, { "epoch": 6.837339556592766, "grad_norm": 0.1435546875, "learning_rate": 4.5427437352735466e-05, "loss": 0.0007, "step": 14649 }, { "epoch": 6.8378063010501755, "grad_norm": 0.322265625, "learning_rate": 4.5415142091088514e-05, "loss": 0.0038, "step": 14650 }, { "epoch": 6.838273045507584, "grad_norm": 0.032470703125, "learning_rate": 4.540284800471414e-05, "loss": 0.0002, "step": 14651 }, { "epoch": 6.838739789964994, "grad_norm": 0.2060546875, "learning_rate": 4.5390555093876996e-05, "loss": 0.0051, "step": 14652 }, { "epoch": 6.839206534422404, "grad_norm": 0.2041015625, "learning_rate": 4.5378263358841786e-05, "loss": 0.001, "step": 14653 }, { "epoch": 6.839673278879813, "grad_norm": 0.012451171875, "learning_rate": 4.5365972799873205e-05, "loss": 0.0002, "step": 14654 }, { "epoch": 6.840140023337223, "grad_norm": 0.045166015625, "learning_rate": 4.535368341723581e-05, "loss": 0.0003, "step": 14655 }, { "epoch": 6.8406067677946325, "grad_norm": 0.052734375, "learning_rate": 4.534139521119425e-05, "loss": 0.0025, "step": 14656 }, { "epoch": 6.841073512252042, "grad_norm": 0.07568359375, "learning_rate": 4.5329108182013124e-05, "loss": 0.0036, "step": 14657 }, { "epoch": 6.841540256709451, "grad_norm": 0.06591796875, "learning_rate": 4.531682232995697e-05, "loss": 0.0031, "step": 14658 }, { "epoch": 6.842007001166861, "grad_norm": 0.0458984375, "learning_rate": 4.530453765529025e-05, "loss": 0.0028, "step": 14659 }, { "epoch": 6.842473745624271, "grad_norm": 0.037841796875, "learning_rate": 4.5292254158277526e-05, "loss": 0.0003, "step": 14660 }, { "epoch": 6.842940490081681, "grad_norm": 0.03173828125, "learning_rate": 4.527997183918331e-05, "loss": 0.0003, "step": 14661 }, { "epoch": 6.8434072345390895, "grad_norm": 0.05126953125, "learning_rate": 4.526769069827197e-05, "loss": 0.002, "step": 14662 }, { "epoch": 6.843873978996499, "grad_norm": 0.11279296875, "learning_rate": 4.5255410735808015e-05, "loss": 0.0004, "step": 14663 }, { "epoch": 6.844340723453909, "grad_norm": 0.052734375, "learning_rate": 4.5243131952055775e-05, "loss": 0.0003, "step": 14664 }, { "epoch": 6.844807467911319, "grad_norm": 0.0888671875, "learning_rate": 4.523085434727965e-05, "loss": 0.0034, "step": 14665 }, { "epoch": 6.845274212368728, "grad_norm": 0.03515625, "learning_rate": 4.521857792174404e-05, "loss": 0.0003, "step": 14666 }, { "epoch": 6.845740956826138, "grad_norm": 0.031494140625, "learning_rate": 4.5206302675713195e-05, "loss": 0.0004, "step": 14667 }, { "epoch": 6.846207701283547, "grad_norm": 0.224609375, "learning_rate": 4.5194028609451445e-05, "loss": 0.001, "step": 14668 }, { "epoch": 6.846674445740957, "grad_norm": 0.08154296875, "learning_rate": 4.5181755723223095e-05, "loss": 0.0004, "step": 14669 }, { "epoch": 6.847141190198366, "grad_norm": 0.057373046875, "learning_rate": 4.516948401729234e-05, "loss": 0.0004, "step": 14670 }, { "epoch": 6.847607934655776, "grad_norm": 0.04833984375, "learning_rate": 4.5157213491923435e-05, "loss": 0.0026, "step": 14671 }, { "epoch": 6.848074679113186, "grad_norm": 0.0164794921875, "learning_rate": 4.51449441473806e-05, "loss": 0.0003, "step": 14672 }, { "epoch": 6.8485414235705955, "grad_norm": 0.06591796875, "learning_rate": 4.513267598392794e-05, "loss": 0.0032, "step": 14673 }, { "epoch": 6.849008168028004, "grad_norm": 0.2265625, "learning_rate": 4.5120409001829646e-05, "loss": 0.0008, "step": 14674 }, { "epoch": 6.849474912485414, "grad_norm": 0.0220947265625, "learning_rate": 4.5108143201349876e-05, "loss": 0.0003, "step": 14675 }, { "epoch": 6.849941656942824, "grad_norm": 0.0296630859375, "learning_rate": 4.5095878582752646e-05, "loss": 0.0002, "step": 14676 }, { "epoch": 6.850408401400234, "grad_norm": 0.07861328125, "learning_rate": 4.508361514630207e-05, "loss": 0.0005, "step": 14677 }, { "epoch": 6.850875145857643, "grad_norm": 0.033203125, "learning_rate": 4.507135289226223e-05, "loss": 0.0003, "step": 14678 }, { "epoch": 6.8513418903150525, "grad_norm": 0.02587890625, "learning_rate": 4.505909182089707e-05, "loss": 0.0003, "step": 14679 }, { "epoch": 6.851808634772462, "grad_norm": 0.61328125, "learning_rate": 4.504683193247065e-05, "loss": 0.0025, "step": 14680 }, { "epoch": 6.852275379229871, "grad_norm": 0.08984375, "learning_rate": 4.503457322724688e-05, "loss": 0.0038, "step": 14681 }, { "epoch": 6.852742123687281, "grad_norm": 0.123046875, "learning_rate": 4.502231570548973e-05, "loss": 0.0006, "step": 14682 }, { "epoch": 6.853208868144691, "grad_norm": 0.11181640625, "learning_rate": 4.501005936746315e-05, "loss": 0.0007, "step": 14683 }, { "epoch": 6.853675612602101, "grad_norm": 0.087890625, "learning_rate": 4.499780421343098e-05, "loss": 0.0041, "step": 14684 }, { "epoch": 6.85414235705951, "grad_norm": 0.0146484375, "learning_rate": 4.498555024365711e-05, "loss": 0.0002, "step": 14685 }, { "epoch": 6.854609101516919, "grad_norm": 0.02685546875, "learning_rate": 4.4973297458405426e-05, "loss": 0.0003, "step": 14686 }, { "epoch": 6.855075845974329, "grad_norm": 0.041015625, "learning_rate": 4.496104585793966e-05, "loss": 0.0019, "step": 14687 }, { "epoch": 6.855542590431739, "grad_norm": 0.1806640625, "learning_rate": 4.494879544252363e-05, "loss": 0.0006, "step": 14688 }, { "epoch": 6.856009334889148, "grad_norm": 0.11865234375, "learning_rate": 4.493654621242116e-05, "loss": 0.0005, "step": 14689 }, { "epoch": 6.856476079346558, "grad_norm": 0.0201416015625, "learning_rate": 4.4924298167895903e-05, "loss": 0.0002, "step": 14690 }, { "epoch": 6.856942823803967, "grad_norm": 0.040283203125, "learning_rate": 4.491205130921162e-05, "loss": 0.0003, "step": 14691 }, { "epoch": 6.857409568261377, "grad_norm": 0.1708984375, "learning_rate": 4.4899805636632044e-05, "loss": 0.0005, "step": 14692 }, { "epoch": 6.857876312718787, "grad_norm": 0.134765625, "learning_rate": 4.488756115042073e-05, "loss": 0.0005, "step": 14693 }, { "epoch": 6.858343057176196, "grad_norm": 0.1123046875, "learning_rate": 4.4875317850841384e-05, "loss": 0.0003, "step": 14694 }, { "epoch": 6.858809801633606, "grad_norm": 0.031982421875, "learning_rate": 4.4863075738157635e-05, "loss": 0.0021, "step": 14695 }, { "epoch": 6.8592765460910154, "grad_norm": 0.0142822265625, "learning_rate": 4.4850834812633e-05, "loss": 0.0002, "step": 14696 }, { "epoch": 6.859743290548424, "grad_norm": 0.021484375, "learning_rate": 4.483859507453109e-05, "loss": 0.0003, "step": 14697 }, { "epoch": 6.860210035005834, "grad_norm": 0.0576171875, "learning_rate": 4.482635652411547e-05, "loss": 0.0003, "step": 14698 }, { "epoch": 6.860676779463244, "grad_norm": 0.0111083984375, "learning_rate": 4.481411916164957e-05, "loss": 0.0002, "step": 14699 }, { "epoch": 6.861143523920654, "grad_norm": 0.048095703125, "learning_rate": 4.4801882987396945e-05, "loss": 0.0003, "step": 14700 }, { "epoch": 6.861610268378063, "grad_norm": 0.040283203125, "learning_rate": 4.4789648001620984e-05, "loss": 0.0004, "step": 14701 }, { "epoch": 6.8620770128354724, "grad_norm": 0.0155029296875, "learning_rate": 4.477741420458517e-05, "loss": 0.0002, "step": 14702 }, { "epoch": 6.862543757292882, "grad_norm": 0.026611328125, "learning_rate": 4.476518159655293e-05, "loss": 0.0002, "step": 14703 }, { "epoch": 6.863010501750292, "grad_norm": 0.0791015625, "learning_rate": 4.475295017778758e-05, "loss": 0.0005, "step": 14704 }, { "epoch": 6.863477246207701, "grad_norm": 0.07568359375, "learning_rate": 4.474071994855251e-05, "loss": 0.0025, "step": 14705 }, { "epoch": 6.863943990665111, "grad_norm": 0.1484375, "learning_rate": 4.472849090911109e-05, "loss": 0.0007, "step": 14706 }, { "epoch": 6.8644107351225205, "grad_norm": 0.07373046875, "learning_rate": 4.471626305972657e-05, "loss": 0.0025, "step": 14707 }, { "epoch": 6.86487747957993, "grad_norm": 0.035400390625, "learning_rate": 4.470403640066223e-05, "loss": 0.0003, "step": 14708 }, { "epoch": 6.865344224037339, "grad_norm": 0.041259765625, "learning_rate": 4.469181093218132e-05, "loss": 0.0002, "step": 14709 }, { "epoch": 6.865810968494749, "grad_norm": 0.0245361328125, "learning_rate": 4.4679586654547136e-05, "loss": 0.0003, "step": 14710 }, { "epoch": 6.866277712952159, "grad_norm": 0.019287109375, "learning_rate": 4.466736356802278e-05, "loss": 0.0003, "step": 14711 }, { "epoch": 6.866744457409569, "grad_norm": 0.0859375, "learning_rate": 4.4655141672871526e-05, "loss": 0.0003, "step": 14712 }, { "epoch": 6.8672112018669775, "grad_norm": 0.06298828125, "learning_rate": 4.464292096935644e-05, "loss": 0.002, "step": 14713 }, { "epoch": 6.867677946324387, "grad_norm": 0.0223388671875, "learning_rate": 4.4630701457740686e-05, "loss": 0.0003, "step": 14714 }, { "epoch": 6.868144690781797, "grad_norm": 0.0286865234375, "learning_rate": 4.461848313828739e-05, "loss": 0.0003, "step": 14715 }, { "epoch": 6.868611435239207, "grad_norm": 0.02294921875, "learning_rate": 4.4606266011259576e-05, "loss": 0.0003, "step": 14716 }, { "epoch": 6.869078179696616, "grad_norm": 0.06689453125, "learning_rate": 4.459405007692031e-05, "loss": 0.0033, "step": 14717 }, { "epoch": 6.869544924154026, "grad_norm": 0.0546875, "learning_rate": 4.4581835335532664e-05, "loss": 0.0027, "step": 14718 }, { "epoch": 6.870011668611435, "grad_norm": 0.0888671875, "learning_rate": 4.456962178735954e-05, "loss": 0.0005, "step": 14719 }, { "epoch": 6.870478413068845, "grad_norm": 0.0196533203125, "learning_rate": 4.455740943266397e-05, "loss": 0.0002, "step": 14720 }, { "epoch": 6.870945157526254, "grad_norm": 0.056640625, "learning_rate": 4.454519827170892e-05, "loss": 0.0003, "step": 14721 }, { "epoch": 6.871411901983664, "grad_norm": 0.10107421875, "learning_rate": 4.453298830475724e-05, "loss": 0.0007, "step": 14722 }, { "epoch": 6.871878646441074, "grad_norm": 0.021240234375, "learning_rate": 4.452077953207187e-05, "loss": 0.0002, "step": 14723 }, { "epoch": 6.872345390898483, "grad_norm": 0.038818359375, "learning_rate": 4.45085719539157e-05, "loss": 0.0003, "step": 14724 }, { "epoch": 6.872812135355892, "grad_norm": 0.0771484375, "learning_rate": 4.449636557055152e-05, "loss": 0.0005, "step": 14725 }, { "epoch": 6.873278879813302, "grad_norm": 0.0361328125, "learning_rate": 4.448416038224217e-05, "loss": 0.0003, "step": 14726 }, { "epoch": 6.873745624270712, "grad_norm": 0.0810546875, "learning_rate": 4.447195638925047e-05, "loss": 0.0009, "step": 14727 }, { "epoch": 6.874212368728122, "grad_norm": 0.01336669921875, "learning_rate": 4.445975359183913e-05, "loss": 0.0002, "step": 14728 }, { "epoch": 6.874679113185531, "grad_norm": 0.263671875, "learning_rate": 4.444755199027092e-05, "loss": 0.0013, "step": 14729 }, { "epoch": 6.8751458576429405, "grad_norm": 0.01385498046875, "learning_rate": 4.443535158480858e-05, "loss": 0.0002, "step": 14730 }, { "epoch": 6.87561260210035, "grad_norm": 0.01806640625, "learning_rate": 4.442315237571474e-05, "loss": 0.0002, "step": 14731 }, { "epoch": 6.876079346557759, "grad_norm": 0.10498046875, "learning_rate": 4.441095436325212e-05, "loss": 0.0004, "step": 14732 }, { "epoch": 6.876546091015169, "grad_norm": 0.048828125, "learning_rate": 4.439875754768331e-05, "loss": 0.0022, "step": 14733 }, { "epoch": 6.877012835472579, "grad_norm": 0.052734375, "learning_rate": 4.438656192927093e-05, "loss": 0.0004, "step": 14734 }, { "epoch": 6.877479579929989, "grad_norm": 0.6015625, "learning_rate": 4.4374367508277614e-05, "loss": 0.0011, "step": 14735 }, { "epoch": 6.877946324387398, "grad_norm": 0.052978515625, "learning_rate": 4.4362174284965854e-05, "loss": 0.0027, "step": 14736 }, { "epoch": 6.878413068844807, "grad_norm": 0.03076171875, "learning_rate": 4.43499822595982e-05, "loss": 0.0002, "step": 14737 }, { "epoch": 6.878879813302217, "grad_norm": 0.1474609375, "learning_rate": 4.433779143243722e-05, "loss": 0.0008, "step": 14738 }, { "epoch": 6.879346557759627, "grad_norm": 0.024169921875, "learning_rate": 4.43256018037453e-05, "loss": 0.0003, "step": 14739 }, { "epoch": 6.879813302217036, "grad_norm": 0.033447265625, "learning_rate": 4.431341337378495e-05, "loss": 0.0004, "step": 14740 }, { "epoch": 6.880280046674446, "grad_norm": 0.0257568359375, "learning_rate": 4.430122614281864e-05, "loss": 0.0003, "step": 14741 }, { "epoch": 6.880746791131855, "grad_norm": 0.039794921875, "learning_rate": 4.4289040111108685e-05, "loss": 0.0003, "step": 14742 }, { "epoch": 6.881213535589265, "grad_norm": 0.0150146484375, "learning_rate": 4.427685527891751e-05, "loss": 0.0003, "step": 14743 }, { "epoch": 6.881680280046674, "grad_norm": 0.02880859375, "learning_rate": 4.42646716465075e-05, "loss": 0.0003, "step": 14744 }, { "epoch": 6.882147024504084, "grad_norm": 0.201171875, "learning_rate": 4.425248921414093e-05, "loss": 0.0008, "step": 14745 }, { "epoch": 6.882613768961494, "grad_norm": 0.053955078125, "learning_rate": 4.4240307982080096e-05, "loss": 0.0004, "step": 14746 }, { "epoch": 6.8830805134189035, "grad_norm": 0.02099609375, "learning_rate": 4.422812795058735e-05, "loss": 0.0003, "step": 14747 }, { "epoch": 6.883547257876312, "grad_norm": 0.0308837890625, "learning_rate": 4.421594911992485e-05, "loss": 0.0003, "step": 14748 }, { "epoch": 6.884014002333722, "grad_norm": 0.038330078125, "learning_rate": 4.420377149035489e-05, "loss": 0.0022, "step": 14749 }, { "epoch": 6.884480746791132, "grad_norm": 0.1572265625, "learning_rate": 4.41915950621396e-05, "loss": 0.0006, "step": 14750 }, { "epoch": 6.884947491248542, "grad_norm": 0.11279296875, "learning_rate": 4.417941983554119e-05, "loss": 0.0005, "step": 14751 }, { "epoch": 6.885414235705951, "grad_norm": 0.1474609375, "learning_rate": 4.4167245810821844e-05, "loss": 0.0007, "step": 14752 }, { "epoch": 6.8858809801633605, "grad_norm": 0.057373046875, "learning_rate": 4.415507298824361e-05, "loss": 0.0027, "step": 14753 }, { "epoch": 6.88634772462077, "grad_norm": 0.06640625, "learning_rate": 4.414290136806864e-05, "loss": 0.0004, "step": 14754 }, { "epoch": 6.88681446907818, "grad_norm": 0.0296630859375, "learning_rate": 4.4130730950558964e-05, "loss": 0.0003, "step": 14755 }, { "epoch": 6.887281213535589, "grad_norm": 0.0194091796875, "learning_rate": 4.411856173597666e-05, "loss": 0.0002, "step": 14756 }, { "epoch": 6.887747957992999, "grad_norm": 0.052978515625, "learning_rate": 4.4106393724583685e-05, "loss": 0.0023, "step": 14757 }, { "epoch": 6.888214702450409, "grad_norm": 0.03857421875, "learning_rate": 4.409422691664207e-05, "loss": 0.0003, "step": 14758 }, { "epoch": 6.8886814469078175, "grad_norm": 0.07177734375, "learning_rate": 4.4082061312413814e-05, "loss": 0.0003, "step": 14759 }, { "epoch": 6.889148191365227, "grad_norm": 0.09423828125, "learning_rate": 4.406989691216079e-05, "loss": 0.0005, "step": 14760 }, { "epoch": 6.889614935822637, "grad_norm": 0.018310546875, "learning_rate": 4.4057733716144965e-05, "loss": 0.0002, "step": 14761 }, { "epoch": 6.890081680280047, "grad_norm": 0.036376953125, "learning_rate": 4.404557172462817e-05, "loss": 0.0003, "step": 14762 }, { "epoch": 6.890548424737457, "grad_norm": 0.2333984375, "learning_rate": 4.403341093787231e-05, "loss": 0.004, "step": 14763 }, { "epoch": 6.891015169194866, "grad_norm": 0.1767578125, "learning_rate": 4.4021251356139235e-05, "loss": 0.0009, "step": 14764 }, { "epoch": 6.891481913652275, "grad_norm": 0.062255859375, "learning_rate": 4.400909297969069e-05, "loss": 0.0004, "step": 14765 }, { "epoch": 6.891948658109685, "grad_norm": 0.0252685546875, "learning_rate": 4.3996935808788496e-05, "loss": 0.0002, "step": 14766 }, { "epoch": 6.892415402567094, "grad_norm": 0.03369140625, "learning_rate": 4.3984779843694454e-05, "loss": 0.0002, "step": 14767 }, { "epoch": 6.892882147024504, "grad_norm": 0.0167236328125, "learning_rate": 4.3972625084670215e-05, "loss": 0.0002, "step": 14768 }, { "epoch": 6.893348891481914, "grad_norm": 0.017578125, "learning_rate": 4.396047153197752e-05, "loss": 0.0002, "step": 14769 }, { "epoch": 6.8938156359393234, "grad_norm": 0.06591796875, "learning_rate": 4.3948319185878086e-05, "loss": 0.0003, "step": 14770 }, { "epoch": 6.894282380396733, "grad_norm": 0.130859375, "learning_rate": 4.393616804663349e-05, "loss": 0.0004, "step": 14771 }, { "epoch": 6.894749124854142, "grad_norm": 0.044189453125, "learning_rate": 4.3924018114505394e-05, "loss": 0.0003, "step": 14772 }, { "epoch": 6.895215869311552, "grad_norm": 0.0185546875, "learning_rate": 4.391186938975546e-05, "loss": 0.0002, "step": 14773 }, { "epoch": 6.895682613768962, "grad_norm": 0.0189208984375, "learning_rate": 4.3899721872645164e-05, "loss": 0.0002, "step": 14774 }, { "epoch": 6.896149358226371, "grad_norm": 0.02685546875, "learning_rate": 4.388757556343609e-05, "loss": 0.0003, "step": 14775 }, { "epoch": 6.8966161026837804, "grad_norm": 0.064453125, "learning_rate": 4.387543046238982e-05, "loss": 0.0003, "step": 14776 }, { "epoch": 6.89708284714119, "grad_norm": 0.0255126953125, "learning_rate": 4.3863286569767756e-05, "loss": 0.0003, "step": 14777 }, { "epoch": 6.8975495915986, "grad_norm": 0.0654296875, "learning_rate": 4.3851143885831436e-05, "loss": 0.0004, "step": 14778 }, { "epoch": 6.89801633605601, "grad_norm": 0.15234375, "learning_rate": 4.383900241084231e-05, "loss": 0.0005, "step": 14779 }, { "epoch": 6.898483080513419, "grad_norm": 0.028564453125, "learning_rate": 4.382686214506173e-05, "loss": 0.0003, "step": 14780 }, { "epoch": 6.8989498249708285, "grad_norm": 0.03662109375, "learning_rate": 4.3814723088751186e-05, "loss": 0.0003, "step": 14781 }, { "epoch": 6.899416569428238, "grad_norm": 0.038818359375, "learning_rate": 4.380258524217194e-05, "loss": 0.0003, "step": 14782 }, { "epoch": 6.899883313885647, "grad_norm": 0.0302734375, "learning_rate": 4.37904486055854e-05, "loss": 0.0003, "step": 14783 }, { "epoch": 6.900350058343057, "grad_norm": 0.044677734375, "learning_rate": 4.37783131792529e-05, "loss": 0.0004, "step": 14784 }, { "epoch": 6.900816802800467, "grad_norm": 0.306640625, "learning_rate": 4.376617896343566e-05, "loss": 0.0008, "step": 14785 }, { "epoch": 6.901283547257877, "grad_norm": 0.0830078125, "learning_rate": 4.3754045958394976e-05, "loss": 0.0004, "step": 14786 }, { "epoch": 6.9017502917152855, "grad_norm": 0.0361328125, "learning_rate": 4.374191416439213e-05, "loss": 0.0003, "step": 14787 }, { "epoch": 6.902217036172695, "grad_norm": 0.04833984375, "learning_rate": 4.3729783581688245e-05, "loss": 0.0003, "step": 14788 }, { "epoch": 6.902683780630105, "grad_norm": 0.03271484375, "learning_rate": 4.371765421054456e-05, "loss": 0.0003, "step": 14789 }, { "epoch": 6.903150525087515, "grad_norm": 0.2099609375, "learning_rate": 4.3705526051222266e-05, "loss": 0.0007, "step": 14790 }, { "epoch": 6.903617269544924, "grad_norm": 0.035888671875, "learning_rate": 4.369339910398241e-05, "loss": 0.0002, "step": 14791 }, { "epoch": 6.904084014002334, "grad_norm": 0.11083984375, "learning_rate": 4.3681273369086154e-05, "loss": 0.0005, "step": 14792 }, { "epoch": 6.904550758459743, "grad_norm": 0.11083984375, "learning_rate": 4.3669148846794595e-05, "loss": 0.0006, "step": 14793 }, { "epoch": 6.905017502917153, "grad_norm": 0.265625, "learning_rate": 4.3657025537368734e-05, "loss": 0.0005, "step": 14794 }, { "epoch": 6.905484247374562, "grad_norm": 0.0625, "learning_rate": 4.364490344106962e-05, "loss": 0.0003, "step": 14795 }, { "epoch": 6.905950991831972, "grad_norm": 0.04638671875, "learning_rate": 4.36327825581583e-05, "loss": 0.0022, "step": 14796 }, { "epoch": 6.906417736289382, "grad_norm": 0.02783203125, "learning_rate": 4.362066288889568e-05, "loss": 0.0002, "step": 14797 }, { "epoch": 6.9068844807467915, "grad_norm": 0.0296630859375, "learning_rate": 4.360854443354278e-05, "loss": 0.0019, "step": 14798 }, { "epoch": 6.9073512252042, "grad_norm": 0.29296875, "learning_rate": 4.3596427192360446e-05, "loss": 0.0012, "step": 14799 }, { "epoch": 6.90781796966161, "grad_norm": 0.123046875, "learning_rate": 4.358431116560962e-05, "loss": 0.0024, "step": 14800 }, { "epoch": 6.90828471411902, "grad_norm": 0.050537109375, "learning_rate": 4.35721963535512e-05, "loss": 0.0003, "step": 14801 }, { "epoch": 6.908751458576429, "grad_norm": 0.049560546875, "learning_rate": 4.3560082756446e-05, "loss": 0.0003, "step": 14802 }, { "epoch": 6.909218203033839, "grad_norm": 0.11669921875, "learning_rate": 4.35479703745548e-05, "loss": 0.0005, "step": 14803 }, { "epoch": 6.9096849474912485, "grad_norm": 0.01556396484375, "learning_rate": 4.353585920813843e-05, "loss": 0.0002, "step": 14804 }, { "epoch": 6.910151691948658, "grad_norm": 0.02978515625, "learning_rate": 4.35237492574577e-05, "loss": 0.0002, "step": 14805 }, { "epoch": 6.910618436406068, "grad_norm": 0.115234375, "learning_rate": 4.351164052277328e-05, "loss": 0.0005, "step": 14806 }, { "epoch": 6.911085180863477, "grad_norm": 0.059814453125, "learning_rate": 4.3499533004345905e-05, "loss": 0.0003, "step": 14807 }, { "epoch": 6.911551925320887, "grad_norm": 0.1513671875, "learning_rate": 4.3487426702436316e-05, "loss": 0.0008, "step": 14808 }, { "epoch": 6.912018669778297, "grad_norm": 0.03125, "learning_rate": 4.34753216173051e-05, "loss": 0.0002, "step": 14809 }, { "epoch": 6.9124854142357055, "grad_norm": 0.1455078125, "learning_rate": 4.346321774921295e-05, "loss": 0.0005, "step": 14810 }, { "epoch": 6.912952158693115, "grad_norm": 0.02587890625, "learning_rate": 4.345111509842041e-05, "loss": 0.0002, "step": 14811 }, { "epoch": 6.913418903150525, "grad_norm": 0.208984375, "learning_rate": 4.3439013665188124e-05, "loss": 0.0008, "step": 14812 }, { "epoch": 6.913885647607935, "grad_norm": 0.08935546875, "learning_rate": 4.3426913449776655e-05, "loss": 0.0005, "step": 14813 }, { "epoch": 6.914352392065345, "grad_norm": 0.0223388671875, "learning_rate": 4.341481445244646e-05, "loss": 0.0002, "step": 14814 }, { "epoch": 6.914819136522754, "grad_norm": 0.015625, "learning_rate": 4.340271667345811e-05, "loss": 0.0002, "step": 14815 }, { "epoch": 6.915285880980163, "grad_norm": 0.010498046875, "learning_rate": 4.33906201130721e-05, "loss": 0.0002, "step": 14816 }, { "epoch": 6.915752625437573, "grad_norm": 0.0478515625, "learning_rate": 4.3378524771548804e-05, "loss": 0.0004, "step": 14817 }, { "epoch": 6.916219369894982, "grad_norm": 0.04833984375, "learning_rate": 4.336643064914869e-05, "loss": 0.0003, "step": 14818 }, { "epoch": 6.916686114352392, "grad_norm": 0.01708984375, "learning_rate": 4.3354337746132215e-05, "loss": 0.0002, "step": 14819 }, { "epoch": 6.917152858809802, "grad_norm": 0.021484375, "learning_rate": 4.334224606275964e-05, "loss": 0.0003, "step": 14820 }, { "epoch": 6.9176196032672115, "grad_norm": 0.119140625, "learning_rate": 4.333015559929138e-05, "loss": 0.0032, "step": 14821 }, { "epoch": 6.91808634772462, "grad_norm": 0.09033203125, "learning_rate": 4.331806635598778e-05, "loss": 0.0034, "step": 14822 }, { "epoch": 6.91855309218203, "grad_norm": 0.0247802734375, "learning_rate": 4.3305978333109064e-05, "loss": 0.0002, "step": 14823 }, { "epoch": 6.91901983663944, "grad_norm": 0.05078125, "learning_rate": 4.329389153091554e-05, "loss": 0.0022, "step": 14824 }, { "epoch": 6.91948658109685, "grad_norm": 0.1513671875, "learning_rate": 4.3281805949667486e-05, "loss": 0.0037, "step": 14825 }, { "epoch": 6.919953325554259, "grad_norm": 0.031005859375, "learning_rate": 4.326972158962504e-05, "loss": 0.0003, "step": 14826 }, { "epoch": 6.9204200700116685, "grad_norm": 0.1123046875, "learning_rate": 4.3257638451048434e-05, "loss": 0.0012, "step": 14827 }, { "epoch": 6.920886814469078, "grad_norm": 0.035400390625, "learning_rate": 4.324555653419786e-05, "loss": 0.0002, "step": 14828 }, { "epoch": 6.921353558926488, "grad_norm": 0.2421875, "learning_rate": 4.3233475839333394e-05, "loss": 0.0012, "step": 14829 }, { "epoch": 6.921820303383897, "grad_norm": 0.0264892578125, "learning_rate": 4.322139636671522e-05, "loss": 0.0003, "step": 14830 }, { "epoch": 6.922287047841307, "grad_norm": 0.045654296875, "learning_rate": 4.3209318116603324e-05, "loss": 0.0003, "step": 14831 }, { "epoch": 6.922753792298717, "grad_norm": 0.37890625, "learning_rate": 4.319724108925783e-05, "loss": 0.0011, "step": 14832 }, { "epoch": 6.923220536756126, "grad_norm": 0.0289306640625, "learning_rate": 4.318516528493879e-05, "loss": 0.0003, "step": 14833 }, { "epoch": 6.923687281213535, "grad_norm": 0.265625, "learning_rate": 4.317309070390615e-05, "loss": 0.0009, "step": 14834 }, { "epoch": 6.924154025670945, "grad_norm": 0.058837890625, "learning_rate": 4.3161017346419916e-05, "loss": 0.0017, "step": 14835 }, { "epoch": 6.924620770128355, "grad_norm": 0.01953125, "learning_rate": 4.3148945212740076e-05, "loss": 0.0003, "step": 14836 }, { "epoch": 6.925087514585765, "grad_norm": 0.05419921875, "learning_rate": 4.313687430312649e-05, "loss": 0.0003, "step": 14837 }, { "epoch": 6.925554259043174, "grad_norm": 0.1416015625, "learning_rate": 4.312480461783909e-05, "loss": 0.0029, "step": 14838 }, { "epoch": 6.926021003500583, "grad_norm": 0.035400390625, "learning_rate": 4.311273615713778e-05, "loss": 0.0003, "step": 14839 }, { "epoch": 6.926487747957993, "grad_norm": 0.2255859375, "learning_rate": 4.310066892128235e-05, "loss": 0.0013, "step": 14840 }, { "epoch": 6.926954492415403, "grad_norm": 0.02294921875, "learning_rate": 4.308860291053264e-05, "loss": 0.0003, "step": 14841 }, { "epoch": 6.927421236872812, "grad_norm": 0.07861328125, "learning_rate": 4.3076538125148505e-05, "loss": 0.0004, "step": 14842 }, { "epoch": 6.927887981330222, "grad_norm": 0.026611328125, "learning_rate": 4.306447456538961e-05, "loss": 0.0002, "step": 14843 }, { "epoch": 6.9283547257876315, "grad_norm": 0.0478515625, "learning_rate": 4.305241223151576e-05, "loss": 0.0004, "step": 14844 }, { "epoch": 6.92882147024504, "grad_norm": 0.03466796875, "learning_rate": 4.3040351123786684e-05, "loss": 0.0003, "step": 14845 }, { "epoch": 6.92928821470245, "grad_norm": 0.02978515625, "learning_rate": 4.3028291242462014e-05, "loss": 0.0003, "step": 14846 }, { "epoch": 6.92975495915986, "grad_norm": 0.10693359375, "learning_rate": 4.301623258780145e-05, "loss": 0.0003, "step": 14847 }, { "epoch": 6.93022170361727, "grad_norm": 0.02685546875, "learning_rate": 4.3004175160064665e-05, "loss": 0.0003, "step": 14848 }, { "epoch": 6.9306884480746795, "grad_norm": 0.01251220703125, "learning_rate": 4.299211895951122e-05, "loss": 0.0001, "step": 14849 }, { "epoch": 6.9311551925320884, "grad_norm": 0.011474609375, "learning_rate": 4.298006398640066e-05, "loss": 0.0002, "step": 14850 }, { "epoch": 6.931621936989498, "grad_norm": 0.015625, "learning_rate": 4.296801024099263e-05, "loss": 0.0002, "step": 14851 }, { "epoch": 6.932088681446908, "grad_norm": 0.07568359375, "learning_rate": 4.295595772354658e-05, "loss": 0.0003, "step": 14852 }, { "epoch": 6.932555425904317, "grad_norm": 0.111328125, "learning_rate": 4.2943906434322056e-05, "loss": 0.004, "step": 14853 }, { "epoch": 6.933022170361727, "grad_norm": 0.0341796875, "learning_rate": 4.293185637357856e-05, "loss": 0.0002, "step": 14854 }, { "epoch": 6.9334889148191365, "grad_norm": 0.046875, "learning_rate": 4.2919807541575486e-05, "loss": 0.0003, "step": 14855 }, { "epoch": 6.933955659276546, "grad_norm": 0.01239013671875, "learning_rate": 4.290775993857229e-05, "loss": 0.0002, "step": 14856 }, { "epoch": 6.934422403733956, "grad_norm": 0.01104736328125, "learning_rate": 4.28957135648284e-05, "loss": 0.0002, "step": 14857 }, { "epoch": 6.934889148191365, "grad_norm": 0.04345703125, "learning_rate": 4.288366842060312e-05, "loss": 0.0003, "step": 14858 }, { "epoch": 6.935355892648775, "grad_norm": 0.111328125, "learning_rate": 4.287162450615582e-05, "loss": 0.0003, "step": 14859 }, { "epoch": 6.935822637106185, "grad_norm": 0.0220947265625, "learning_rate": 4.2859581821745876e-05, "loss": 0.0003, "step": 14860 }, { "epoch": 6.9362893815635935, "grad_norm": 0.0164794921875, "learning_rate": 4.284754036763249e-05, "loss": 0.0002, "step": 14861 }, { "epoch": 6.936756126021003, "grad_norm": 0.0250244140625, "learning_rate": 4.283550014407501e-05, "loss": 0.0002, "step": 14862 }, { "epoch": 6.937222870478413, "grad_norm": 0.031494140625, "learning_rate": 4.282346115133261e-05, "loss": 0.0002, "step": 14863 }, { "epoch": 6.937689614935823, "grad_norm": 0.0142822265625, "learning_rate": 4.2811423389664515e-05, "loss": 0.0002, "step": 14864 }, { "epoch": 6.938156359393232, "grad_norm": 0.01312255859375, "learning_rate": 4.279938685932998e-05, "loss": 0.0002, "step": 14865 }, { "epoch": 6.938623103850642, "grad_norm": 0.006866455078125, "learning_rate": 4.2787351560588064e-05, "loss": 0.0002, "step": 14866 }, { "epoch": 6.939089848308051, "grad_norm": 0.0693359375, "learning_rate": 4.2775317493697954e-05, "loss": 0.0004, "step": 14867 }, { "epoch": 6.939556592765461, "grad_norm": 0.109375, "learning_rate": 4.276328465891878e-05, "loss": 0.0004, "step": 14868 }, { "epoch": 6.94002333722287, "grad_norm": 0.0230712890625, "learning_rate": 4.2751253056509556e-05, "loss": 0.0003, "step": 14869 }, { "epoch": 6.94049008168028, "grad_norm": 0.1259765625, "learning_rate": 4.273922268672937e-05, "loss": 0.0004, "step": 14870 }, { "epoch": 6.94095682613769, "grad_norm": 0.1396484375, "learning_rate": 4.2727193549837296e-05, "loss": 0.0004, "step": 14871 }, { "epoch": 6.9414235705950995, "grad_norm": 0.296875, "learning_rate": 4.271516564609225e-05, "loss": 0.0028, "step": 14872 }, { "epoch": 6.941890315052508, "grad_norm": 0.07080078125, "learning_rate": 4.270313897575324e-05, "loss": 0.0004, "step": 14873 }, { "epoch": 6.942357059509918, "grad_norm": 0.03662109375, "learning_rate": 4.269111353907926e-05, "loss": 0.0003, "step": 14874 }, { "epoch": 6.942823803967328, "grad_norm": 0.0262451171875, "learning_rate": 4.267908933632917e-05, "loss": 0.0003, "step": 14875 }, { "epoch": 6.943290548424738, "grad_norm": 0.17578125, "learning_rate": 4.266706636776187e-05, "loss": 0.0005, "step": 14876 }, { "epoch": 6.943757292882147, "grad_norm": 0.10302734375, "learning_rate": 4.2655044633636275e-05, "loss": 0.0003, "step": 14877 }, { "epoch": 6.9442240373395565, "grad_norm": 0.0380859375, "learning_rate": 4.2643024134211175e-05, "loss": 0.002, "step": 14878 }, { "epoch": 6.944690781796966, "grad_norm": 0.173828125, "learning_rate": 4.263100486974543e-05, "loss": 0.001, "step": 14879 }, { "epoch": 6.945157526254376, "grad_norm": 0.0301513671875, "learning_rate": 4.261898684049776e-05, "loss": 0.0003, "step": 14880 }, { "epoch": 6.945624270711785, "grad_norm": 0.06005859375, "learning_rate": 4.260697004672697e-05, "loss": 0.0032, "step": 14881 }, { "epoch": 6.946091015169195, "grad_norm": 0.045654296875, "learning_rate": 4.259495448869184e-05, "loss": 0.0003, "step": 14882 }, { "epoch": 6.946557759626605, "grad_norm": 0.023193359375, "learning_rate": 4.2582940166650986e-05, "loss": 0.0003, "step": 14883 }, { "epoch": 6.947024504084014, "grad_norm": 0.294921875, "learning_rate": 4.257092708086314e-05, "loss": 0.0019, "step": 14884 }, { "epoch": 6.947491248541423, "grad_norm": 0.150390625, "learning_rate": 4.255891523158699e-05, "loss": 0.0006, "step": 14885 }, { "epoch": 6.947957992998833, "grad_norm": 0.035888671875, "learning_rate": 4.254690461908108e-05, "loss": 0.0002, "step": 14886 }, { "epoch": 6.948424737456243, "grad_norm": 0.031982421875, "learning_rate": 4.253489524360408e-05, "loss": 0.0025, "step": 14887 }, { "epoch": 6.948891481913652, "grad_norm": 0.0947265625, "learning_rate": 4.252288710541457e-05, "loss": 0.0004, "step": 14888 }, { "epoch": 6.949358226371062, "grad_norm": 0.0546875, "learning_rate": 4.251088020477103e-05, "loss": 0.0003, "step": 14889 }, { "epoch": 6.949824970828471, "grad_norm": 0.267578125, "learning_rate": 4.2498874541932035e-05, "loss": 0.001, "step": 14890 }, { "epoch": 6.950291715285881, "grad_norm": 0.03271484375, "learning_rate": 4.2486870117156094e-05, "loss": 0.0003, "step": 14891 }, { "epoch": 6.950758459743291, "grad_norm": 0.1435546875, "learning_rate": 4.247486693070162e-05, "loss": 0.0006, "step": 14892 }, { "epoch": 6.9512252042007, "grad_norm": 0.01318359375, "learning_rate": 4.246286498282709e-05, "loss": 0.0002, "step": 14893 }, { "epoch": 6.95169194865811, "grad_norm": 0.1123046875, "learning_rate": 4.2450864273790944e-05, "loss": 0.0007, "step": 14894 }, { "epoch": 6.9521586931155195, "grad_norm": 0.060546875, "learning_rate": 4.2438864803851505e-05, "loss": 0.0004, "step": 14895 }, { "epoch": 6.952625437572928, "grad_norm": 0.015869140625, "learning_rate": 4.242686657326719e-05, "loss": 0.0003, "step": 14896 }, { "epoch": 6.953092182030338, "grad_norm": 0.0128173828125, "learning_rate": 4.2414869582296365e-05, "loss": 0.0002, "step": 14897 }, { "epoch": 6.953558926487748, "grad_norm": 0.05126953125, "learning_rate": 4.240287383119723e-05, "loss": 0.0031, "step": 14898 }, { "epoch": 6.954025670945158, "grad_norm": 0.0172119140625, "learning_rate": 4.239087932022813e-05, "loss": 0.0002, "step": 14899 }, { "epoch": 6.954492415402568, "grad_norm": 0.283203125, "learning_rate": 4.237888604964735e-05, "loss": 0.0036, "step": 14900 }, { "epoch": 6.9549591598599765, "grad_norm": 0.1611328125, "learning_rate": 4.2366894019713045e-05, "loss": 0.0004, "step": 14901 }, { "epoch": 6.955425904317386, "grad_norm": 0.06689453125, "learning_rate": 4.2354903230683465e-05, "loss": 0.0006, "step": 14902 }, { "epoch": 6.955892648774796, "grad_norm": 0.05712890625, "learning_rate": 4.23429136828168e-05, "loss": 0.0004, "step": 14903 }, { "epoch": 6.956359393232205, "grad_norm": 0.138671875, "learning_rate": 4.233092537637114e-05, "loss": 0.0041, "step": 14904 }, { "epoch": 6.956826137689615, "grad_norm": 0.02783203125, "learning_rate": 4.231893831160465e-05, "loss": 0.0002, "step": 14905 }, { "epoch": 6.957292882147025, "grad_norm": 0.0859375, "learning_rate": 4.230695248877545e-05, "loss": 0.0019, "step": 14906 }, { "epoch": 6.957759626604434, "grad_norm": 0.04736328125, "learning_rate": 4.229496790814153e-05, "loss": 0.0005, "step": 14907 }, { "epoch": 6.958226371061843, "grad_norm": 0.01025390625, "learning_rate": 4.2282984569960994e-05, "loss": 0.0002, "step": 14908 }, { "epoch": 6.958693115519253, "grad_norm": 0.0245361328125, "learning_rate": 4.227100247449186e-05, "loss": 0.0002, "step": 14909 }, { "epoch": 6.959159859976663, "grad_norm": 0.0244140625, "learning_rate": 4.225902162199207e-05, "loss": 0.0003, "step": 14910 }, { "epoch": 6.959626604434073, "grad_norm": 0.0751953125, "learning_rate": 4.224704201271963e-05, "loss": 0.0023, "step": 14911 }, { "epoch": 6.960093348891482, "grad_norm": 0.220703125, "learning_rate": 4.223506364693243e-05, "loss": 0.0027, "step": 14912 }, { "epoch": 6.960560093348891, "grad_norm": 0.058837890625, "learning_rate": 4.222308652488839e-05, "loss": 0.0003, "step": 14913 }, { "epoch": 6.961026837806301, "grad_norm": 0.0133056640625, "learning_rate": 4.2211110646845445e-05, "loss": 0.0002, "step": 14914 }, { "epoch": 6.961493582263711, "grad_norm": 0.0537109375, "learning_rate": 4.219913601306137e-05, "loss": 0.004, "step": 14915 }, { "epoch": 6.96196032672112, "grad_norm": 0.017822265625, "learning_rate": 4.218716262379403e-05, "loss": 0.0003, "step": 14916 }, { "epoch": 6.96242707117853, "grad_norm": 0.09375, "learning_rate": 4.217519047930125e-05, "loss": 0.0005, "step": 14917 }, { "epoch": 6.9628938156359395, "grad_norm": 0.042236328125, "learning_rate": 4.216321957984075e-05, "loss": 0.0002, "step": 14918 }, { "epoch": 6.963360560093349, "grad_norm": 0.0361328125, "learning_rate": 4.21512499256703e-05, "loss": 0.0002, "step": 14919 }, { "epoch": 6.963827304550758, "grad_norm": 0.07275390625, "learning_rate": 4.213928151704766e-05, "loss": 0.0004, "step": 14920 }, { "epoch": 6.964294049008168, "grad_norm": 0.013671875, "learning_rate": 4.212731435423044e-05, "loss": 0.0002, "step": 14921 }, { "epoch": 6.964760793465578, "grad_norm": 0.0546875, "learning_rate": 4.211534843747637e-05, "loss": 0.0003, "step": 14922 }, { "epoch": 6.9652275379229875, "grad_norm": 0.0177001953125, "learning_rate": 4.21033837670431e-05, "loss": 0.0002, "step": 14923 }, { "epoch": 6.9656942823803965, "grad_norm": 0.013427734375, "learning_rate": 4.209142034318818e-05, "loss": 0.0003, "step": 14924 }, { "epoch": 6.966161026837806, "grad_norm": 0.01904296875, "learning_rate": 4.207945816616923e-05, "loss": 0.0002, "step": 14925 }, { "epoch": 6.966627771295216, "grad_norm": 0.0184326171875, "learning_rate": 4.206749723624385e-05, "loss": 0.0002, "step": 14926 }, { "epoch": 6.967094515752626, "grad_norm": 0.04150390625, "learning_rate": 4.205553755366949e-05, "loss": 0.0018, "step": 14927 }, { "epoch": 6.967561260210035, "grad_norm": 0.019775390625, "learning_rate": 4.204357911870373e-05, "loss": 0.0002, "step": 14928 }, { "epoch": 6.9680280046674445, "grad_norm": 0.03076171875, "learning_rate": 4.203162193160398e-05, "loss": 0.0002, "step": 14929 }, { "epoch": 6.968494749124854, "grad_norm": 0.01226806640625, "learning_rate": 4.201966599262773e-05, "loss": 0.0001, "step": 14930 }, { "epoch": 6.968961493582263, "grad_norm": 0.037841796875, "learning_rate": 4.200771130203244e-05, "loss": 0.0003, "step": 14931 }, { "epoch": 6.969428238039673, "grad_norm": 0.05859375, "learning_rate": 4.1995757860075436e-05, "loss": 0.0035, "step": 14932 }, { "epoch": 6.969894982497083, "grad_norm": 0.2060546875, "learning_rate": 4.198380566701411e-05, "loss": 0.0008, "step": 14933 }, { "epoch": 6.970361726954493, "grad_norm": 0.056640625, "learning_rate": 4.1971854723105865e-05, "loss": 0.0005, "step": 14934 }, { "epoch": 6.970828471411902, "grad_norm": 0.0167236328125, "learning_rate": 4.1959905028607926e-05, "loss": 0.0002, "step": 14935 }, { "epoch": 6.971295215869311, "grad_norm": 0.46484375, "learning_rate": 4.194795658377764e-05, "loss": 0.002, "step": 14936 }, { "epoch": 6.971761960326721, "grad_norm": 0.07666015625, "learning_rate": 4.193600938887229e-05, "loss": 0.0043, "step": 14937 }, { "epoch": 6.972228704784131, "grad_norm": 0.01092529296875, "learning_rate": 4.1924063444149044e-05, "loss": 0.0002, "step": 14938 }, { "epoch": 6.97269544924154, "grad_norm": 0.04296875, "learning_rate": 4.191211874986515e-05, "loss": 0.0004, "step": 14939 }, { "epoch": 6.97316219369895, "grad_norm": 0.095703125, "learning_rate": 4.190017530627781e-05, "loss": 0.0004, "step": 14940 }, { "epoch": 6.973628938156359, "grad_norm": 0.38671875, "learning_rate": 4.188823311364414e-05, "loss": 0.0044, "step": 14941 }, { "epoch": 6.974095682613769, "grad_norm": 0.146484375, "learning_rate": 4.187629217222129e-05, "loss": 0.0004, "step": 14942 }, { "epoch": 6.974562427071179, "grad_norm": 0.05078125, "learning_rate": 4.186435248226638e-05, "loss": 0.0029, "step": 14943 }, { "epoch": 6.975029171528588, "grad_norm": 0.08203125, "learning_rate": 4.1852414044036434e-05, "loss": 0.0024, "step": 14944 }, { "epoch": 6.975495915985998, "grad_norm": 0.00946044921875, "learning_rate": 4.1840476857788566e-05, "loss": 0.0002, "step": 14945 }, { "epoch": 6.9759626604434075, "grad_norm": 0.0341796875, "learning_rate": 4.182854092377977e-05, "loss": 0.0021, "step": 14946 }, { "epoch": 6.976429404900816, "grad_norm": 0.0595703125, "learning_rate": 4.181660624226697e-05, "loss": 0.0028, "step": 14947 }, { "epoch": 6.976896149358226, "grad_norm": 0.0213623046875, "learning_rate": 4.180467281350722e-05, "loss": 0.0002, "step": 14948 }, { "epoch": 6.977362893815636, "grad_norm": 0.0257568359375, "learning_rate": 4.179274063775745e-05, "loss": 0.0002, "step": 14949 }, { "epoch": 6.977829638273046, "grad_norm": 0.0194091796875, "learning_rate": 4.1780809715274536e-05, "loss": 0.0002, "step": 14950 }, { "epoch": 6.978296382730455, "grad_norm": 0.025390625, "learning_rate": 4.176888004631537e-05, "loss": 0.0002, "step": 14951 }, { "epoch": 6.9787631271878645, "grad_norm": 0.0269775390625, "learning_rate": 4.1756951631136866e-05, "loss": 0.0017, "step": 14952 }, { "epoch": 6.979229871645274, "grad_norm": 0.0235595703125, "learning_rate": 4.174502446999579e-05, "loss": 0.0002, "step": 14953 }, { "epoch": 6.979696616102684, "grad_norm": 0.091796875, "learning_rate": 4.173309856314895e-05, "loss": 0.0004, "step": 14954 }, { "epoch": 6.980163360560093, "grad_norm": 0.0296630859375, "learning_rate": 4.1721173910853196e-05, "loss": 0.0002, "step": 14955 }, { "epoch": 6.980630105017503, "grad_norm": 0.0186767578125, "learning_rate": 4.1709250513365196e-05, "loss": 0.0002, "step": 14956 }, { "epoch": 6.981096849474913, "grad_norm": 0.041748046875, "learning_rate": 4.16973283709417e-05, "loss": 0.0019, "step": 14957 }, { "epoch": 6.981563593932322, "grad_norm": 0.0247802734375, "learning_rate": 4.1685407483839455e-05, "loss": 0.0002, "step": 14958 }, { "epoch": 6.982030338389731, "grad_norm": 0.06396484375, "learning_rate": 4.167348785231504e-05, "loss": 0.0003, "step": 14959 }, { "epoch": 6.982497082847141, "grad_norm": 0.046142578125, "learning_rate": 4.16615694766252e-05, "loss": 0.0002, "step": 14960 }, { "epoch": 6.982963827304551, "grad_norm": 0.036376953125, "learning_rate": 4.1649652357026445e-05, "loss": 0.0002, "step": 14961 }, { "epoch": 6.983430571761961, "grad_norm": 0.043701171875, "learning_rate": 4.1637736493775425e-05, "loss": 0.0003, "step": 14962 }, { "epoch": 6.98389731621937, "grad_norm": 0.1591796875, "learning_rate": 4.1625821887128724e-05, "loss": 0.0005, "step": 14963 }, { "epoch": 6.984364060676779, "grad_norm": 0.059814453125, "learning_rate": 4.161390853734281e-05, "loss": 0.0003, "step": 14964 }, { "epoch": 6.984830805134189, "grad_norm": 0.023681640625, "learning_rate": 4.160199644467422e-05, "loss": 0.0002, "step": 14965 }, { "epoch": 6.985297549591599, "grad_norm": 0.037841796875, "learning_rate": 4.159008560937948e-05, "loss": 0.0004, "step": 14966 }, { "epoch": 6.985764294049008, "grad_norm": 0.0228271484375, "learning_rate": 4.1578176031714976e-05, "loss": 0.0002, "step": 14967 }, { "epoch": 6.986231038506418, "grad_norm": 0.03173828125, "learning_rate": 4.1566267711937154e-05, "loss": 0.0002, "step": 14968 }, { "epoch": 6.9866977829638275, "grad_norm": 0.0830078125, "learning_rate": 4.155436065030246e-05, "loss": 0.0003, "step": 14969 }, { "epoch": 6.987164527421237, "grad_norm": 0.053466796875, "learning_rate": 4.154245484706719e-05, "loss": 0.0003, "step": 14970 }, { "epoch": 6.987631271878646, "grad_norm": 0.1884765625, "learning_rate": 4.1530550302487725e-05, "loss": 0.0005, "step": 14971 }, { "epoch": 6.988098016336056, "grad_norm": 0.08984375, "learning_rate": 4.151864701682042e-05, "loss": 0.0004, "step": 14972 }, { "epoch": 6.988564760793466, "grad_norm": 0.1220703125, "learning_rate": 4.15067449903215e-05, "loss": 0.0004, "step": 14973 }, { "epoch": 6.989031505250875, "grad_norm": 0.01953125, "learning_rate": 4.1494844223247265e-05, "loss": 0.0002, "step": 14974 }, { "epoch": 6.9894982497082845, "grad_norm": 0.0272216796875, "learning_rate": 4.148294471585397e-05, "loss": 0.0003, "step": 14975 }, { "epoch": 6.989964994165694, "grad_norm": 0.08203125, "learning_rate": 4.147104646839777e-05, "loss": 0.0032, "step": 14976 }, { "epoch": 6.990431738623104, "grad_norm": 0.0771484375, "learning_rate": 4.145914948113492e-05, "loss": 0.0004, "step": 14977 }, { "epoch": 6.990898483080514, "grad_norm": 0.053955078125, "learning_rate": 4.144725375432149e-05, "loss": 0.002, "step": 14978 }, { "epoch": 6.991365227537923, "grad_norm": 0.00994873046875, "learning_rate": 4.143535928821366e-05, "loss": 0.0002, "step": 14979 }, { "epoch": 6.991831971995333, "grad_norm": 0.0081787109375, "learning_rate": 4.142346608306755e-05, "loss": 0.0002, "step": 14980 }, { "epoch": 6.992298716452742, "grad_norm": 0.05615234375, "learning_rate": 4.141157413913917e-05, "loss": 0.0003, "step": 14981 }, { "epoch": 6.992765460910151, "grad_norm": 0.0186767578125, "learning_rate": 4.139968345668461e-05, "loss": 0.0002, "step": 14982 }, { "epoch": 6.993232205367561, "grad_norm": 0.1025390625, "learning_rate": 4.1387794035959924e-05, "loss": 0.0021, "step": 14983 }, { "epoch": 6.993698949824971, "grad_norm": 0.060546875, "learning_rate": 4.1375905877221024e-05, "loss": 0.0003, "step": 14984 }, { "epoch": 6.994165694282381, "grad_norm": 0.05322265625, "learning_rate": 4.1364018980723906e-05, "loss": 0.0003, "step": 14985 }, { "epoch": 6.9946324387397905, "grad_norm": 0.023681640625, "learning_rate": 4.1352133346724565e-05, "loss": 0.0002, "step": 14986 }, { "epoch": 6.995099183197199, "grad_norm": 0.0306396484375, "learning_rate": 4.1340248975478825e-05, "loss": 0.0003, "step": 14987 }, { "epoch": 6.995565927654609, "grad_norm": 0.0191650390625, "learning_rate": 4.1328365867242604e-05, "loss": 0.0002, "step": 14988 }, { "epoch": 6.996032672112019, "grad_norm": 0.0198974609375, "learning_rate": 4.131648402227181e-05, "loss": 0.0002, "step": 14989 }, { "epoch": 6.996499416569428, "grad_norm": 0.033203125, "learning_rate": 4.1304603440822176e-05, "loss": 0.0003, "step": 14990 }, { "epoch": 6.996966161026838, "grad_norm": 0.0400390625, "learning_rate": 4.129272412314956e-05, "loss": 0.003, "step": 14991 }, { "epoch": 6.9974329054842475, "grad_norm": 0.01458740234375, "learning_rate": 4.128084606950976e-05, "loss": 0.0002, "step": 14992 }, { "epoch": 6.997899649941657, "grad_norm": 0.01953125, "learning_rate": 4.12689692801585e-05, "loss": 0.0003, "step": 14993 }, { "epoch": 6.998366394399066, "grad_norm": 0.047119140625, "learning_rate": 4.125709375535146e-05, "loss": 0.0015, "step": 14994 }, { "epoch": 6.998833138856476, "grad_norm": 0.0311279296875, "learning_rate": 4.12452194953444e-05, "loss": 0.0003, "step": 14995 }, { "epoch": 6.999299883313886, "grad_norm": 0.045166015625, "learning_rate": 4.123334650039292e-05, "loss": 0.0003, "step": 14996 }, { "epoch": 6.9997666277712955, "grad_norm": 0.359375, "learning_rate": 4.12214747707527e-05, "loss": 0.001, "step": 14997 }, { "epoch": 7.0002333722287045, "grad_norm": 0.0849609375, "learning_rate": 4.1209604306679375e-05, "loss": 0.0046, "step": 14998 }, { "epoch": 7.000700116686114, "grad_norm": 0.042724609375, "learning_rate": 4.119773510842846e-05, "loss": 0.0017, "step": 14999 }, { "epoch": 7.001166861143524, "grad_norm": 0.03271484375, "learning_rate": 4.118586717625555e-05, "loss": 0.0003, "step": 15000 }, { "epoch": 7.001633605600934, "grad_norm": 0.130859375, "learning_rate": 4.117400051041621e-05, "loss": 0.0005, "step": 15001 }, { "epoch": 7.002100350058343, "grad_norm": 0.01348876953125, "learning_rate": 4.116213511116587e-05, "loss": 0.0002, "step": 15002 }, { "epoch": 7.0025670945157525, "grad_norm": 0.007537841796875, "learning_rate": 4.1150270978760044e-05, "loss": 0.0002, "step": 15003 }, { "epoch": 7.003033838973162, "grad_norm": 0.023681640625, "learning_rate": 4.1138408113454216e-05, "loss": 0.0003, "step": 15004 }, { "epoch": 7.003500583430572, "grad_norm": 0.0126953125, "learning_rate": 4.1126546515503736e-05, "loss": 0.0002, "step": 15005 }, { "epoch": 7.003967327887981, "grad_norm": 0.0164794921875, "learning_rate": 4.111468618516403e-05, "loss": 0.0002, "step": 15006 }, { "epoch": 7.004434072345391, "grad_norm": 0.051513671875, "learning_rate": 4.11028271226905e-05, "loss": 0.0003, "step": 15007 }, { "epoch": 7.004900816802801, "grad_norm": 0.00927734375, "learning_rate": 4.1090969328338416e-05, "loss": 0.0002, "step": 15008 }, { "epoch": 7.004900816802801, "eval_loss": 2.36678409576416, "eval_runtime": 84.4731, "eval_samples_per_second": 21.356, "eval_steps_per_second": 2.675, "step": 15008 }, { "epoch": 7.00536756126021, "grad_norm": 0.022705078125, "learning_rate": 4.107911280236315e-05, "loss": 0.0002, "step": 15009 }, { "epoch": 7.005834305717619, "grad_norm": 0.08837890625, "learning_rate": 4.106725754501993e-05, "loss": 0.0003, "step": 15010 }, { "epoch": 7.006301050175029, "grad_norm": 0.12158203125, "learning_rate": 4.105540355656404e-05, "loss": 0.0004, "step": 15011 }, { "epoch": 7.006767794632439, "grad_norm": 0.01025390625, "learning_rate": 4.104355083725076e-05, "loss": 0.0002, "step": 15012 }, { "epoch": 7.007234539089849, "grad_norm": 0.018310546875, "learning_rate": 4.103169938733519e-05, "loss": 0.0003, "step": 15013 }, { "epoch": 7.007701283547258, "grad_norm": 0.00689697265625, "learning_rate": 4.1019849207072556e-05, "loss": 0.0002, "step": 15014 }, { "epoch": 7.008168028004667, "grad_norm": 0.0152587890625, "learning_rate": 4.1008000296718065e-05, "loss": 0.0003, "step": 15015 }, { "epoch": 7.008634772462077, "grad_norm": 0.016845703125, "learning_rate": 4.0996152656526734e-05, "loss": 0.0003, "step": 15016 }, { "epoch": 7.009101516919487, "grad_norm": 0.026611328125, "learning_rate": 4.098430628675369e-05, "loss": 0.0003, "step": 15017 }, { "epoch": 7.009568261376896, "grad_norm": 0.1826171875, "learning_rate": 4.0972461187654066e-05, "loss": 0.001, "step": 15018 }, { "epoch": 7.010035005834306, "grad_norm": 0.036376953125, "learning_rate": 4.09606173594828e-05, "loss": 0.0003, "step": 15019 }, { "epoch": 7.0105017502917155, "grad_norm": 0.036376953125, "learning_rate": 4.094877480249495e-05, "loss": 0.0003, "step": 15020 }, { "epoch": 7.010968494749124, "grad_norm": 0.0146484375, "learning_rate": 4.093693351694553e-05, "loss": 0.0003, "step": 15021 }, { "epoch": 7.011435239206534, "grad_norm": 0.020751953125, "learning_rate": 4.092509350308943e-05, "loss": 0.0002, "step": 15022 }, { "epoch": 7.011901983663944, "grad_norm": 0.030517578125, "learning_rate": 4.0913254761181605e-05, "loss": 0.0003, "step": 15023 }, { "epoch": 7.012368728121354, "grad_norm": 0.019287109375, "learning_rate": 4.090141729147701e-05, "loss": 0.0002, "step": 15024 }, { "epoch": 7.012835472578763, "grad_norm": 0.0106201171875, "learning_rate": 4.088958109423042e-05, "loss": 0.0002, "step": 15025 }, { "epoch": 7.0133022170361725, "grad_norm": 0.00970458984375, "learning_rate": 4.087774616969675e-05, "loss": 0.0002, "step": 15026 }, { "epoch": 7.013768961493582, "grad_norm": 0.01519775390625, "learning_rate": 4.086591251813083e-05, "loss": 0.0002, "step": 15027 }, { "epoch": 7.014235705950992, "grad_norm": 0.0264892578125, "learning_rate": 4.085408013978739e-05, "loss": 0.0016, "step": 15028 }, { "epoch": 7.014702450408401, "grad_norm": 0.023193359375, "learning_rate": 4.0842249034921265e-05, "loss": 0.0002, "step": 15029 }, { "epoch": 7.015169194865811, "grad_norm": 0.0341796875, "learning_rate": 4.083041920378712e-05, "loss": 0.0002, "step": 15030 }, { "epoch": 7.015635939323221, "grad_norm": 0.01397705078125, "learning_rate": 4.0818590646639685e-05, "loss": 0.0002, "step": 15031 }, { "epoch": 7.01610268378063, "grad_norm": 0.043701171875, "learning_rate": 4.080676336373371e-05, "loss": 0.0002, "step": 15032 }, { "epoch": 7.016569428238039, "grad_norm": 0.0184326171875, "learning_rate": 4.079493735532375e-05, "loss": 0.0003, "step": 15033 }, { "epoch": 7.017036172695449, "grad_norm": 0.0184326171875, "learning_rate": 4.078311262166449e-05, "loss": 0.0002, "step": 15034 }, { "epoch": 7.017502917152859, "grad_norm": 0.0286865234375, "learning_rate": 4.077128916301054e-05, "loss": 0.0002, "step": 15035 }, { "epoch": 7.017969661610269, "grad_norm": 0.041015625, "learning_rate": 4.0759466979616414e-05, "loss": 0.0003, "step": 15036 }, { "epoch": 7.018436406067678, "grad_norm": 0.029052734375, "learning_rate": 4.07476460717367e-05, "loss": 0.0002, "step": 15037 }, { "epoch": 7.018903150525087, "grad_norm": 0.0517578125, "learning_rate": 4.073582643962594e-05, "loss": 0.0003, "step": 15038 }, { "epoch": 7.019369894982497, "grad_norm": 0.04931640625, "learning_rate": 4.072400808353856e-05, "loss": 0.0028, "step": 15039 }, { "epoch": 7.019836639439907, "grad_norm": 0.126953125, "learning_rate": 4.071219100372905e-05, "loss": 0.0004, "step": 15040 }, { "epoch": 7.020303383897316, "grad_norm": 0.0177001953125, "learning_rate": 4.070037520045191e-05, "loss": 0.0002, "step": 15041 }, { "epoch": 7.020770128354726, "grad_norm": 0.0257568359375, "learning_rate": 4.068856067396143e-05, "loss": 0.0002, "step": 15042 }, { "epoch": 7.0212368728121355, "grad_norm": 0.04541015625, "learning_rate": 4.067674742451204e-05, "loss": 0.0044, "step": 15043 }, { "epoch": 7.021703617269545, "grad_norm": 0.032958984375, "learning_rate": 4.0664935452358135e-05, "loss": 0.0006, "step": 15044 }, { "epoch": 7.022170361726954, "grad_norm": 0.0145263671875, "learning_rate": 4.065312475775397e-05, "loss": 0.0002, "step": 15045 }, { "epoch": 7.022637106184364, "grad_norm": 0.043212890625, "learning_rate": 4.064131534095387e-05, "loss": 0.0003, "step": 15046 }, { "epoch": 7.023103850641774, "grad_norm": 0.010986328125, "learning_rate": 4.062950720221215e-05, "loss": 0.0001, "step": 15047 }, { "epoch": 7.023570595099184, "grad_norm": 0.00927734375, "learning_rate": 4.0617700341782974e-05, "loss": 0.0002, "step": 15048 }, { "epoch": 7.0240373395565925, "grad_norm": 0.03515625, "learning_rate": 4.060589475992059e-05, "loss": 0.0002, "step": 15049 }, { "epoch": 7.024504084014002, "grad_norm": 0.025146484375, "learning_rate": 4.059409045687924e-05, "loss": 0.0002, "step": 15050 }, { "epoch": 7.024970828471412, "grad_norm": 0.045654296875, "learning_rate": 4.0582287432912993e-05, "loss": 0.0022, "step": 15051 }, { "epoch": 7.025437572928822, "grad_norm": 0.031494140625, "learning_rate": 4.0570485688276015e-05, "loss": 0.0002, "step": 15052 }, { "epoch": 7.025904317386231, "grad_norm": 0.044921875, "learning_rate": 4.0558685223222456e-05, "loss": 0.0003, "step": 15053 }, { "epoch": 7.026371061843641, "grad_norm": 0.03515625, "learning_rate": 4.054688603800634e-05, "loss": 0.0019, "step": 15054 }, { "epoch": 7.02683780630105, "grad_norm": 0.03466796875, "learning_rate": 4.053508813288171e-05, "loss": 0.0002, "step": 15055 }, { "epoch": 7.02730455075846, "grad_norm": 0.01129150390625, "learning_rate": 4.052329150810265e-05, "loss": 0.0002, "step": 15056 }, { "epoch": 7.027771295215869, "grad_norm": 0.1982421875, "learning_rate": 4.051149616392308e-05, "loss": 0.0034, "step": 15057 }, { "epoch": 7.028238039673279, "grad_norm": 0.197265625, "learning_rate": 4.0499702100597035e-05, "loss": 0.0016, "step": 15058 }, { "epoch": 7.028704784130689, "grad_norm": 0.048583984375, "learning_rate": 4.0487909318378394e-05, "loss": 0.0027, "step": 15059 }, { "epoch": 7.029171528588098, "grad_norm": 0.00860595703125, "learning_rate": 4.047611781752109e-05, "loss": 0.0001, "step": 15060 }, { "epoch": 7.029638273045507, "grad_norm": 0.0181884765625, "learning_rate": 4.046432759827904e-05, "loss": 0.0002, "step": 15061 }, { "epoch": 7.030105017502917, "grad_norm": 0.0272216796875, "learning_rate": 4.0452538660906046e-05, "loss": 0.0003, "step": 15062 }, { "epoch": 7.030571761960327, "grad_norm": 0.0303955078125, "learning_rate": 4.0440751005655964e-05, "loss": 0.0003, "step": 15063 }, { "epoch": 7.031038506417736, "grad_norm": 0.0546875, "learning_rate": 4.042896463278263e-05, "loss": 0.0003, "step": 15064 }, { "epoch": 7.031505250875146, "grad_norm": 0.0174560546875, "learning_rate": 4.041717954253974e-05, "loss": 0.0002, "step": 15065 }, { "epoch": 7.0319719953325555, "grad_norm": 0.01904296875, "learning_rate": 4.0405395735181085e-05, "loss": 0.0002, "step": 15066 }, { "epoch": 7.032438739789965, "grad_norm": 0.0235595703125, "learning_rate": 4.039361321096042e-05, "loss": 0.0002, "step": 15067 }, { "epoch": 7.032905484247374, "grad_norm": 0.01275634765625, "learning_rate": 4.038183197013137e-05, "loss": 0.0002, "step": 15068 }, { "epoch": 7.033372228704784, "grad_norm": 0.013427734375, "learning_rate": 4.0370052012947614e-05, "loss": 0.0002, "step": 15069 }, { "epoch": 7.033838973162194, "grad_norm": 0.412109375, "learning_rate": 4.035827333966283e-05, "loss": 0.0009, "step": 15070 }, { "epoch": 7.0343057176196035, "grad_norm": 0.01318359375, "learning_rate": 4.034649595053056e-05, "loss": 0.0002, "step": 15071 }, { "epoch": 7.0347724620770125, "grad_norm": 0.025146484375, "learning_rate": 4.033471984580443e-05, "loss": 0.0003, "step": 15072 }, { "epoch": 7.035239206534422, "grad_norm": 0.0128173828125, "learning_rate": 4.0322945025738014e-05, "loss": 0.0002, "step": 15073 }, { "epoch": 7.035705950991832, "grad_norm": 0.01458740234375, "learning_rate": 4.031117149058477e-05, "loss": 0.0002, "step": 15074 }, { "epoch": 7.036172695449242, "grad_norm": 0.0302734375, "learning_rate": 4.0299399240598224e-05, "loss": 0.0003, "step": 15075 }, { "epoch": 7.036639439906651, "grad_norm": 0.00799560546875, "learning_rate": 4.028762827603188e-05, "loss": 0.0001, "step": 15076 }, { "epoch": 7.0371061843640605, "grad_norm": 0.0458984375, "learning_rate": 4.027585859713913e-05, "loss": 0.0005, "step": 15077 }, { "epoch": 7.03757292882147, "grad_norm": 0.0830078125, "learning_rate": 4.026409020417344e-05, "loss": 0.0004, "step": 15078 }, { "epoch": 7.03803967327888, "grad_norm": 0.035400390625, "learning_rate": 4.0252323097388125e-05, "loss": 0.0002, "step": 15079 }, { "epoch": 7.038506417736289, "grad_norm": 0.04833984375, "learning_rate": 4.02405572770366e-05, "loss": 0.0003, "step": 15080 }, { "epoch": 7.038973162193699, "grad_norm": 0.006988525390625, "learning_rate": 4.02287927433722e-05, "loss": 0.0001, "step": 15081 }, { "epoch": 7.039439906651109, "grad_norm": 0.00927734375, "learning_rate": 4.0217029496648184e-05, "loss": 0.0002, "step": 15082 }, { "epoch": 7.039906651108518, "grad_norm": 0.0250244140625, "learning_rate": 4.020526753711784e-05, "loss": 0.0002, "step": 15083 }, { "epoch": 7.040373395565927, "grad_norm": 0.048095703125, "learning_rate": 4.019350686503448e-05, "loss": 0.0007, "step": 15084 }, { "epoch": 7.040840140023337, "grad_norm": 0.10107421875, "learning_rate": 4.0181747480651235e-05, "loss": 0.0004, "step": 15085 }, { "epoch": 7.041306884480747, "grad_norm": 0.016845703125, "learning_rate": 4.016998938422133e-05, "loss": 0.0002, "step": 15086 }, { "epoch": 7.041773628938157, "grad_norm": 0.038818359375, "learning_rate": 4.015823257599798e-05, "loss": 0.0022, "step": 15087 }, { "epoch": 7.042240373395566, "grad_norm": 0.2294921875, "learning_rate": 4.014647705623426e-05, "loss": 0.0006, "step": 15088 }, { "epoch": 7.042707117852975, "grad_norm": 0.0101318359375, "learning_rate": 4.013472282518329e-05, "loss": 0.0001, "step": 15089 }, { "epoch": 7.043173862310385, "grad_norm": 0.0260009765625, "learning_rate": 4.0122969883098184e-05, "loss": 0.0005, "step": 15090 }, { "epoch": 7.043640606767795, "grad_norm": 0.0125732421875, "learning_rate": 4.011121823023193e-05, "loss": 0.0003, "step": 15091 }, { "epoch": 7.044107351225204, "grad_norm": 0.1162109375, "learning_rate": 4.0099467866837595e-05, "loss": 0.0004, "step": 15092 }, { "epoch": 7.044574095682614, "grad_norm": 0.016357421875, "learning_rate": 4.008771879316822e-05, "loss": 0.0002, "step": 15093 }, { "epoch": 7.0450408401400235, "grad_norm": 0.095703125, "learning_rate": 4.007597100947669e-05, "loss": 0.0003, "step": 15094 }, { "epoch": 7.045507584597433, "grad_norm": 0.0185546875, "learning_rate": 4.006422451601599e-05, "loss": 0.0002, "step": 15095 }, { "epoch": 7.045974329054842, "grad_norm": 0.07275390625, "learning_rate": 4.0052479313039083e-05, "loss": 0.0003, "step": 15096 }, { "epoch": 7.046441073512252, "grad_norm": 0.01409912109375, "learning_rate": 4.004073540079876e-05, "loss": 0.0002, "step": 15097 }, { "epoch": 7.046907817969662, "grad_norm": 0.00927734375, "learning_rate": 4.002899277954794e-05, "loss": 0.0002, "step": 15098 }, { "epoch": 7.047374562427072, "grad_norm": 0.0263671875, "learning_rate": 4.0017251449539474e-05, "loss": 0.0001, "step": 15099 }, { "epoch": 7.0478413068844805, "grad_norm": 0.11181640625, "learning_rate": 4.000551141102611e-05, "loss": 0.0004, "step": 15100 }, { "epoch": 7.04830805134189, "grad_norm": 0.0517578125, "learning_rate": 3.9993772664260645e-05, "loss": 0.0032, "step": 15101 }, { "epoch": 7.0487747957993, "grad_norm": 0.0262451171875, "learning_rate": 3.998203520949586e-05, "loss": 0.0003, "step": 15102 }, { "epoch": 7.049241540256709, "grad_norm": 0.03173828125, "learning_rate": 3.997029904698442e-05, "loss": 0.0018, "step": 15103 }, { "epoch": 7.049708284714119, "grad_norm": 0.01806640625, "learning_rate": 3.995856417697904e-05, "loss": 0.0002, "step": 15104 }, { "epoch": 7.050175029171529, "grad_norm": 0.023681640625, "learning_rate": 3.994683059973243e-05, "loss": 0.0002, "step": 15105 }, { "epoch": 7.050641773628938, "grad_norm": 0.029052734375, "learning_rate": 3.993509831549715e-05, "loss": 0.0019, "step": 15106 }, { "epoch": 7.051108518086347, "grad_norm": 0.0101318359375, "learning_rate": 3.9923367324525896e-05, "loss": 0.0002, "step": 15107 }, { "epoch": 7.051575262543757, "grad_norm": 0.04345703125, "learning_rate": 3.991163762707115e-05, "loss": 0.0003, "step": 15108 }, { "epoch": 7.052042007001167, "grad_norm": 0.107421875, "learning_rate": 3.989990922338551e-05, "loss": 0.0003, "step": 15109 }, { "epoch": 7.052508751458577, "grad_norm": 0.0184326171875, "learning_rate": 3.988818211372155e-05, "loss": 0.0002, "step": 15110 }, { "epoch": 7.052975495915986, "grad_norm": 0.0147705078125, "learning_rate": 3.98764562983317e-05, "loss": 0.0002, "step": 15111 }, { "epoch": 7.053442240373395, "grad_norm": 0.01416015625, "learning_rate": 3.986473177746843e-05, "loss": 0.0002, "step": 15112 }, { "epoch": 7.053908984830805, "grad_norm": 0.052978515625, "learning_rate": 3.9853008551384264e-05, "loss": 0.0002, "step": 15113 }, { "epoch": 7.054375729288215, "grad_norm": 0.010986328125, "learning_rate": 3.984128662033151e-05, "loss": 0.0002, "step": 15114 }, { "epoch": 7.054842473745624, "grad_norm": 0.0084228515625, "learning_rate": 3.982956598456261e-05, "loss": 0.0002, "step": 15115 }, { "epoch": 7.055309218203034, "grad_norm": 0.138671875, "learning_rate": 3.9817846644329936e-05, "loss": 0.0004, "step": 15116 }, { "epoch": 7.0557759626604435, "grad_norm": 0.05810546875, "learning_rate": 3.980612859988577e-05, "loss": 0.0009, "step": 15117 }, { "epoch": 7.056242707117853, "grad_norm": 0.038818359375, "learning_rate": 3.979441185148245e-05, "loss": 0.0003, "step": 15118 }, { "epoch": 7.056709451575262, "grad_norm": 0.0157470703125, "learning_rate": 3.9782696399372264e-05, "loss": 0.0002, "step": 15119 }, { "epoch": 7.057176196032672, "grad_norm": 0.031982421875, "learning_rate": 3.977098224380741e-05, "loss": 0.0002, "step": 15120 }, { "epoch": 7.057642940490082, "grad_norm": 0.037109375, "learning_rate": 3.975926938504013e-05, "loss": 0.0018, "step": 15121 }, { "epoch": 7.058109684947492, "grad_norm": 0.02001953125, "learning_rate": 3.974755782332266e-05, "loss": 0.0002, "step": 15122 }, { "epoch": 7.0585764294049005, "grad_norm": 0.0283203125, "learning_rate": 3.973584755890708e-05, "loss": 0.0002, "step": 15123 }, { "epoch": 7.05904317386231, "grad_norm": 0.031494140625, "learning_rate": 3.972413859204557e-05, "loss": 0.0002, "step": 15124 }, { "epoch": 7.05950991831972, "grad_norm": 0.036376953125, "learning_rate": 3.9712430922990266e-05, "loss": 0.0019, "step": 15125 }, { "epoch": 7.05997666277713, "grad_norm": 0.03955078125, "learning_rate": 3.9700724551993174e-05, "loss": 0.0027, "step": 15126 }, { "epoch": 7.060443407234539, "grad_norm": 0.01251220703125, "learning_rate": 3.9689019479306425e-05, "loss": 0.0002, "step": 15127 }, { "epoch": 7.060910151691949, "grad_norm": 0.0166015625, "learning_rate": 3.9677315705181974e-05, "loss": 0.0002, "step": 15128 }, { "epoch": 7.061376896149358, "grad_norm": 0.03564453125, "learning_rate": 3.966561322987184e-05, "loss": 0.0002, "step": 15129 }, { "epoch": 7.061843640606768, "grad_norm": 0.061767578125, "learning_rate": 3.965391205362803e-05, "loss": 0.0003, "step": 15130 }, { "epoch": 7.062310385064177, "grad_norm": 0.03466796875, "learning_rate": 3.964221217670241e-05, "loss": 0.0018, "step": 15131 }, { "epoch": 7.062777129521587, "grad_norm": 0.01324462890625, "learning_rate": 3.963051359934693e-05, "loss": 0.0002, "step": 15132 }, { "epoch": 7.063243873978997, "grad_norm": 0.0279541015625, "learning_rate": 3.961881632181351e-05, "loss": 0.0002, "step": 15133 }, { "epoch": 7.0637106184364065, "grad_norm": 0.01519775390625, "learning_rate": 3.960712034435393e-05, "loss": 0.0002, "step": 15134 }, { "epoch": 7.064177362893815, "grad_norm": 0.009521484375, "learning_rate": 3.959542566722006e-05, "loss": 0.0002, "step": 15135 }, { "epoch": 7.064644107351225, "grad_norm": 0.06298828125, "learning_rate": 3.958373229066373e-05, "loss": 0.002, "step": 15136 }, { "epoch": 7.065110851808635, "grad_norm": 0.07958984375, "learning_rate": 3.957204021493669e-05, "loss": 0.0004, "step": 15137 }, { "epoch": 7.065577596266045, "grad_norm": 0.03515625, "learning_rate": 3.956034944029062e-05, "loss": 0.0003, "step": 15138 }, { "epoch": 7.066044340723454, "grad_norm": 0.037109375, "learning_rate": 3.9548659966977316e-05, "loss": 0.0003, "step": 15139 }, { "epoch": 7.0665110851808635, "grad_norm": 0.05322265625, "learning_rate": 3.9536971795248414e-05, "loss": 0.0003, "step": 15140 }, { "epoch": 7.066977829638273, "grad_norm": 0.08935546875, "learning_rate": 3.9525284925355586e-05, "loss": 0.002, "step": 15141 }, { "epoch": 7.067444574095683, "grad_norm": 0.01348876953125, "learning_rate": 3.9513599357550515e-05, "loss": 0.0002, "step": 15142 }, { "epoch": 7.067911318553092, "grad_norm": 0.038330078125, "learning_rate": 3.9501915092084716e-05, "loss": 0.0002, "step": 15143 }, { "epoch": 7.068378063010502, "grad_norm": 0.013671875, "learning_rate": 3.949023212920981e-05, "loss": 0.0002, "step": 15144 }, { "epoch": 7.0688448074679116, "grad_norm": 0.00958251953125, "learning_rate": 3.947855046917738e-05, "loss": 0.0002, "step": 15145 }, { "epoch": 7.0693115519253205, "grad_norm": 0.0262451171875, "learning_rate": 3.946687011223888e-05, "loss": 0.0002, "step": 15146 }, { "epoch": 7.06977829638273, "grad_norm": 0.06201171875, "learning_rate": 3.9455191058645813e-05, "loss": 0.0004, "step": 15147 }, { "epoch": 7.07024504084014, "grad_norm": 0.03466796875, "learning_rate": 3.9443513308649704e-05, "loss": 0.0015, "step": 15148 }, { "epoch": 7.07071178529755, "grad_norm": 0.07568359375, "learning_rate": 3.9431836862501906e-05, "loss": 0.0003, "step": 15149 }, { "epoch": 7.071178529754959, "grad_norm": 0.025390625, "learning_rate": 3.942016172045386e-05, "loss": 0.0002, "step": 15150 }, { "epoch": 7.0716452742123685, "grad_norm": 0.015869140625, "learning_rate": 3.940848788275699e-05, "loss": 0.0002, "step": 15151 }, { "epoch": 7.072112018669778, "grad_norm": 0.018798828125, "learning_rate": 3.939681534966255e-05, "loss": 0.0002, "step": 15152 }, { "epoch": 7.072578763127188, "grad_norm": 0.07958984375, "learning_rate": 3.938514412142193e-05, "loss": 0.0047, "step": 15153 }, { "epoch": 7.073045507584597, "grad_norm": 0.0167236328125, "learning_rate": 3.937347419828644e-05, "loss": 0.0002, "step": 15154 }, { "epoch": 7.073512252042007, "grad_norm": 0.0390625, "learning_rate": 3.9361805580507295e-05, "loss": 0.0003, "step": 15155 }, { "epoch": 7.073978996499417, "grad_norm": 0.022705078125, "learning_rate": 3.935013826833578e-05, "loss": 0.0002, "step": 15156 }, { "epoch": 7.074445740956826, "grad_norm": 0.0169677734375, "learning_rate": 3.933847226202304e-05, "loss": 0.0002, "step": 15157 }, { "epoch": 7.074912485414235, "grad_norm": 0.04150390625, "learning_rate": 3.93268075618203e-05, "loss": 0.0003, "step": 15158 }, { "epoch": 7.075379229871645, "grad_norm": 0.0206298828125, "learning_rate": 3.931514416797875e-05, "loss": 0.0002, "step": 15159 }, { "epoch": 7.075845974329055, "grad_norm": 0.0291748046875, "learning_rate": 3.9303482080749445e-05, "loss": 0.0004, "step": 15160 }, { "epoch": 7.076312718786465, "grad_norm": 0.0284423828125, "learning_rate": 3.9291821300383504e-05, "loss": 0.0003, "step": 15161 }, { "epoch": 7.076779463243874, "grad_norm": 0.01080322265625, "learning_rate": 3.9280161827132054e-05, "loss": 0.0001, "step": 15162 }, { "epoch": 7.077246207701283, "grad_norm": 0.054443359375, "learning_rate": 3.926850366124605e-05, "loss": 0.0009, "step": 15163 }, { "epoch": 7.077712952158693, "grad_norm": 0.048095703125, "learning_rate": 3.925684680297654e-05, "loss": 0.0034, "step": 15164 }, { "epoch": 7.078179696616103, "grad_norm": 0.09375, "learning_rate": 3.9245191252574556e-05, "loss": 0.002, "step": 15165 }, { "epoch": 7.078646441073512, "grad_norm": 0.039794921875, "learning_rate": 3.923353701029098e-05, "loss": 0.0002, "step": 15166 }, { "epoch": 7.079113185530922, "grad_norm": 0.020263671875, "learning_rate": 3.922188407637676e-05, "loss": 0.0002, "step": 15167 }, { "epoch": 7.0795799299883315, "grad_norm": 0.017822265625, "learning_rate": 3.921023245108285e-05, "loss": 0.0002, "step": 15168 }, { "epoch": 7.080046674445741, "grad_norm": 0.00823974609375, "learning_rate": 3.9198582134660056e-05, "loss": 0.0002, "step": 15169 }, { "epoch": 7.08051341890315, "grad_norm": 0.05029296875, "learning_rate": 3.918693312735925e-05, "loss": 0.0027, "step": 15170 }, { "epoch": 7.08098016336056, "grad_norm": 0.01904296875, "learning_rate": 3.9175285429431274e-05, "loss": 0.0002, "step": 15171 }, { "epoch": 7.08144690781797, "grad_norm": 0.006500244140625, "learning_rate": 3.916363904112686e-05, "loss": 0.0001, "step": 15172 }, { "epoch": 7.08191365227538, "grad_norm": 0.0595703125, "learning_rate": 3.915199396269679e-05, "loss": 0.0026, "step": 15173 }, { "epoch": 7.0823803967327885, "grad_norm": 0.01434326171875, "learning_rate": 3.914035019439184e-05, "loss": 0.0002, "step": 15174 }, { "epoch": 7.082847141190198, "grad_norm": 0.042724609375, "learning_rate": 3.912870773646263e-05, "loss": 0.0036, "step": 15175 }, { "epoch": 7.083313885647608, "grad_norm": 0.015380859375, "learning_rate": 3.911706658915993e-05, "loss": 0.0002, "step": 15176 }, { "epoch": 7.083780630105018, "grad_norm": 0.031982421875, "learning_rate": 3.91054267527343e-05, "loss": 0.0003, "step": 15177 }, { "epoch": 7.084247374562427, "grad_norm": 0.050537109375, "learning_rate": 3.90937882274364e-05, "loss": 0.0002, "step": 15178 }, { "epoch": 7.084714119019837, "grad_norm": 0.01397705078125, "learning_rate": 3.9082151013516846e-05, "loss": 0.0002, "step": 15179 }, { "epoch": 7.085180863477246, "grad_norm": 0.00927734375, "learning_rate": 3.9070515111226125e-05, "loss": 0.0002, "step": 15180 }, { "epoch": 7.085647607934656, "grad_norm": 0.00848388671875, "learning_rate": 3.9058880520814835e-05, "loss": 0.0002, "step": 15181 }, { "epoch": 7.086114352392065, "grad_norm": 0.04833984375, "learning_rate": 3.904724724253348e-05, "loss": 0.003, "step": 15182 }, { "epoch": 7.086581096849475, "grad_norm": 0.0230712890625, "learning_rate": 3.90356152766325e-05, "loss": 0.0002, "step": 15183 }, { "epoch": 7.087047841306885, "grad_norm": 0.0123291015625, "learning_rate": 3.9023984623362385e-05, "loss": 0.0002, "step": 15184 }, { "epoch": 7.0875145857642945, "grad_norm": 0.01214599609375, "learning_rate": 3.901235528297351e-05, "loss": 0.0002, "step": 15185 }, { "epoch": 7.087981330221703, "grad_norm": 0.0081787109375, "learning_rate": 3.900072725571632e-05, "loss": 0.0002, "step": 15186 }, { "epoch": 7.088448074679113, "grad_norm": 0.09130859375, "learning_rate": 3.898910054184113e-05, "loss": 0.0004, "step": 15187 }, { "epoch": 7.088914819136523, "grad_norm": 0.0234375, "learning_rate": 3.897747514159832e-05, "loss": 0.0002, "step": 15188 }, { "epoch": 7.089381563593932, "grad_norm": 0.10302734375, "learning_rate": 3.896585105523815e-05, "loss": 0.0004, "step": 15189 }, { "epoch": 7.089848308051342, "grad_norm": 0.068359375, "learning_rate": 3.8954228283010916e-05, "loss": 0.0026, "step": 15190 }, { "epoch": 7.0903150525087515, "grad_norm": 0.0115966796875, "learning_rate": 3.894260682516692e-05, "loss": 0.0002, "step": 15191 }, { "epoch": 7.090781796966161, "grad_norm": 0.0203857421875, "learning_rate": 3.893098668195631e-05, "loss": 0.0002, "step": 15192 }, { "epoch": 7.09124854142357, "grad_norm": 0.09619140625, "learning_rate": 3.891936785362931e-05, "loss": 0.0062, "step": 15193 }, { "epoch": 7.09171528588098, "grad_norm": 0.038330078125, "learning_rate": 3.8907750340436125e-05, "loss": 0.0016, "step": 15194 }, { "epoch": 7.09218203033839, "grad_norm": 0.0269775390625, "learning_rate": 3.8896134142626824e-05, "loss": 0.0002, "step": 15195 }, { "epoch": 7.0926487747958, "grad_norm": 0.006378173828125, "learning_rate": 3.888451926045155e-05, "loss": 0.0001, "step": 15196 }, { "epoch": 7.0931155192532085, "grad_norm": 0.017822265625, "learning_rate": 3.887290569416042e-05, "loss": 0.0002, "step": 15197 }, { "epoch": 7.093582263710618, "grad_norm": 0.0439453125, "learning_rate": 3.886129344400342e-05, "loss": 0.0026, "step": 15198 }, { "epoch": 7.094049008168028, "grad_norm": 0.021484375, "learning_rate": 3.884968251023061e-05, "loss": 0.0002, "step": 15199 }, { "epoch": 7.094515752625438, "grad_norm": 0.0087890625, "learning_rate": 3.883807289309202e-05, "loss": 0.0001, "step": 15200 }, { "epoch": 7.094982497082847, "grad_norm": 0.02734375, "learning_rate": 3.882646459283753e-05, "loss": 0.0002, "step": 15201 }, { "epoch": 7.095449241540257, "grad_norm": 0.076171875, "learning_rate": 3.8814857609717134e-05, "loss": 0.0005, "step": 15202 }, { "epoch": 7.095915985997666, "grad_norm": 0.04345703125, "learning_rate": 3.880325194398079e-05, "loss": 0.0003, "step": 15203 }, { "epoch": 7.096382730455076, "grad_norm": 0.0546875, "learning_rate": 3.879164759587828e-05, "loss": 0.0002, "step": 15204 }, { "epoch": 7.096849474912485, "grad_norm": 0.0263671875, "learning_rate": 3.878004456565951e-05, "loss": 0.0002, "step": 15205 }, { "epoch": 7.097316219369895, "grad_norm": 0.019775390625, "learning_rate": 3.876844285357435e-05, "loss": 0.0002, "step": 15206 }, { "epoch": 7.097782963827305, "grad_norm": 0.01251220703125, "learning_rate": 3.875684245987251e-05, "loss": 0.0002, "step": 15207 }, { "epoch": 7.0982497082847145, "grad_norm": 0.043212890625, "learning_rate": 3.874524338480384e-05, "loss": 0.0004, "step": 15208 }, { "epoch": 7.098716452742123, "grad_norm": 0.01251220703125, "learning_rate": 3.873364562861802e-05, "loss": 0.0002, "step": 15209 }, { "epoch": 7.099183197199533, "grad_norm": 0.018310546875, "learning_rate": 3.872204919156477e-05, "loss": 0.0002, "step": 15210 }, { "epoch": 7.099649941656943, "grad_norm": 0.00872802734375, "learning_rate": 3.871045407389383e-05, "loss": 0.0002, "step": 15211 }, { "epoch": 7.100116686114353, "grad_norm": 0.048095703125, "learning_rate": 3.869886027585478e-05, "loss": 0.0037, "step": 15212 }, { "epoch": 7.100583430571762, "grad_norm": 0.1640625, "learning_rate": 3.868726779769728e-05, "loss": 0.0007, "step": 15213 }, { "epoch": 7.1010501750291715, "grad_norm": 0.0186767578125, "learning_rate": 3.8675676639670975e-05, "loss": 0.0002, "step": 15214 }, { "epoch": 7.101516919486581, "grad_norm": 0.1357421875, "learning_rate": 3.866408680202535e-05, "loss": 0.0004, "step": 15215 }, { "epoch": 7.101983663943991, "grad_norm": 0.0181884765625, "learning_rate": 3.8652498285009996e-05, "loss": 0.0002, "step": 15216 }, { "epoch": 7.1024504084014, "grad_norm": 0.01556396484375, "learning_rate": 3.864091108887445e-05, "loss": 0.0002, "step": 15217 }, { "epoch": 7.10291715285881, "grad_norm": 0.0196533203125, "learning_rate": 3.8629325213868136e-05, "loss": 0.0002, "step": 15218 }, { "epoch": 7.1033838973162196, "grad_norm": 0.05810546875, "learning_rate": 3.861774066024054e-05, "loss": 0.0016, "step": 15219 }, { "epoch": 7.103850641773629, "grad_norm": 0.013427734375, "learning_rate": 3.860615742824113e-05, "loss": 0.0002, "step": 15220 }, { "epoch": 7.104317386231038, "grad_norm": 0.005950927734375, "learning_rate": 3.859457551811922e-05, "loss": 0.0001, "step": 15221 }, { "epoch": 7.104784130688448, "grad_norm": 0.01373291015625, "learning_rate": 3.858299493012425e-05, "loss": 0.0002, "step": 15222 }, { "epoch": 7.105250875145858, "grad_norm": 0.0301513671875, "learning_rate": 3.857141566450556e-05, "loss": 0.0016, "step": 15223 }, { "epoch": 7.105717619603268, "grad_norm": 0.039794921875, "learning_rate": 3.8559837721512427e-05, "loss": 0.0003, "step": 15224 }, { "epoch": 7.1061843640606766, "grad_norm": 0.08642578125, "learning_rate": 3.854826110139418e-05, "loss": 0.0003, "step": 15225 }, { "epoch": 7.106651108518086, "grad_norm": 0.283203125, "learning_rate": 3.853668580440003e-05, "loss": 0.0007, "step": 15226 }, { "epoch": 7.107117852975496, "grad_norm": 0.012451171875, "learning_rate": 3.852511183077922e-05, "loss": 0.0002, "step": 15227 }, { "epoch": 7.107584597432905, "grad_norm": 0.009521484375, "learning_rate": 3.8513539180781e-05, "loss": 0.0002, "step": 15228 }, { "epoch": 7.108051341890315, "grad_norm": 0.039306640625, "learning_rate": 3.850196785465446e-05, "loss": 0.0003, "step": 15229 }, { "epoch": 7.108518086347725, "grad_norm": 0.060546875, "learning_rate": 3.849039785264879e-05, "loss": 0.0003, "step": 15230 }, { "epoch": 7.108984830805134, "grad_norm": 0.052734375, "learning_rate": 3.847882917501313e-05, "loss": 0.0003, "step": 15231 }, { "epoch": 7.109451575262543, "grad_norm": 0.02392578125, "learning_rate": 3.846726182199655e-05, "loss": 0.0002, "step": 15232 }, { "epoch": 7.109918319719953, "grad_norm": 0.03955078125, "learning_rate": 3.8455695793848045e-05, "loss": 0.0023, "step": 15233 }, { "epoch": 7.110385064177363, "grad_norm": 0.0152587890625, "learning_rate": 3.84441310908167e-05, "loss": 0.0002, "step": 15234 }, { "epoch": 7.110851808634773, "grad_norm": 0.033203125, "learning_rate": 3.843256771315155e-05, "loss": 0.0003, "step": 15235 }, { "epoch": 7.111318553092182, "grad_norm": 0.025634765625, "learning_rate": 3.842100566110148e-05, "loss": 0.0002, "step": 15236 }, { "epoch": 7.111785297549591, "grad_norm": 0.039306640625, "learning_rate": 3.840944493491553e-05, "loss": 0.0027, "step": 15237 }, { "epoch": 7.112252042007001, "grad_norm": 0.0269775390625, "learning_rate": 3.839788553484253e-05, "loss": 0.0002, "step": 15238 }, { "epoch": 7.112718786464411, "grad_norm": 0.0164794921875, "learning_rate": 3.838632746113139e-05, "loss": 0.0002, "step": 15239 }, { "epoch": 7.11318553092182, "grad_norm": 0.01611328125, "learning_rate": 3.837477071403103e-05, "loss": 0.0002, "step": 15240 }, { "epoch": 7.11365227537923, "grad_norm": 0.10888671875, "learning_rate": 3.8363215293790186e-05, "loss": 0.0003, "step": 15241 }, { "epoch": 7.1141190198366395, "grad_norm": 0.00860595703125, "learning_rate": 3.835166120065771e-05, "loss": 0.0002, "step": 15242 }, { "epoch": 7.114585764294049, "grad_norm": 0.0106201171875, "learning_rate": 3.8340108434882407e-05, "loss": 0.0002, "step": 15243 }, { "epoch": 7.115052508751458, "grad_norm": 0.02392578125, "learning_rate": 3.8328556996712936e-05, "loss": 0.0002, "step": 15244 }, { "epoch": 7.115519253208868, "grad_norm": 0.0181884765625, "learning_rate": 3.831700688639807e-05, "loss": 0.0002, "step": 15245 }, { "epoch": 7.115985997666278, "grad_norm": 0.09326171875, "learning_rate": 3.830545810418651e-05, "loss": 0.0003, "step": 15246 }, { "epoch": 7.116452742123688, "grad_norm": 0.0302734375, "learning_rate": 3.8293910650326855e-05, "loss": 0.0003, "step": 15247 }, { "epoch": 7.1169194865810965, "grad_norm": 0.04931640625, "learning_rate": 3.828236452506777e-05, "loss": 0.0031, "step": 15248 }, { "epoch": 7.117386231038506, "grad_norm": 0.05078125, "learning_rate": 3.827081972865789e-05, "loss": 0.0029, "step": 15249 }, { "epoch": 7.117852975495916, "grad_norm": 0.011962890625, "learning_rate": 3.825927626134571e-05, "loss": 0.0002, "step": 15250 }, { "epoch": 7.118319719953326, "grad_norm": 0.0167236328125, "learning_rate": 3.824773412337982e-05, "loss": 0.0003, "step": 15251 }, { "epoch": 7.118786464410735, "grad_norm": 0.01116943359375, "learning_rate": 3.8236193315008774e-05, "loss": 0.0002, "step": 15252 }, { "epoch": 7.119253208868145, "grad_norm": 0.01104736328125, "learning_rate": 3.822465383648096e-05, "loss": 0.0002, "step": 15253 }, { "epoch": 7.119719953325554, "grad_norm": 0.02978515625, "learning_rate": 3.82131156880449e-05, "loss": 0.0002, "step": 15254 }, { "epoch": 7.120186697782964, "grad_norm": 0.0162353515625, "learning_rate": 3.820157886994905e-05, "loss": 0.0002, "step": 15255 }, { "epoch": 7.120653442240373, "grad_norm": 0.035888671875, "learning_rate": 3.8190043382441735e-05, "loss": 0.0007, "step": 15256 }, { "epoch": 7.121120186697783, "grad_norm": 0.01116943359375, "learning_rate": 3.81785092257714e-05, "loss": 0.0002, "step": 15257 }, { "epoch": 7.121586931155193, "grad_norm": 0.0185546875, "learning_rate": 3.8166976400186317e-05, "loss": 0.0002, "step": 15258 }, { "epoch": 7.1220536756126025, "grad_norm": 0.0576171875, "learning_rate": 3.815544490593484e-05, "loss": 0.003, "step": 15259 }, { "epoch": 7.122520420070011, "grad_norm": 0.01220703125, "learning_rate": 3.814391474326528e-05, "loss": 0.0002, "step": 15260 }, { "epoch": 7.122987164527421, "grad_norm": 0.0277099609375, "learning_rate": 3.8132385912425836e-05, "loss": 0.0002, "step": 15261 }, { "epoch": 7.123453908984831, "grad_norm": 0.01123046875, "learning_rate": 3.812085841366477e-05, "loss": 0.0001, "step": 15262 }, { "epoch": 7.123920653442241, "grad_norm": 0.087890625, "learning_rate": 3.810933224723031e-05, "loss": 0.0004, "step": 15263 }, { "epoch": 7.12438739789965, "grad_norm": 0.04296875, "learning_rate": 3.809780741337057e-05, "loss": 0.0029, "step": 15264 }, { "epoch": 7.1248541423570595, "grad_norm": 0.01904296875, "learning_rate": 3.808628391233371e-05, "loss": 0.0002, "step": 15265 }, { "epoch": 7.125320886814469, "grad_norm": 0.02392578125, "learning_rate": 3.807476174436788e-05, "loss": 0.0002, "step": 15266 }, { "epoch": 7.125787631271878, "grad_norm": 0.0498046875, "learning_rate": 3.806324090972111e-05, "loss": 0.0023, "step": 15267 }, { "epoch": 7.126254375729288, "grad_norm": 0.078125, "learning_rate": 3.805172140864148e-05, "loss": 0.0003, "step": 15268 }, { "epoch": 7.126721120186698, "grad_norm": 0.022216796875, "learning_rate": 3.804020324137706e-05, "loss": 0.0002, "step": 15269 }, { "epoch": 7.127187864644108, "grad_norm": 0.028564453125, "learning_rate": 3.802868640817577e-05, "loss": 0.0002, "step": 15270 }, { "epoch": 7.1276546091015165, "grad_norm": 0.018310546875, "learning_rate": 3.8017170909285624e-05, "loss": 0.0002, "step": 15271 }, { "epoch": 7.128121353558926, "grad_norm": 0.03271484375, "learning_rate": 3.80056567449546e-05, "loss": 0.0002, "step": 15272 }, { "epoch": 7.128588098016336, "grad_norm": 0.016357421875, "learning_rate": 3.7994143915430526e-05, "loss": 0.0003, "step": 15273 }, { "epoch": 7.129054842473746, "grad_norm": 0.0166015625, "learning_rate": 3.798263242096136e-05, "loss": 0.0002, "step": 15274 }, { "epoch": 7.129521586931155, "grad_norm": 0.08349609375, "learning_rate": 3.79711222617949e-05, "loss": 0.0003, "step": 15275 }, { "epoch": 7.129988331388565, "grad_norm": 0.009765625, "learning_rate": 3.7959613438179e-05, "loss": 0.0002, "step": 15276 }, { "epoch": 7.130455075845974, "grad_norm": 0.125, "learning_rate": 3.794810595036149e-05, "loss": 0.0015, "step": 15277 }, { "epoch": 7.130921820303384, "grad_norm": 0.0286865234375, "learning_rate": 3.793659979859008e-05, "loss": 0.0002, "step": 15278 }, { "epoch": 7.131388564760793, "grad_norm": 0.005035400390625, "learning_rate": 3.792509498311256e-05, "loss": 0.0001, "step": 15279 }, { "epoch": 7.131855309218203, "grad_norm": 0.0186767578125, "learning_rate": 3.791359150417659e-05, "loss": 0.0002, "step": 15280 }, { "epoch": 7.132322053675613, "grad_norm": 0.018798828125, "learning_rate": 3.790208936202991e-05, "loss": 0.0002, "step": 15281 }, { "epoch": 7.1327887981330225, "grad_norm": 0.08740234375, "learning_rate": 3.7890588556920124e-05, "loss": 0.0003, "step": 15282 }, { "epoch": 7.133255542590431, "grad_norm": 0.017578125, "learning_rate": 3.787908908909487e-05, "loss": 0.0001, "step": 15283 }, { "epoch": 7.133722287047841, "grad_norm": 0.0120849609375, "learning_rate": 3.786759095880178e-05, "loss": 0.0001, "step": 15284 }, { "epoch": 7.134189031505251, "grad_norm": 0.00927734375, "learning_rate": 3.785609416628837e-05, "loss": 0.0001, "step": 15285 }, { "epoch": 7.134655775962661, "grad_norm": 0.0206298828125, "learning_rate": 3.784459871180223e-05, "loss": 0.0002, "step": 15286 }, { "epoch": 7.13512252042007, "grad_norm": 0.0093994140625, "learning_rate": 3.783310459559081e-05, "loss": 0.0002, "step": 15287 }, { "epoch": 7.1355892648774795, "grad_norm": 0.0218505859375, "learning_rate": 3.782161181790163e-05, "loss": 0.0002, "step": 15288 }, { "epoch": 7.136056009334889, "grad_norm": 0.02001953125, "learning_rate": 3.781012037898216e-05, "loss": 0.0002, "step": 15289 }, { "epoch": 7.136522753792299, "grad_norm": 0.01068115234375, "learning_rate": 3.779863027907976e-05, "loss": 0.0002, "step": 15290 }, { "epoch": 7.136989498249708, "grad_norm": 0.01312255859375, "learning_rate": 3.778714151844188e-05, "loss": 0.0002, "step": 15291 }, { "epoch": 7.137456242707118, "grad_norm": 0.0537109375, "learning_rate": 3.777565409731589e-05, "loss": 0.0016, "step": 15292 }, { "epoch": 7.1379229871645276, "grad_norm": 0.00946044921875, "learning_rate": 3.7764168015949074e-05, "loss": 0.0002, "step": 15293 }, { "epoch": 7.138389731621937, "grad_norm": 0.018310546875, "learning_rate": 3.7752683274588785e-05, "loss": 0.0002, "step": 15294 }, { "epoch": 7.138856476079346, "grad_norm": 0.0157470703125, "learning_rate": 3.774119987348231e-05, "loss": 0.0002, "step": 15295 }, { "epoch": 7.139323220536756, "grad_norm": 0.07666015625, "learning_rate": 3.772971781287685e-05, "loss": 0.0003, "step": 15296 }, { "epoch": 7.139789964994166, "grad_norm": 0.0120849609375, "learning_rate": 3.7718237093019656e-05, "loss": 0.0002, "step": 15297 }, { "epoch": 7.140256709451576, "grad_norm": 0.01513671875, "learning_rate": 3.770675771415796e-05, "loss": 0.0002, "step": 15298 }, { "epoch": 7.1407234539089846, "grad_norm": 0.043701171875, "learning_rate": 3.769527967653884e-05, "loss": 0.0003, "step": 15299 }, { "epoch": 7.141190198366394, "grad_norm": 0.038818359375, "learning_rate": 3.768380298040949e-05, "loss": 0.0019, "step": 15300 }, { "epoch": 7.141656942823804, "grad_norm": 0.0242919921875, "learning_rate": 3.767232762601702e-05, "loss": 0.0016, "step": 15301 }, { "epoch": 7.142123687281214, "grad_norm": 0.05517578125, "learning_rate": 3.766085361360847e-05, "loss": 0.002, "step": 15302 }, { "epoch": 7.142590431738623, "grad_norm": 0.027587890625, "learning_rate": 3.76493809434309e-05, "loss": 0.0002, "step": 15303 }, { "epoch": 7.143057176196033, "grad_norm": 0.038818359375, "learning_rate": 3.763790961573138e-05, "loss": 0.0002, "step": 15304 }, { "epoch": 7.143523920653442, "grad_norm": 0.0703125, "learning_rate": 3.7626439630756816e-05, "loss": 0.0027, "step": 15305 }, { "epoch": 7.143990665110852, "grad_norm": 0.021728515625, "learning_rate": 3.761497098875425e-05, "loss": 0.0002, "step": 15306 }, { "epoch": 7.144457409568261, "grad_norm": 0.0164794921875, "learning_rate": 3.760350368997054e-05, "loss": 0.0002, "step": 15307 }, { "epoch": 7.144924154025671, "grad_norm": 0.045654296875, "learning_rate": 3.759203773465262e-05, "loss": 0.002, "step": 15308 }, { "epoch": 7.145390898483081, "grad_norm": 0.07958984375, "learning_rate": 3.7580573123047416e-05, "loss": 0.002, "step": 15309 }, { "epoch": 7.14585764294049, "grad_norm": 0.01318359375, "learning_rate": 3.7569109855401684e-05, "loss": 0.0002, "step": 15310 }, { "epoch": 7.146324387397899, "grad_norm": 0.01513671875, "learning_rate": 3.7557647931962294e-05, "loss": 0.0002, "step": 15311 }, { "epoch": 7.146791131855309, "grad_norm": 0.03515625, "learning_rate": 3.754618735297606e-05, "loss": 0.0002, "step": 15312 }, { "epoch": 7.147257876312719, "grad_norm": 0.0068359375, "learning_rate": 3.753472811868965e-05, "loss": 0.0001, "step": 15313 }, { "epoch": 7.147724620770128, "grad_norm": 0.01104736328125, "learning_rate": 3.752327022934987e-05, "loss": 0.0002, "step": 15314 }, { "epoch": 7.148191365227538, "grad_norm": 0.062255859375, "learning_rate": 3.751181368520343e-05, "loss": 0.0036, "step": 15315 }, { "epoch": 7.1486581096849475, "grad_norm": 0.0284423828125, "learning_rate": 3.750035848649695e-05, "loss": 0.0002, "step": 15316 }, { "epoch": 7.149124854142357, "grad_norm": 0.021728515625, "learning_rate": 3.748890463347709e-05, "loss": 0.0002, "step": 15317 }, { "epoch": 7.149591598599766, "grad_norm": 0.0306396484375, "learning_rate": 3.74774521263905e-05, "loss": 0.0025, "step": 15318 }, { "epoch": 7.150058343057176, "grad_norm": 0.0086669921875, "learning_rate": 3.7466000965483705e-05, "loss": 0.0001, "step": 15319 }, { "epoch": 7.150525087514586, "grad_norm": 0.01708984375, "learning_rate": 3.745455115100329e-05, "loss": 0.0002, "step": 15320 }, { "epoch": 7.150991831971996, "grad_norm": 0.0301513671875, "learning_rate": 3.7443102683195816e-05, "loss": 0.0002, "step": 15321 }, { "epoch": 7.1514585764294045, "grad_norm": 0.017333984375, "learning_rate": 3.743165556230772e-05, "loss": 0.0002, "step": 15322 }, { "epoch": 7.151925320886814, "grad_norm": 0.0166015625, "learning_rate": 3.742020978858553e-05, "loss": 0.0002, "step": 15323 }, { "epoch": 7.152392065344224, "grad_norm": 0.00921630859375, "learning_rate": 3.740876536227562e-05, "loss": 0.0001, "step": 15324 }, { "epoch": 7.152858809801634, "grad_norm": 0.01141357421875, "learning_rate": 3.7397322283624444e-05, "loss": 0.0001, "step": 15325 }, { "epoch": 7.153325554259043, "grad_norm": 0.0152587890625, "learning_rate": 3.738588055287842e-05, "loss": 0.0002, "step": 15326 }, { "epoch": 7.153792298716453, "grad_norm": 0.01141357421875, "learning_rate": 3.737444017028384e-05, "loss": 0.0002, "step": 15327 }, { "epoch": 7.154259043173862, "grad_norm": 0.00787353515625, "learning_rate": 3.736300113608703e-05, "loss": 0.0002, "step": 15328 }, { "epoch": 7.154725787631272, "grad_norm": 0.017822265625, "learning_rate": 3.73515634505343e-05, "loss": 0.0002, "step": 15329 }, { "epoch": 7.155192532088681, "grad_norm": 0.024169921875, "learning_rate": 3.734012711387195e-05, "loss": 0.0003, "step": 15330 }, { "epoch": 7.155659276546091, "grad_norm": 0.0142822265625, "learning_rate": 3.7328692126346144e-05, "loss": 0.0002, "step": 15331 }, { "epoch": 7.156126021003501, "grad_norm": 0.00970458984375, "learning_rate": 3.7317258488203134e-05, "loss": 0.0001, "step": 15332 }, { "epoch": 7.1565927654609105, "grad_norm": 0.031005859375, "learning_rate": 3.7305826199689144e-05, "loss": 0.0002, "step": 15333 }, { "epoch": 7.157059509918319, "grad_norm": 0.0086669921875, "learning_rate": 3.729439526105023e-05, "loss": 0.0002, "step": 15334 }, { "epoch": 7.157526254375729, "grad_norm": 0.0089111328125, "learning_rate": 3.7282965672532586e-05, "loss": 0.0002, "step": 15335 }, { "epoch": 7.157992998833139, "grad_norm": 0.06201171875, "learning_rate": 3.7271537434382265e-05, "loss": 0.0027, "step": 15336 }, { "epoch": 7.158459743290549, "grad_norm": 0.039794921875, "learning_rate": 3.726011054684533e-05, "loss": 0.0026, "step": 15337 }, { "epoch": 7.158926487747958, "grad_norm": 0.00811767578125, "learning_rate": 3.724868501016785e-05, "loss": 0.0002, "step": 15338 }, { "epoch": 7.1593932322053675, "grad_norm": 0.0089111328125, "learning_rate": 3.723726082459579e-05, "loss": 0.0002, "step": 15339 }, { "epoch": 7.159859976662777, "grad_norm": 0.06298828125, "learning_rate": 3.722583799037512e-05, "loss": 0.0003, "step": 15340 }, { "epoch": 7.160326721120187, "grad_norm": 0.0234375, "learning_rate": 3.721441650775186e-05, "loss": 0.0002, "step": 15341 }, { "epoch": 7.160793465577596, "grad_norm": 0.037841796875, "learning_rate": 3.7202996376971835e-05, "loss": 0.0002, "step": 15342 }, { "epoch": 7.161260210035006, "grad_norm": 0.029052734375, "learning_rate": 3.719157759828097e-05, "loss": 0.0002, "step": 15343 }, { "epoch": 7.161726954492416, "grad_norm": 0.0146484375, "learning_rate": 3.718016017192516e-05, "loss": 0.0002, "step": 15344 }, { "epoch": 7.162193698949825, "grad_norm": 0.035400390625, "learning_rate": 3.716874409815017e-05, "loss": 0.0003, "step": 15345 }, { "epoch": 7.162660443407234, "grad_norm": 0.041015625, "learning_rate": 3.715732937720182e-05, "loss": 0.0022, "step": 15346 }, { "epoch": 7.163127187864644, "grad_norm": 0.0517578125, "learning_rate": 3.7145916009325945e-05, "loss": 0.0022, "step": 15347 }, { "epoch": 7.163593932322054, "grad_norm": 0.059814453125, "learning_rate": 3.713450399476819e-05, "loss": 0.0048, "step": 15348 }, { "epoch": 7.164060676779464, "grad_norm": 0.0078125, "learning_rate": 3.712309333377433e-05, "loss": 0.0002, "step": 15349 }, { "epoch": 7.164527421236873, "grad_norm": 0.00592041015625, "learning_rate": 3.7111684026590054e-05, "loss": 0.0001, "step": 15350 }, { "epoch": 7.164994165694282, "grad_norm": 0.0458984375, "learning_rate": 3.710027607346096e-05, "loss": 0.0003, "step": 15351 }, { "epoch": 7.165460910151692, "grad_norm": 0.015869140625, "learning_rate": 3.7088869474632715e-05, "loss": 0.0002, "step": 15352 }, { "epoch": 7.165927654609101, "grad_norm": 0.0208740234375, "learning_rate": 3.7077464230350956e-05, "loss": 0.0002, "step": 15353 }, { "epoch": 7.166394399066511, "grad_norm": 0.023681640625, "learning_rate": 3.7066060340861153e-05, "loss": 0.0002, "step": 15354 }, { "epoch": 7.166861143523921, "grad_norm": 0.041748046875, "learning_rate": 3.705465780640894e-05, "loss": 0.0002, "step": 15355 }, { "epoch": 7.1673278879813305, "grad_norm": 0.0380859375, "learning_rate": 3.7043256627239754e-05, "loss": 0.0029, "step": 15356 }, { "epoch": 7.167794632438739, "grad_norm": 0.0172119140625, "learning_rate": 3.703185680359911e-05, "loss": 0.0002, "step": 15357 }, { "epoch": 7.168261376896149, "grad_norm": 0.04248046875, "learning_rate": 3.702045833573248e-05, "loss": 0.0002, "step": 15358 }, { "epoch": 7.168728121353559, "grad_norm": 0.01300048828125, "learning_rate": 3.700906122388521e-05, "loss": 0.0002, "step": 15359 }, { "epoch": 7.169194865810969, "grad_norm": 0.00909423828125, "learning_rate": 3.6997665468302755e-05, "loss": 0.0001, "step": 15360 }, { "epoch": 7.169661610268378, "grad_norm": 0.02099609375, "learning_rate": 3.6986271069230506e-05, "loss": 0.0002, "step": 15361 }, { "epoch": 7.1701283547257875, "grad_norm": 0.025146484375, "learning_rate": 3.697487802691372e-05, "loss": 0.0002, "step": 15362 }, { "epoch": 7.170595099183197, "grad_norm": 0.031494140625, "learning_rate": 3.6963486341597716e-05, "loss": 0.0002, "step": 15363 }, { "epoch": 7.171061843640607, "grad_norm": 0.038818359375, "learning_rate": 3.6952096013527845e-05, "loss": 0.0028, "step": 15364 }, { "epoch": 7.171528588098016, "grad_norm": 0.023681640625, "learning_rate": 3.694070704294926e-05, "loss": 0.0002, "step": 15365 }, { "epoch": 7.171995332555426, "grad_norm": 0.029296875, "learning_rate": 3.692931943010722e-05, "loss": 0.0002, "step": 15366 }, { "epoch": 7.172462077012836, "grad_norm": 0.038818359375, "learning_rate": 3.6917933175246954e-05, "loss": 0.0003, "step": 15367 }, { "epoch": 7.172928821470245, "grad_norm": 0.0947265625, "learning_rate": 3.6906548278613525e-05, "loss": 0.0004, "step": 15368 }, { "epoch": 7.173395565927654, "grad_norm": 0.024658203125, "learning_rate": 3.689516474045214e-05, "loss": 0.0002, "step": 15369 }, { "epoch": 7.173862310385064, "grad_norm": 0.0169677734375, "learning_rate": 3.688378256100788e-05, "loss": 0.0002, "step": 15370 }, { "epoch": 7.174329054842474, "grad_norm": 0.06982421875, "learning_rate": 3.68724017405258e-05, "loss": 0.0002, "step": 15371 }, { "epoch": 7.174795799299884, "grad_norm": 0.018798828125, "learning_rate": 3.686102227925093e-05, "loss": 0.0002, "step": 15372 }, { "epoch": 7.1752625437572926, "grad_norm": 0.07470703125, "learning_rate": 3.684964417742836e-05, "loss": 0.0003, "step": 15373 }, { "epoch": 7.175729288214702, "grad_norm": 0.052490234375, "learning_rate": 3.683826743530296e-05, "loss": 0.0019, "step": 15374 }, { "epoch": 7.176196032672112, "grad_norm": 0.03173828125, "learning_rate": 3.682689205311979e-05, "loss": 0.0021, "step": 15375 }, { "epoch": 7.176662777129522, "grad_norm": 0.0458984375, "learning_rate": 3.681551803112373e-05, "loss": 0.0026, "step": 15376 }, { "epoch": 7.177129521586931, "grad_norm": 0.02490234375, "learning_rate": 3.6804145369559626e-05, "loss": 0.0002, "step": 15377 }, { "epoch": 7.177596266044341, "grad_norm": 0.01611328125, "learning_rate": 3.67927740686724e-05, "loss": 0.0002, "step": 15378 }, { "epoch": 7.17806301050175, "grad_norm": 0.0191650390625, "learning_rate": 3.67814041287069e-05, "loss": 0.0002, "step": 15379 }, { "epoch": 7.17852975495916, "grad_norm": 0.00848388671875, "learning_rate": 3.6770035549907875e-05, "loss": 0.0001, "step": 15380 }, { "epoch": 7.178996499416569, "grad_norm": 0.01416015625, "learning_rate": 3.6758668332520154e-05, "loss": 0.0002, "step": 15381 }, { "epoch": 7.179463243873979, "grad_norm": 0.0191650390625, "learning_rate": 3.67473024767885e-05, "loss": 0.0002, "step": 15382 }, { "epoch": 7.179929988331389, "grad_norm": 0.03125, "learning_rate": 3.673593798295757e-05, "loss": 0.0002, "step": 15383 }, { "epoch": 7.1803967327887985, "grad_norm": 0.018310546875, "learning_rate": 3.672457485127209e-05, "loss": 0.0002, "step": 15384 }, { "epoch": 7.180863477246207, "grad_norm": 0.11962890625, "learning_rate": 3.671321308197674e-05, "loss": 0.0006, "step": 15385 }, { "epoch": 7.181330221703617, "grad_norm": 0.054931640625, "learning_rate": 3.67018526753161e-05, "loss": 0.0034, "step": 15386 }, { "epoch": 7.181796966161027, "grad_norm": 0.058349609375, "learning_rate": 3.669049363153485e-05, "loss": 0.0033, "step": 15387 }, { "epoch": 7.182263710618437, "grad_norm": 0.259765625, "learning_rate": 3.6679135950877474e-05, "loss": 0.001, "step": 15388 }, { "epoch": 7.182730455075846, "grad_norm": 0.016357421875, "learning_rate": 3.666777963358855e-05, "loss": 0.0002, "step": 15389 }, { "epoch": 7.1831971995332555, "grad_norm": 0.0211181640625, "learning_rate": 3.665642467991264e-05, "loss": 0.0002, "step": 15390 }, { "epoch": 7.183663943990665, "grad_norm": 0.007537841796875, "learning_rate": 3.664507109009415e-05, "loss": 0.0002, "step": 15391 }, { "epoch": 7.184130688448075, "grad_norm": 0.0380859375, "learning_rate": 3.663371886437758e-05, "loss": 0.0017, "step": 15392 }, { "epoch": 7.184597432905484, "grad_norm": 0.0233154296875, "learning_rate": 3.6622368003007376e-05, "loss": 0.0002, "step": 15393 }, { "epoch": 7.185064177362894, "grad_norm": 0.0196533203125, "learning_rate": 3.661101850622788e-05, "loss": 0.0001, "step": 15394 }, { "epoch": 7.185530921820304, "grad_norm": 0.0123291015625, "learning_rate": 3.659967037428348e-05, "loss": 0.0002, "step": 15395 }, { "epoch": 7.1859976662777125, "grad_norm": 0.140625, "learning_rate": 3.658832360741855e-05, "loss": 0.0004, "step": 15396 }, { "epoch": 7.186464410735122, "grad_norm": 0.034423828125, "learning_rate": 3.657697820587735e-05, "loss": 0.0002, "step": 15397 }, { "epoch": 7.186931155192532, "grad_norm": 0.02734375, "learning_rate": 3.6565634169904164e-05, "loss": 0.0003, "step": 15398 }, { "epoch": 7.187397899649942, "grad_norm": 0.0157470703125, "learning_rate": 3.65542914997433e-05, "loss": 0.0002, "step": 15399 }, { "epoch": 7.187864644107351, "grad_norm": 0.034912109375, "learning_rate": 3.654295019563889e-05, "loss": 0.0003, "step": 15400 }, { "epoch": 7.188331388564761, "grad_norm": 0.017333984375, "learning_rate": 3.653161025783518e-05, "loss": 0.0002, "step": 15401 }, { "epoch": 7.18879813302217, "grad_norm": 0.050537109375, "learning_rate": 3.6520271686576356e-05, "loss": 0.0031, "step": 15402 }, { "epoch": 7.18926487747958, "grad_norm": 0.05322265625, "learning_rate": 3.650893448210647e-05, "loss": 0.0049, "step": 15403 }, { "epoch": 7.189731621936989, "grad_norm": 0.006439208984375, "learning_rate": 3.6497598644669704e-05, "loss": 0.0002, "step": 15404 }, { "epoch": 7.190198366394399, "grad_norm": 0.006866455078125, "learning_rate": 3.6486264174510076e-05, "loss": 0.0002, "step": 15405 }, { "epoch": 7.190665110851809, "grad_norm": 0.0177001953125, "learning_rate": 3.647493107187163e-05, "loss": 0.0002, "step": 15406 }, { "epoch": 7.1911318553092185, "grad_norm": 0.01385498046875, "learning_rate": 3.646359933699845e-05, "loss": 0.0002, "step": 15407 }, { "epoch": 7.191598599766627, "grad_norm": 0.0196533203125, "learning_rate": 3.645226897013443e-05, "loss": 0.0002, "step": 15408 }, { "epoch": 7.192065344224037, "grad_norm": 0.0279541015625, "learning_rate": 3.644093997152357e-05, "loss": 0.0003, "step": 15409 }, { "epoch": 7.192532088681447, "grad_norm": 0.0303955078125, "learning_rate": 3.642961234140982e-05, "loss": 0.0002, "step": 15410 }, { "epoch": 7.192998833138857, "grad_norm": 0.0177001953125, "learning_rate": 3.641828608003702e-05, "loss": 0.0002, "step": 15411 }, { "epoch": 7.193465577596266, "grad_norm": 0.0308837890625, "learning_rate": 3.640696118764907e-05, "loss": 0.0002, "step": 15412 }, { "epoch": 7.1939323220536755, "grad_norm": 0.012939453125, "learning_rate": 3.6395637664489834e-05, "loss": 0.0002, "step": 15413 }, { "epoch": 7.194399066511085, "grad_norm": 0.005523681640625, "learning_rate": 3.638431551080306e-05, "loss": 0.0001, "step": 15414 }, { "epoch": 7.194865810968495, "grad_norm": 0.01708984375, "learning_rate": 3.6372994726832544e-05, "loss": 0.0002, "step": 15415 }, { "epoch": 7.195332555425904, "grad_norm": 0.06591796875, "learning_rate": 3.636167531282209e-05, "loss": 0.0004, "step": 15416 }, { "epoch": 7.195799299883314, "grad_norm": 0.01226806640625, "learning_rate": 3.635035726901533e-05, "loss": 0.0002, "step": 15417 }, { "epoch": 7.196266044340724, "grad_norm": 0.0079345703125, "learning_rate": 3.6339040595656006e-05, "loss": 0.0002, "step": 15418 }, { "epoch": 7.196732788798133, "grad_norm": 0.0240478515625, "learning_rate": 3.6327725292987805e-05, "loss": 0.0002, "step": 15419 }, { "epoch": 7.197199533255542, "grad_norm": 0.04150390625, "learning_rate": 3.631641136125429e-05, "loss": 0.0024, "step": 15420 }, { "epoch": 7.197666277712952, "grad_norm": 0.009033203125, "learning_rate": 3.63050988006991e-05, "loss": 0.0002, "step": 15421 }, { "epoch": 7.198133022170362, "grad_norm": 0.05810546875, "learning_rate": 3.629378761156583e-05, "loss": 0.0028, "step": 15422 }, { "epoch": 7.198599766627772, "grad_norm": 0.05029296875, "learning_rate": 3.6282477794098003e-05, "loss": 0.0023, "step": 15423 }, { "epoch": 7.199066511085181, "grad_norm": 0.01080322265625, "learning_rate": 3.627116934853909e-05, "loss": 0.0002, "step": 15424 }, { "epoch": 7.19953325554259, "grad_norm": 0.0380859375, "learning_rate": 3.6259862275132636e-05, "loss": 0.0018, "step": 15425 }, { "epoch": 7.2, "grad_norm": 0.1669921875, "learning_rate": 3.624855657412204e-05, "loss": 0.0005, "step": 15426 }, { "epoch": 7.20046674445741, "grad_norm": 0.031005859375, "learning_rate": 3.6237252245750755e-05, "loss": 0.0002, "step": 15427 }, { "epoch": 7.200933488914819, "grad_norm": 0.029052734375, "learning_rate": 3.622594929026221e-05, "loss": 0.0002, "step": 15428 }, { "epoch": 7.201400233372229, "grad_norm": 0.017578125, "learning_rate": 3.6214647707899694e-05, "loss": 0.0002, "step": 15429 }, { "epoch": 7.2018669778296385, "grad_norm": 0.01312255859375, "learning_rate": 3.6203347498906595e-05, "loss": 0.0002, "step": 15430 }, { "epoch": 7.202333722287048, "grad_norm": 0.01263427734375, "learning_rate": 3.6192048663526234e-05, "loss": 0.0002, "step": 15431 }, { "epoch": 7.202800466744457, "grad_norm": 0.0303955078125, "learning_rate": 3.618075120200183e-05, "loss": 0.0002, "step": 15432 }, { "epoch": 7.203267211201867, "grad_norm": 0.00830078125, "learning_rate": 3.616945511457665e-05, "loss": 0.0002, "step": 15433 }, { "epoch": 7.203733955659277, "grad_norm": 0.0289306640625, "learning_rate": 3.615816040149398e-05, "loss": 0.0003, "step": 15434 }, { "epoch": 7.204200700116687, "grad_norm": 0.0267333984375, "learning_rate": 3.6146867062996904e-05, "loss": 0.0003, "step": 15435 }, { "epoch": 7.2046674445740955, "grad_norm": 0.01165771484375, "learning_rate": 3.613557509932866e-05, "loss": 0.0002, "step": 15436 }, { "epoch": 7.205134189031505, "grad_norm": 0.10791015625, "learning_rate": 3.612428451073231e-05, "loss": 0.0004, "step": 15437 }, { "epoch": 7.205600933488915, "grad_norm": 0.009521484375, "learning_rate": 3.611299529745098e-05, "loss": 0.0001, "step": 15438 }, { "epoch": 7.206067677946324, "grad_norm": 0.010498046875, "learning_rate": 3.610170745972778e-05, "loss": 0.0002, "step": 15439 }, { "epoch": 7.206534422403734, "grad_norm": 0.021484375, "learning_rate": 3.609042099780568e-05, "loss": 0.0002, "step": 15440 }, { "epoch": 7.207001166861144, "grad_norm": 0.0086669921875, "learning_rate": 3.607913591192774e-05, "loss": 0.0001, "step": 15441 }, { "epoch": 7.207467911318553, "grad_norm": 0.005859375, "learning_rate": 3.606785220233695e-05, "loss": 0.0001, "step": 15442 }, { "epoch": 7.207934655775962, "grad_norm": 0.1787109375, "learning_rate": 3.60565698692762e-05, "loss": 0.0024, "step": 15443 }, { "epoch": 7.208401400233372, "grad_norm": 0.031494140625, "learning_rate": 3.604528891298844e-05, "loss": 0.0017, "step": 15444 }, { "epoch": 7.208868144690782, "grad_norm": 0.072265625, "learning_rate": 3.6034009333716614e-05, "loss": 0.0003, "step": 15445 }, { "epoch": 7.209334889148192, "grad_norm": 0.0106201171875, "learning_rate": 3.60227311317035e-05, "loss": 0.0002, "step": 15446 }, { "epoch": 7.209801633605601, "grad_norm": 0.0155029296875, "learning_rate": 3.6011454307191975e-05, "loss": 0.0001, "step": 15447 }, { "epoch": 7.21026837806301, "grad_norm": 0.08544921875, "learning_rate": 3.600017886042486e-05, "loss": 0.0023, "step": 15448 }, { "epoch": 7.21073512252042, "grad_norm": 0.0625, "learning_rate": 3.5988904791644875e-05, "loss": 0.0004, "step": 15449 }, { "epoch": 7.21120186697783, "grad_norm": 0.007476806640625, "learning_rate": 3.597763210109479e-05, "loss": 0.0001, "step": 15450 }, { "epoch": 7.211668611435239, "grad_norm": 0.2294921875, "learning_rate": 3.596636078901735e-05, "loss": 0.0034, "step": 15451 }, { "epoch": 7.212135355892649, "grad_norm": 0.013916015625, "learning_rate": 3.5955090855655184e-05, "loss": 0.0002, "step": 15452 }, { "epoch": 7.212602100350058, "grad_norm": 0.015380859375, "learning_rate": 3.5943822301250994e-05, "loss": 0.0002, "step": 15453 }, { "epoch": 7.213068844807468, "grad_norm": 0.01458740234375, "learning_rate": 3.593255512604735e-05, "loss": 0.0002, "step": 15454 }, { "epoch": 7.213535589264877, "grad_norm": 0.0673828125, "learning_rate": 3.592128933028689e-05, "loss": 0.0003, "step": 15455 }, { "epoch": 7.214002333722287, "grad_norm": 0.030029296875, "learning_rate": 3.591002491421219e-05, "loss": 0.0002, "step": 15456 }, { "epoch": 7.214469078179697, "grad_norm": 0.048828125, "learning_rate": 3.5898761878065737e-05, "loss": 0.0003, "step": 15457 }, { "epoch": 7.2149358226371065, "grad_norm": 0.0274658203125, "learning_rate": 3.5887500222090045e-05, "loss": 0.0015, "step": 15458 }, { "epoch": 7.215402567094515, "grad_norm": 0.0189208984375, "learning_rate": 3.5876239946527654e-05, "loss": 0.0002, "step": 15459 }, { "epoch": 7.215869311551925, "grad_norm": 0.04345703125, "learning_rate": 3.586498105162093e-05, "loss": 0.0029, "step": 15460 }, { "epoch": 7.216336056009335, "grad_norm": 0.07421875, "learning_rate": 3.5853723537612315e-05, "loss": 0.0004, "step": 15461 }, { "epoch": 7.216802800466745, "grad_norm": 0.060546875, "learning_rate": 3.584246740474425e-05, "loss": 0.0018, "step": 15462 }, { "epoch": 7.217269544924154, "grad_norm": 0.0279541015625, "learning_rate": 3.5831212653259005e-05, "loss": 0.0002, "step": 15463 }, { "epoch": 7.2177362893815635, "grad_norm": 0.03466796875, "learning_rate": 3.581995928339895e-05, "loss": 0.0031, "step": 15464 }, { "epoch": 7.218203033838973, "grad_norm": 0.0167236328125, "learning_rate": 3.580870729540641e-05, "loss": 0.0002, "step": 15465 }, { "epoch": 7.218669778296383, "grad_norm": 0.0184326171875, "learning_rate": 3.579745668952359e-05, "loss": 0.0002, "step": 15466 }, { "epoch": 7.219136522753792, "grad_norm": 0.047607421875, "learning_rate": 3.578620746599277e-05, "loss": 0.0035, "step": 15467 }, { "epoch": 7.219603267211202, "grad_norm": 0.00927734375, "learning_rate": 3.577495962505617e-05, "loss": 0.0002, "step": 15468 }, { "epoch": 7.220070011668612, "grad_norm": 0.01495361328125, "learning_rate": 3.576371316695593e-05, "loss": 0.0002, "step": 15469 }, { "epoch": 7.220536756126021, "grad_norm": 0.01220703125, "learning_rate": 3.575246809193421e-05, "loss": 0.0002, "step": 15470 }, { "epoch": 7.22100350058343, "grad_norm": 0.00567626953125, "learning_rate": 3.57412244002332e-05, "loss": 0.0001, "step": 15471 }, { "epoch": 7.22147024504084, "grad_norm": 0.04638671875, "learning_rate": 3.572998209209486e-05, "loss": 0.0002, "step": 15472 }, { "epoch": 7.22193698949825, "grad_norm": 0.0164794921875, "learning_rate": 3.571874116776132e-05, "loss": 0.0001, "step": 15473 }, { "epoch": 7.222403733955659, "grad_norm": 0.01080322265625, "learning_rate": 3.570750162747463e-05, "loss": 0.0001, "step": 15474 }, { "epoch": 7.222870478413069, "grad_norm": 0.047607421875, "learning_rate": 3.5696263471476724e-05, "loss": 0.0023, "step": 15475 }, { "epoch": 7.223337222870478, "grad_norm": 0.01531982421875, "learning_rate": 3.5685026700009616e-05, "loss": 0.0002, "step": 15476 }, { "epoch": 7.223803967327888, "grad_norm": 0.005523681640625, "learning_rate": 3.5673791313315284e-05, "loss": 0.0001, "step": 15477 }, { "epoch": 7.224270711785298, "grad_norm": 0.021240234375, "learning_rate": 3.566255731163556e-05, "loss": 0.0003, "step": 15478 }, { "epoch": 7.224737456242707, "grad_norm": 0.10107421875, "learning_rate": 3.565132469521235e-05, "loss": 0.0004, "step": 15479 }, { "epoch": 7.225204200700117, "grad_norm": 0.01153564453125, "learning_rate": 3.564009346428756e-05, "loss": 0.0002, "step": 15480 }, { "epoch": 7.2256709451575265, "grad_norm": 0.056640625, "learning_rate": 3.562886361910294e-05, "loss": 0.0003, "step": 15481 }, { "epoch": 7.226137689614935, "grad_norm": 0.014404296875, "learning_rate": 3.561763515990029e-05, "loss": 0.0002, "step": 15482 }, { "epoch": 7.226604434072345, "grad_norm": 0.064453125, "learning_rate": 3.560640808692143e-05, "loss": 0.0005, "step": 15483 }, { "epoch": 7.227071178529755, "grad_norm": 0.013427734375, "learning_rate": 3.559518240040801e-05, "loss": 0.0002, "step": 15484 }, { "epoch": 7.227537922987165, "grad_norm": 0.0142822265625, "learning_rate": 3.5583958100601814e-05, "loss": 0.0002, "step": 15485 }, { "epoch": 7.228004667444574, "grad_norm": 0.0185546875, "learning_rate": 3.5572735187744434e-05, "loss": 0.0002, "step": 15486 }, { "epoch": 7.2284714119019835, "grad_norm": 0.01055908203125, "learning_rate": 3.556151366207755e-05, "loss": 0.0001, "step": 15487 }, { "epoch": 7.228938156359393, "grad_norm": 0.03515625, "learning_rate": 3.55502935238428e-05, "loss": 0.0027, "step": 15488 }, { "epoch": 7.229404900816803, "grad_norm": 0.09521484375, "learning_rate": 3.553907477328171e-05, "loss": 0.0004, "step": 15489 }, { "epoch": 7.229871645274212, "grad_norm": 0.0625, "learning_rate": 3.552785741063586e-05, "loss": 0.0039, "step": 15490 }, { "epoch": 7.230338389731622, "grad_norm": 0.0162353515625, "learning_rate": 3.5516641436146816e-05, "loss": 0.0002, "step": 15491 }, { "epoch": 7.230805134189032, "grad_norm": 0.00933837890625, "learning_rate": 3.550542685005598e-05, "loss": 0.0002, "step": 15492 }, { "epoch": 7.231271878646441, "grad_norm": 0.29296875, "learning_rate": 3.549421365260487e-05, "loss": 0.001, "step": 15493 }, { "epoch": 7.23173862310385, "grad_norm": 0.0301513671875, "learning_rate": 3.5483001844034933e-05, "loss": 0.0003, "step": 15494 }, { "epoch": 7.23220536756126, "grad_norm": 0.0115966796875, "learning_rate": 3.547179142458752e-05, "loss": 0.0002, "step": 15495 }, { "epoch": 7.23267211201867, "grad_norm": 0.1005859375, "learning_rate": 3.546058239450404e-05, "loss": 0.0012, "step": 15496 }, { "epoch": 7.23313885647608, "grad_norm": 0.00762939453125, "learning_rate": 3.544937475402585e-05, "loss": 0.0001, "step": 15497 }, { "epoch": 7.233605600933489, "grad_norm": 0.0147705078125, "learning_rate": 3.543816850339423e-05, "loss": 0.0002, "step": 15498 }, { "epoch": 7.234072345390898, "grad_norm": 0.006744384765625, "learning_rate": 3.5426963642850455e-05, "loss": 0.0001, "step": 15499 }, { "epoch": 7.234539089848308, "grad_norm": 0.00970458984375, "learning_rate": 3.5415760172635836e-05, "loss": 0.0002, "step": 15500 }, { "epoch": 7.235005834305718, "grad_norm": 0.04638671875, "learning_rate": 3.540455809299153e-05, "loss": 0.0003, "step": 15501 }, { "epoch": 7.235472578763127, "grad_norm": 0.00848388671875, "learning_rate": 3.539335740415876e-05, "loss": 0.0001, "step": 15502 }, { "epoch": 7.235939323220537, "grad_norm": 0.015869140625, "learning_rate": 3.5382158106378705e-05, "loss": 0.0002, "step": 15503 }, { "epoch": 7.2364060676779465, "grad_norm": 0.0218505859375, "learning_rate": 3.5370960199892465e-05, "loss": 0.0002, "step": 15504 }, { "epoch": 7.236872812135356, "grad_norm": 0.01434326171875, "learning_rate": 3.5359763684941184e-05, "loss": 0.0002, "step": 15505 }, { "epoch": 7.237339556592765, "grad_norm": 0.259765625, "learning_rate": 3.534856856176587e-05, "loss": 0.0059, "step": 15506 }, { "epoch": 7.237806301050175, "grad_norm": 0.06640625, "learning_rate": 3.533737483060762e-05, "loss": 0.0031, "step": 15507 }, { "epoch": 7.238273045507585, "grad_norm": 0.021728515625, "learning_rate": 3.532618249170746e-05, "loss": 0.0003, "step": 15508 }, { "epoch": 7.238739789964995, "grad_norm": 0.01123046875, "learning_rate": 3.531499154530632e-05, "loss": 0.0002, "step": 15509 }, { "epoch": 7.2392065344224035, "grad_norm": 0.01116943359375, "learning_rate": 3.530380199164518e-05, "loss": 0.0001, "step": 15510 }, { "epoch": 7.239673278879813, "grad_norm": 0.00732421875, "learning_rate": 3.5292613830964994e-05, "loss": 0.0001, "step": 15511 }, { "epoch": 7.240140023337223, "grad_norm": 0.031494140625, "learning_rate": 3.528142706350659e-05, "loss": 0.0002, "step": 15512 }, { "epoch": 7.240606767794633, "grad_norm": 0.01214599609375, "learning_rate": 3.527024168951087e-05, "loss": 0.0002, "step": 15513 }, { "epoch": 7.241073512252042, "grad_norm": 0.011962890625, "learning_rate": 3.525905770921871e-05, "loss": 0.0002, "step": 15514 }, { "epoch": 7.241540256709452, "grad_norm": 0.01513671875, "learning_rate": 3.524787512287082e-05, "loss": 0.0002, "step": 15515 }, { "epoch": 7.242007001166861, "grad_norm": 0.01007080078125, "learning_rate": 3.5236693930708034e-05, "loss": 0.0001, "step": 15516 }, { "epoch": 7.24247374562427, "grad_norm": 0.0390625, "learning_rate": 3.522551413297112e-05, "loss": 0.0002, "step": 15517 }, { "epoch": 7.24294049008168, "grad_norm": 0.01116943359375, "learning_rate": 3.521433572990074e-05, "loss": 0.0002, "step": 15518 }, { "epoch": 7.24340723453909, "grad_norm": 0.051513671875, "learning_rate": 3.520315872173757e-05, "loss": 0.0003, "step": 15519 }, { "epoch": 7.2438739789965, "grad_norm": 0.00811767578125, "learning_rate": 3.5191983108722315e-05, "loss": 0.0002, "step": 15520 }, { "epoch": 7.244340723453909, "grad_norm": 0.01080322265625, "learning_rate": 3.518080889109554e-05, "loss": 0.0002, "step": 15521 }, { "epoch": 7.244807467911318, "grad_norm": 0.01324462890625, "learning_rate": 3.5169636069097866e-05, "loss": 0.0001, "step": 15522 }, { "epoch": 7.245274212368728, "grad_norm": 0.04052734375, "learning_rate": 3.515846464296989e-05, "loss": 0.0003, "step": 15523 }, { "epoch": 7.245740956826138, "grad_norm": 0.134765625, "learning_rate": 3.514729461295208e-05, "loss": 0.0005, "step": 15524 }, { "epoch": 7.246207701283547, "grad_norm": 0.007293701171875, "learning_rate": 3.513612597928497e-05, "loss": 0.0002, "step": 15525 }, { "epoch": 7.246674445740957, "grad_norm": 0.048583984375, "learning_rate": 3.512495874220908e-05, "loss": 0.0003, "step": 15526 }, { "epoch": 7.2471411901983664, "grad_norm": 0.0673828125, "learning_rate": 3.511379290196476e-05, "loss": 0.0003, "step": 15527 }, { "epoch": 7.247607934655776, "grad_norm": 0.023681640625, "learning_rate": 3.510262845879248e-05, "loss": 0.0002, "step": 15528 }, { "epoch": 7.248074679113185, "grad_norm": 0.009521484375, "learning_rate": 3.5091465412932646e-05, "loss": 0.0002, "step": 15529 }, { "epoch": 7.248541423570595, "grad_norm": 0.011474609375, "learning_rate": 3.5080303764625544e-05, "loss": 0.0001, "step": 15530 }, { "epoch": 7.249008168028005, "grad_norm": 0.038330078125, "learning_rate": 3.506914351411154e-05, "loss": 0.0017, "step": 15531 }, { "epoch": 7.2494749124854145, "grad_norm": 0.015380859375, "learning_rate": 3.505798466163094e-05, "loss": 0.0002, "step": 15532 }, { "epoch": 7.249941656942823, "grad_norm": 0.0291748046875, "learning_rate": 3.504682720742395e-05, "loss": 0.0003, "step": 15533 }, { "epoch": 7.250408401400233, "grad_norm": 0.048828125, "learning_rate": 3.503567115173088e-05, "loss": 0.0028, "step": 15534 }, { "epoch": 7.250875145857643, "grad_norm": 0.021728515625, "learning_rate": 3.502451649479185e-05, "loss": 0.0002, "step": 15535 }, { "epoch": 7.251341890315053, "grad_norm": 0.041748046875, "learning_rate": 3.501336323684707e-05, "loss": 0.0003, "step": 15536 }, { "epoch": 7.251808634772462, "grad_norm": 0.01708984375, "learning_rate": 3.5002211378136716e-05, "loss": 0.0002, "step": 15537 }, { "epoch": 7.2522753792298715, "grad_norm": 0.01214599609375, "learning_rate": 3.499106091890084e-05, "loss": 0.0001, "step": 15538 }, { "epoch": 7.252742123687281, "grad_norm": 0.044921875, "learning_rate": 3.4979911859379554e-05, "loss": 0.0002, "step": 15539 }, { "epoch": 7.253208868144691, "grad_norm": 0.00848388671875, "learning_rate": 3.496876419981293e-05, "loss": 0.0002, "step": 15540 }, { "epoch": 7.2536756126021, "grad_norm": 0.01708984375, "learning_rate": 3.495761794044095e-05, "loss": 0.0002, "step": 15541 }, { "epoch": 7.25414235705951, "grad_norm": 0.06640625, "learning_rate": 3.4946473081503605e-05, "loss": 0.0003, "step": 15542 }, { "epoch": 7.25460910151692, "grad_norm": 0.012939453125, "learning_rate": 3.493532962324091e-05, "loss": 0.0002, "step": 15543 }, { "epoch": 7.255075845974329, "grad_norm": 0.00860595703125, "learning_rate": 3.492418756589272e-05, "loss": 0.0002, "step": 15544 }, { "epoch": 7.255075845974329, "eval_loss": 2.3933210372924805, "eval_runtime": 84.4828, "eval_samples_per_second": 21.353, "eval_steps_per_second": 2.675, "step": 15544 }, { "epoch": 7.255542590431738, "grad_norm": 0.0115966796875, "learning_rate": 3.491304690969899e-05, "loss": 0.0002, "step": 15545 }, { "epoch": 7.256009334889148, "grad_norm": 0.0157470703125, "learning_rate": 3.490190765489959e-05, "loss": 0.0002, "step": 15546 }, { "epoch": 7.256476079346558, "grad_norm": 0.010498046875, "learning_rate": 3.4890769801734327e-05, "loss": 0.0002, "step": 15547 }, { "epoch": 7.256942823803968, "grad_norm": 0.0184326171875, "learning_rate": 3.487963335044302e-05, "loss": 0.0002, "step": 15548 }, { "epoch": 7.257409568261377, "grad_norm": 0.1435546875, "learning_rate": 3.48684983012655e-05, "loss": 0.0004, "step": 15549 }, { "epoch": 7.257876312718786, "grad_norm": 0.01806640625, "learning_rate": 3.485736465444144e-05, "loss": 0.0002, "step": 15550 }, { "epoch": 7.258343057176196, "grad_norm": 0.00836181640625, "learning_rate": 3.484623241021059e-05, "loss": 0.0002, "step": 15551 }, { "epoch": 7.258809801633606, "grad_norm": 0.044677734375, "learning_rate": 3.483510156881269e-05, "loss": 0.0003, "step": 15552 }, { "epoch": 7.259276546091015, "grad_norm": 0.0126953125, "learning_rate": 3.482397213048732e-05, "loss": 0.0001, "step": 15553 }, { "epoch": 7.259743290548425, "grad_norm": 0.0478515625, "learning_rate": 3.481284409547417e-05, "loss": 0.0003, "step": 15554 }, { "epoch": 7.2602100350058345, "grad_norm": 0.041015625, "learning_rate": 3.480171746401278e-05, "loss": 0.0002, "step": 15555 }, { "epoch": 7.260676779463244, "grad_norm": 0.00799560546875, "learning_rate": 3.4790592236342755e-05, "loss": 0.0002, "step": 15556 }, { "epoch": 7.261143523920653, "grad_norm": 0.0673828125, "learning_rate": 3.4779468412703665e-05, "loss": 0.0002, "step": 15557 }, { "epoch": 7.261610268378063, "grad_norm": 0.0185546875, "learning_rate": 3.476834599333495e-05, "loss": 0.0001, "step": 15558 }, { "epoch": 7.262077012835473, "grad_norm": 0.0576171875, "learning_rate": 3.475722497847612e-05, "loss": 0.0022, "step": 15559 }, { "epoch": 7.262543757292882, "grad_norm": 0.0228271484375, "learning_rate": 3.4746105368366656e-05, "loss": 0.0002, "step": 15560 }, { "epoch": 7.2630105017502915, "grad_norm": 0.0067138671875, "learning_rate": 3.473498716324591e-05, "loss": 0.0002, "step": 15561 }, { "epoch": 7.263477246207701, "grad_norm": 0.006805419921875, "learning_rate": 3.472387036335331e-05, "loss": 0.0001, "step": 15562 }, { "epoch": 7.263943990665111, "grad_norm": 0.01092529296875, "learning_rate": 3.471275496892824e-05, "loss": 0.0002, "step": 15563 }, { "epoch": 7.264410735122521, "grad_norm": 0.1513671875, "learning_rate": 3.470164098020995e-05, "loss": 0.0004, "step": 15564 }, { "epoch": 7.26487747957993, "grad_norm": 0.017578125, "learning_rate": 3.4690528397437785e-05, "loss": 0.0002, "step": 15565 }, { "epoch": 7.26534422403734, "grad_norm": 0.0189208984375, "learning_rate": 3.467941722085107e-05, "loss": 0.0002, "step": 15566 }, { "epoch": 7.265810968494749, "grad_norm": 0.0074462890625, "learning_rate": 3.466830745068892e-05, "loss": 0.0001, "step": 15567 }, { "epoch": 7.266277712952158, "grad_norm": 0.049560546875, "learning_rate": 3.4657199087190585e-05, "loss": 0.0002, "step": 15568 }, { "epoch": 7.266744457409568, "grad_norm": 0.0654296875, "learning_rate": 3.464609213059529e-05, "loss": 0.0002, "step": 15569 }, { "epoch": 7.267211201866978, "grad_norm": 0.01190185546875, "learning_rate": 3.463498658114211e-05, "loss": 0.0002, "step": 15570 }, { "epoch": 7.267677946324388, "grad_norm": 0.0135498046875, "learning_rate": 3.462388243907019e-05, "loss": 0.0002, "step": 15571 }, { "epoch": 7.268144690781797, "grad_norm": 0.013671875, "learning_rate": 3.461277970461866e-05, "loss": 0.0002, "step": 15572 }, { "epoch": 7.268611435239206, "grad_norm": 0.01263427734375, "learning_rate": 3.460167837802648e-05, "loss": 0.0002, "step": 15573 }, { "epoch": 7.269078179696616, "grad_norm": 0.01141357421875, "learning_rate": 3.459057845953274e-05, "loss": 0.0001, "step": 15574 }, { "epoch": 7.269544924154026, "grad_norm": 0.0174560546875, "learning_rate": 3.457947994937645e-05, "loss": 0.0002, "step": 15575 }, { "epoch": 7.270011668611435, "grad_norm": 0.037353515625, "learning_rate": 3.456838284779651e-05, "loss": 0.0002, "step": 15576 }, { "epoch": 7.270478413068845, "grad_norm": 0.049560546875, "learning_rate": 3.4557287155031884e-05, "loss": 0.0026, "step": 15577 }, { "epoch": 7.2709451575262545, "grad_norm": 0.008056640625, "learning_rate": 3.4546192871321505e-05, "loss": 0.0002, "step": 15578 }, { "epoch": 7.271411901983664, "grad_norm": 0.038818359375, "learning_rate": 3.4535099996904183e-05, "loss": 0.0002, "step": 15579 }, { "epoch": 7.271878646441073, "grad_norm": 0.197265625, "learning_rate": 3.452400853201879e-05, "loss": 0.0005, "step": 15580 }, { "epoch": 7.272345390898483, "grad_norm": 0.046875, "learning_rate": 3.4512918476904176e-05, "loss": 0.0004, "step": 15581 }, { "epoch": 7.272812135355893, "grad_norm": 0.0517578125, "learning_rate": 3.450182983179906e-05, "loss": 0.0003, "step": 15582 }, { "epoch": 7.273278879813303, "grad_norm": 0.08984375, "learning_rate": 3.449074259694224e-05, "loss": 0.0003, "step": 15583 }, { "epoch": 7.2737456242707115, "grad_norm": 0.01031494140625, "learning_rate": 3.447965677257239e-05, "loss": 0.0002, "step": 15584 }, { "epoch": 7.274212368728121, "grad_norm": 0.083984375, "learning_rate": 3.4468572358928233e-05, "loss": 0.0003, "step": 15585 }, { "epoch": 7.274679113185531, "grad_norm": 0.0157470703125, "learning_rate": 3.445748935624844e-05, "loss": 0.0002, "step": 15586 }, { "epoch": 7.275145857642941, "grad_norm": 0.0142822265625, "learning_rate": 3.4446407764771596e-05, "loss": 0.0002, "step": 15587 }, { "epoch": 7.27561260210035, "grad_norm": 0.0380859375, "learning_rate": 3.443532758473632e-05, "loss": 0.0022, "step": 15588 }, { "epoch": 7.27607934655776, "grad_norm": 0.00982666015625, "learning_rate": 3.442424881638123e-05, "loss": 0.0002, "step": 15589 }, { "epoch": 7.276546091015169, "grad_norm": 0.0595703125, "learning_rate": 3.4413171459944784e-05, "loss": 0.0024, "step": 15590 }, { "epoch": 7.277012835472579, "grad_norm": 0.021728515625, "learning_rate": 3.440209551566552e-05, "loss": 0.0002, "step": 15591 }, { "epoch": 7.277479579929988, "grad_norm": 0.01239013671875, "learning_rate": 3.439102098378196e-05, "loss": 0.0002, "step": 15592 }, { "epoch": 7.277946324387398, "grad_norm": 0.00921630859375, "learning_rate": 3.437994786453248e-05, "loss": 0.0002, "step": 15593 }, { "epoch": 7.278413068844808, "grad_norm": 0.009765625, "learning_rate": 3.436887615815553e-05, "loss": 0.0002, "step": 15594 }, { "epoch": 7.2788798133022174, "grad_norm": 0.00885009765625, "learning_rate": 3.4357805864889525e-05, "loss": 0.0001, "step": 15595 }, { "epoch": 7.279346557759626, "grad_norm": 0.0185546875, "learning_rate": 3.4346736984972764e-05, "loss": 0.0002, "step": 15596 }, { "epoch": 7.279813302217036, "grad_norm": 0.0113525390625, "learning_rate": 3.4335669518643585e-05, "loss": 0.0002, "step": 15597 }, { "epoch": 7.280280046674446, "grad_norm": 0.0126953125, "learning_rate": 3.4324603466140345e-05, "loss": 0.0002, "step": 15598 }, { "epoch": 7.280746791131856, "grad_norm": 0.06884765625, "learning_rate": 3.4313538827701216e-05, "loss": 0.0044, "step": 15599 }, { "epoch": 7.281213535589265, "grad_norm": 0.0250244140625, "learning_rate": 3.430247560356449e-05, "loss": 0.0003, "step": 15600 }, { "epoch": 7.2816802800466744, "grad_norm": 0.05322265625, "learning_rate": 3.429141379396838e-05, "loss": 0.003, "step": 15601 }, { "epoch": 7.282147024504084, "grad_norm": 0.01904296875, "learning_rate": 3.4280353399151e-05, "loss": 0.0001, "step": 15602 }, { "epoch": 7.282613768961493, "grad_norm": 0.04736328125, "learning_rate": 3.4269294419350573e-05, "loss": 0.0006, "step": 15603 }, { "epoch": 7.283080513418903, "grad_norm": 0.0274658203125, "learning_rate": 3.425823685480513e-05, "loss": 0.0002, "step": 15604 }, { "epoch": 7.283547257876313, "grad_norm": 0.01116943359375, "learning_rate": 3.4247180705752776e-05, "loss": 0.0002, "step": 15605 }, { "epoch": 7.2840140023337225, "grad_norm": 0.010986328125, "learning_rate": 3.423612597243162e-05, "loss": 0.0002, "step": 15606 }, { "epoch": 7.2844807467911314, "grad_norm": 0.01226806640625, "learning_rate": 3.42250726550796e-05, "loss": 0.0002, "step": 15607 }, { "epoch": 7.284947491248541, "grad_norm": 0.1376953125, "learning_rate": 3.421402075393476e-05, "loss": 0.0005, "step": 15608 }, { "epoch": 7.285414235705951, "grad_norm": 0.1416015625, "learning_rate": 3.420297026923507e-05, "loss": 0.0005, "step": 15609 }, { "epoch": 7.285880980163361, "grad_norm": 0.0108642578125, "learning_rate": 3.4191921201218394e-05, "loss": 0.0002, "step": 15610 }, { "epoch": 7.28634772462077, "grad_norm": 0.029541015625, "learning_rate": 3.418087355012266e-05, "loss": 0.0002, "step": 15611 }, { "epoch": 7.2868144690781795, "grad_norm": 0.029296875, "learning_rate": 3.41698273161858e-05, "loss": 0.0002, "step": 15612 }, { "epoch": 7.287281213535589, "grad_norm": 0.036865234375, "learning_rate": 3.4158782499645545e-05, "loss": 0.0003, "step": 15613 }, { "epoch": 7.287747957992999, "grad_norm": 0.019775390625, "learning_rate": 3.414773910073981e-05, "loss": 0.0002, "step": 15614 }, { "epoch": 7.288214702450408, "grad_norm": 0.0267333984375, "learning_rate": 3.4136697119706294e-05, "loss": 0.0002, "step": 15615 }, { "epoch": 7.288681446907818, "grad_norm": 0.00946044921875, "learning_rate": 3.4125656556782746e-05, "loss": 0.0002, "step": 15616 }, { "epoch": 7.289148191365228, "grad_norm": 0.01055908203125, "learning_rate": 3.4114617412206903e-05, "loss": 0.0002, "step": 15617 }, { "epoch": 7.289614935822637, "grad_norm": 0.05859375, "learning_rate": 3.410357968621648e-05, "loss": 0.0002, "step": 15618 }, { "epoch": 7.290081680280046, "grad_norm": 0.02099609375, "learning_rate": 3.409254337904906e-05, "loss": 0.0002, "step": 15619 }, { "epoch": 7.290548424737456, "grad_norm": 0.032958984375, "learning_rate": 3.4081508490942306e-05, "loss": 0.002, "step": 15620 }, { "epoch": 7.291015169194866, "grad_norm": 0.022216796875, "learning_rate": 3.4070475022133855e-05, "loss": 0.0004, "step": 15621 }, { "epoch": 7.291481913652276, "grad_norm": 0.0177001953125, "learning_rate": 3.405944297286119e-05, "loss": 0.0002, "step": 15622 }, { "epoch": 7.291948658109685, "grad_norm": 0.032470703125, "learning_rate": 3.404841234336187e-05, "loss": 0.0015, "step": 15623 }, { "epoch": 7.292415402567094, "grad_norm": 0.006988525390625, "learning_rate": 3.403738313387345e-05, "loss": 0.0001, "step": 15624 }, { "epoch": 7.292882147024504, "grad_norm": 0.0291748046875, "learning_rate": 3.402635534463332e-05, "loss": 0.0003, "step": 15625 }, { "epoch": 7.293348891481914, "grad_norm": 0.05517578125, "learning_rate": 3.401532897587897e-05, "loss": 0.0021, "step": 15626 }, { "epoch": 7.293815635939323, "grad_norm": 0.0185546875, "learning_rate": 3.4004304027847814e-05, "loss": 0.0002, "step": 15627 }, { "epoch": 7.294282380396733, "grad_norm": 0.0272216796875, "learning_rate": 3.399328050077719e-05, "loss": 0.0002, "step": 15628 }, { "epoch": 7.2947491248541425, "grad_norm": 0.05859375, "learning_rate": 3.398225839490447e-05, "loss": 0.0028, "step": 15629 }, { "epoch": 7.295215869311552, "grad_norm": 0.01055908203125, "learning_rate": 3.3971237710467017e-05, "loss": 0.0002, "step": 15630 }, { "epoch": 7.295682613768961, "grad_norm": 0.048095703125, "learning_rate": 3.396021844770204e-05, "loss": 0.0003, "step": 15631 }, { "epoch": 7.296149358226371, "grad_norm": 0.060791015625, "learning_rate": 3.3949200606846865e-05, "loss": 0.0044, "step": 15632 }, { "epoch": 7.296616102683781, "grad_norm": 0.027587890625, "learning_rate": 3.3938184188138644e-05, "loss": 0.0002, "step": 15633 }, { "epoch": 7.297082847141191, "grad_norm": 0.022216796875, "learning_rate": 3.392716919181462e-05, "loss": 0.0002, "step": 15634 }, { "epoch": 7.2975495915985995, "grad_norm": 0.0111083984375, "learning_rate": 3.391615561811199e-05, "loss": 0.0002, "step": 15635 }, { "epoch": 7.298016336056009, "grad_norm": 0.07958984375, "learning_rate": 3.390514346726782e-05, "loss": 0.0033, "step": 15636 }, { "epoch": 7.298483080513419, "grad_norm": 0.057861328125, "learning_rate": 3.389413273951924e-05, "loss": 0.0039, "step": 15637 }, { "epoch": 7.298949824970828, "grad_norm": 0.01275634765625, "learning_rate": 3.388312343510336e-05, "loss": 0.0002, "step": 15638 }, { "epoch": 7.299416569428238, "grad_norm": 0.0458984375, "learning_rate": 3.387211555425715e-05, "loss": 0.0021, "step": 15639 }, { "epoch": 7.299883313885648, "grad_norm": 0.017822265625, "learning_rate": 3.386110909721767e-05, "loss": 0.0002, "step": 15640 }, { "epoch": 7.300350058343057, "grad_norm": 0.033203125, "learning_rate": 3.3850104064221924e-05, "loss": 0.0003, "step": 15641 }, { "epoch": 7.300816802800467, "grad_norm": 0.01202392578125, "learning_rate": 3.38391004555068e-05, "loss": 0.0002, "step": 15642 }, { "epoch": 7.301283547257876, "grad_norm": 0.039794921875, "learning_rate": 3.382809827130925e-05, "loss": 0.002, "step": 15643 }, { "epoch": 7.301750291715286, "grad_norm": 0.0184326171875, "learning_rate": 3.38170975118662e-05, "loss": 0.0002, "step": 15644 }, { "epoch": 7.302217036172696, "grad_norm": 0.0274658203125, "learning_rate": 3.3806098177414435e-05, "loss": 0.0017, "step": 15645 }, { "epoch": 7.302683780630105, "grad_norm": 0.01318359375, "learning_rate": 3.3795100268190815e-05, "loss": 0.0002, "step": 15646 }, { "epoch": 7.303150525087514, "grad_norm": 0.01287841796875, "learning_rate": 3.378410378443219e-05, "loss": 0.0002, "step": 15647 }, { "epoch": 7.303617269544924, "grad_norm": 0.01123046875, "learning_rate": 3.377310872637524e-05, "loss": 0.0002, "step": 15648 }, { "epoch": 7.304084014002334, "grad_norm": 0.021728515625, "learning_rate": 3.376211509425673e-05, "loss": 0.0002, "step": 15649 }, { "epoch": 7.304550758459743, "grad_norm": 0.04150390625, "learning_rate": 3.375112288831341e-05, "loss": 0.0003, "step": 15650 }, { "epoch": 7.305017502917153, "grad_norm": 0.19921875, "learning_rate": 3.374013210878189e-05, "loss": 0.0006, "step": 15651 }, { "epoch": 7.3054842473745625, "grad_norm": 0.058349609375, "learning_rate": 3.372914275589887e-05, "loss": 0.0028, "step": 15652 }, { "epoch": 7.305950991831972, "grad_norm": 0.0284423828125, "learning_rate": 3.3718154829900906e-05, "loss": 0.0002, "step": 15653 }, { "epoch": 7.306417736289381, "grad_norm": 0.0111083984375, "learning_rate": 3.370716833102461e-05, "loss": 0.0002, "step": 15654 }, { "epoch": 7.306884480746791, "grad_norm": 0.0111083984375, "learning_rate": 3.3696183259506566e-05, "loss": 0.0002, "step": 15655 }, { "epoch": 7.307351225204201, "grad_norm": 0.0093994140625, "learning_rate": 3.368519961558322e-05, "loss": 0.0002, "step": 15656 }, { "epoch": 7.307817969661611, "grad_norm": 0.0230712890625, "learning_rate": 3.367421739949111e-05, "loss": 0.0002, "step": 15657 }, { "epoch": 7.3082847141190195, "grad_norm": 0.031494140625, "learning_rate": 3.3663236611466734e-05, "loss": 0.0002, "step": 15658 }, { "epoch": 7.308751458576429, "grad_norm": 0.01025390625, "learning_rate": 3.3652257251746424e-05, "loss": 0.0002, "step": 15659 }, { "epoch": 7.309218203033839, "grad_norm": 0.0093994140625, "learning_rate": 3.364127932056664e-05, "loss": 0.0002, "step": 15660 }, { "epoch": 7.309684947491249, "grad_norm": 0.01043701171875, "learning_rate": 3.3630302818163775e-05, "loss": 0.0002, "step": 15661 }, { "epoch": 7.310151691948658, "grad_norm": 0.01336669921875, "learning_rate": 3.361932774477412e-05, "loss": 0.0002, "step": 15662 }, { "epoch": 7.310618436406068, "grad_norm": 0.0244140625, "learning_rate": 3.3608354100633954e-05, "loss": 0.0002, "step": 15663 }, { "epoch": 7.311085180863477, "grad_norm": 0.01416015625, "learning_rate": 3.359738188597963e-05, "loss": 0.0002, "step": 15664 }, { "epoch": 7.311551925320887, "grad_norm": 0.0218505859375, "learning_rate": 3.358641110104731e-05, "loss": 0.002, "step": 15665 }, { "epoch": 7.312018669778296, "grad_norm": 0.0286865234375, "learning_rate": 3.357544174607324e-05, "loss": 0.0002, "step": 15666 }, { "epoch": 7.312485414235706, "grad_norm": 0.00921630859375, "learning_rate": 3.356447382129365e-05, "loss": 0.0002, "step": 15667 }, { "epoch": 7.312952158693116, "grad_norm": 0.00946044921875, "learning_rate": 3.355350732694462e-05, "loss": 0.0002, "step": 15668 }, { "epoch": 7.3134189031505255, "grad_norm": 0.134765625, "learning_rate": 3.3542542263262286e-05, "loss": 0.0003, "step": 15669 }, { "epoch": 7.313885647607934, "grad_norm": 0.009033203125, "learning_rate": 3.353157863048281e-05, "loss": 0.0002, "step": 15670 }, { "epoch": 7.314352392065344, "grad_norm": 0.05712890625, "learning_rate": 3.3520616428842145e-05, "loss": 0.0003, "step": 15671 }, { "epoch": 7.314819136522754, "grad_norm": 0.030029296875, "learning_rate": 3.350965565857637e-05, "loss": 0.0003, "step": 15672 }, { "epoch": 7.315285880980164, "grad_norm": 0.06884765625, "learning_rate": 3.349869631992152e-05, "loss": 0.0003, "step": 15673 }, { "epoch": 7.315752625437573, "grad_norm": 0.051513671875, "learning_rate": 3.348773841311348e-05, "loss": 0.0003, "step": 15674 }, { "epoch": 7.3162193698949824, "grad_norm": 0.0262451171875, "learning_rate": 3.3476781938388235e-05, "loss": 0.0002, "step": 15675 }, { "epoch": 7.316686114352392, "grad_norm": 0.0113525390625, "learning_rate": 3.346582689598171e-05, "loss": 0.0002, "step": 15676 }, { "epoch": 7.317152858809802, "grad_norm": 0.0164794921875, "learning_rate": 3.345487328612973e-05, "loss": 0.0002, "step": 15677 }, { "epoch": 7.317619603267211, "grad_norm": 0.0289306640625, "learning_rate": 3.3443921109068154e-05, "loss": 0.0002, "step": 15678 }, { "epoch": 7.318086347724621, "grad_norm": 0.01214599609375, "learning_rate": 3.3432970365032834e-05, "loss": 0.0002, "step": 15679 }, { "epoch": 7.3185530921820305, "grad_norm": 0.006439208984375, "learning_rate": 3.342202105425949e-05, "loss": 0.0001, "step": 15680 }, { "epoch": 7.3190198366394394, "grad_norm": 0.02392578125, "learning_rate": 3.34110731769839e-05, "loss": 0.0002, "step": 15681 }, { "epoch": 7.319486581096849, "grad_norm": 0.04052734375, "learning_rate": 3.340012673344183e-05, "loss": 0.0027, "step": 15682 }, { "epoch": 7.319953325554259, "grad_norm": 0.0159912109375, "learning_rate": 3.338918172386888e-05, "loss": 0.0002, "step": 15683 }, { "epoch": 7.320420070011669, "grad_norm": 0.03466796875, "learning_rate": 3.337823814850078e-05, "loss": 0.0021, "step": 15684 }, { "epoch": 7.320886814469079, "grad_norm": 0.00787353515625, "learning_rate": 3.3367296007573115e-05, "loss": 0.0001, "step": 15685 }, { "epoch": 7.3213535589264875, "grad_norm": 0.0179443359375, "learning_rate": 3.335635530132148e-05, "loss": 0.0002, "step": 15686 }, { "epoch": 7.321820303383897, "grad_norm": 0.048828125, "learning_rate": 3.33454160299815e-05, "loss": 0.0022, "step": 15687 }, { "epoch": 7.322287047841307, "grad_norm": 0.09912109375, "learning_rate": 3.333447819378864e-05, "loss": 0.0069, "step": 15688 }, { "epoch": 7.322753792298716, "grad_norm": 0.004547119140625, "learning_rate": 3.332354179297843e-05, "loss": 0.0001, "step": 15689 }, { "epoch": 7.323220536756126, "grad_norm": 0.00970458984375, "learning_rate": 3.331260682778637e-05, "loss": 0.0002, "step": 15690 }, { "epoch": 7.323687281213536, "grad_norm": 0.33984375, "learning_rate": 3.3301673298447844e-05, "loss": 0.0008, "step": 15691 }, { "epoch": 7.324154025670945, "grad_norm": 0.0223388671875, "learning_rate": 3.3290741205198294e-05, "loss": 0.0002, "step": 15692 }, { "epoch": 7.324620770128354, "grad_norm": 0.0150146484375, "learning_rate": 3.327981054827314e-05, "loss": 0.0002, "step": 15693 }, { "epoch": 7.325087514585764, "grad_norm": 0.010986328125, "learning_rate": 3.3268881327907654e-05, "loss": 0.0002, "step": 15694 }, { "epoch": 7.325554259043174, "grad_norm": 0.007080078125, "learning_rate": 3.3257953544337196e-05, "loss": 0.0001, "step": 15695 }, { "epoch": 7.326021003500584, "grad_norm": 0.011962890625, "learning_rate": 3.324702719779709e-05, "loss": 0.0002, "step": 15696 }, { "epoch": 7.326487747957993, "grad_norm": 0.01068115234375, "learning_rate": 3.3236102288522506e-05, "loss": 0.0001, "step": 15697 }, { "epoch": 7.326954492415402, "grad_norm": 0.016845703125, "learning_rate": 3.3225178816748715e-05, "loss": 0.0002, "step": 15698 }, { "epoch": 7.327421236872812, "grad_norm": 0.01190185546875, "learning_rate": 3.321425678271095e-05, "loss": 0.0002, "step": 15699 }, { "epoch": 7.327887981330222, "grad_norm": 0.010009765625, "learning_rate": 3.3203336186644306e-05, "loss": 0.0002, "step": 15700 }, { "epoch": 7.328354725787631, "grad_norm": 0.006195068359375, "learning_rate": 3.319241702878397e-05, "loss": 0.0001, "step": 15701 }, { "epoch": 7.328821470245041, "grad_norm": 0.0693359375, "learning_rate": 3.318149930936499e-05, "loss": 0.0058, "step": 15702 }, { "epoch": 7.3292882147024505, "grad_norm": 0.1328125, "learning_rate": 3.317058302862248e-05, "loss": 0.0003, "step": 15703 }, { "epoch": 7.32975495915986, "grad_norm": 0.0198974609375, "learning_rate": 3.315966818679148e-05, "loss": 0.0002, "step": 15704 }, { "epoch": 7.330221703617269, "grad_norm": 0.055908203125, "learning_rate": 3.3148754784106976e-05, "loss": 0.0003, "step": 15705 }, { "epoch": 7.330688448074679, "grad_norm": 0.01495361328125, "learning_rate": 3.313784282080393e-05, "loss": 0.0002, "step": 15706 }, { "epoch": 7.331155192532089, "grad_norm": 0.00927734375, "learning_rate": 3.3126932297117356e-05, "loss": 0.0001, "step": 15707 }, { "epoch": 7.331621936989499, "grad_norm": 0.059326171875, "learning_rate": 3.3116023213282085e-05, "loss": 0.0015, "step": 15708 }, { "epoch": 7.3320886814469075, "grad_norm": 0.0211181640625, "learning_rate": 3.310511556953305e-05, "loss": 0.0002, "step": 15709 }, { "epoch": 7.332555425904317, "grad_norm": 0.054443359375, "learning_rate": 3.309420936610513e-05, "loss": 0.0002, "step": 15710 }, { "epoch": 7.333022170361727, "grad_norm": 0.01397705078125, "learning_rate": 3.308330460323312e-05, "loss": 0.0001, "step": 15711 }, { "epoch": 7.333488914819137, "grad_norm": 0.2412109375, "learning_rate": 3.307240128115178e-05, "loss": 0.0005, "step": 15712 }, { "epoch": 7.333955659276546, "grad_norm": 0.050048828125, "learning_rate": 3.306149940009592e-05, "loss": 0.0033, "step": 15713 }, { "epoch": 7.334422403733956, "grad_norm": 0.0166015625, "learning_rate": 3.3050598960300205e-05, "loss": 0.0002, "step": 15714 }, { "epoch": 7.334889148191365, "grad_norm": 0.0164794921875, "learning_rate": 3.303969996199939e-05, "loss": 0.0002, "step": 15715 }, { "epoch": 7.335355892648775, "grad_norm": 0.0247802734375, "learning_rate": 3.302880240542816e-05, "loss": 0.0002, "step": 15716 }, { "epoch": 7.335822637106184, "grad_norm": 0.040771484375, "learning_rate": 3.301790629082109e-05, "loss": 0.0023, "step": 15717 }, { "epoch": 7.336289381563594, "grad_norm": 0.01129150390625, "learning_rate": 3.3007011618412806e-05, "loss": 0.0002, "step": 15718 }, { "epoch": 7.336756126021004, "grad_norm": 0.01611328125, "learning_rate": 3.2996118388437936e-05, "loss": 0.0002, "step": 15719 }, { "epoch": 7.3372228704784135, "grad_norm": 0.01055908203125, "learning_rate": 3.2985226601130936e-05, "loss": 0.0001, "step": 15720 }, { "epoch": 7.337689614935822, "grad_norm": 0.005523681640625, "learning_rate": 3.297433625672637e-05, "loss": 0.0001, "step": 15721 }, { "epoch": 7.338156359393232, "grad_norm": 0.014892578125, "learning_rate": 3.296344735545873e-05, "loss": 0.0002, "step": 15722 }, { "epoch": 7.338623103850642, "grad_norm": 0.011474609375, "learning_rate": 3.295255989756243e-05, "loss": 0.0002, "step": 15723 }, { "epoch": 7.339089848308051, "grad_norm": 0.037353515625, "learning_rate": 3.294167388327189e-05, "loss": 0.0002, "step": 15724 }, { "epoch": 7.339556592765461, "grad_norm": 0.0240478515625, "learning_rate": 3.293078931282156e-05, "loss": 0.0002, "step": 15725 }, { "epoch": 7.3400233372228705, "grad_norm": 0.04150390625, "learning_rate": 3.2919906186445714e-05, "loss": 0.0017, "step": 15726 }, { "epoch": 7.34049008168028, "grad_norm": 0.0167236328125, "learning_rate": 3.2909024504378716e-05, "loss": 0.0002, "step": 15727 }, { "epoch": 7.34095682613769, "grad_norm": 0.00994873046875, "learning_rate": 3.2898144266854894e-05, "loss": 0.0002, "step": 15728 }, { "epoch": 7.341423570595099, "grad_norm": 0.01434326171875, "learning_rate": 3.2887265474108434e-05, "loss": 0.0002, "step": 15729 }, { "epoch": 7.341890315052509, "grad_norm": 0.08984375, "learning_rate": 3.287638812637361e-05, "loss": 0.0003, "step": 15730 }, { "epoch": 7.342357059509919, "grad_norm": 0.00897216796875, "learning_rate": 3.286551222388468e-05, "loss": 0.0002, "step": 15731 }, { "epoch": 7.3428238039673275, "grad_norm": 0.01116943359375, "learning_rate": 3.285463776687571e-05, "loss": 0.0002, "step": 15732 }, { "epoch": 7.343290548424737, "grad_norm": 0.045654296875, "learning_rate": 3.284376475558092e-05, "loss": 0.0016, "step": 15733 }, { "epoch": 7.343757292882147, "grad_norm": 0.0203857421875, "learning_rate": 3.2832893190234346e-05, "loss": 0.0002, "step": 15734 }, { "epoch": 7.344224037339557, "grad_norm": 0.0673828125, "learning_rate": 3.282202307107012e-05, "loss": 0.0028, "step": 15735 }, { "epoch": 7.344690781796966, "grad_norm": 0.0252685546875, "learning_rate": 3.2811154398322295e-05, "loss": 0.0002, "step": 15736 }, { "epoch": 7.345157526254376, "grad_norm": 0.134765625, "learning_rate": 3.2800287172224834e-05, "loss": 0.0024, "step": 15737 }, { "epoch": 7.345624270711785, "grad_norm": 0.053955078125, "learning_rate": 3.2789421393011746e-05, "loss": 0.0003, "step": 15738 }, { "epoch": 7.346091015169195, "grad_norm": 0.0198974609375, "learning_rate": 3.2778557060917025e-05, "loss": 0.0002, "step": 15739 }, { "epoch": 7.346557759626604, "grad_norm": 0.10205078125, "learning_rate": 3.276769417617452e-05, "loss": 0.0004, "step": 15740 }, { "epoch": 7.347024504084014, "grad_norm": 0.0252685546875, "learning_rate": 3.275683273901815e-05, "loss": 0.0002, "step": 15741 }, { "epoch": 7.347491248541424, "grad_norm": 0.00799560546875, "learning_rate": 3.274597274968182e-05, "loss": 0.0001, "step": 15742 }, { "epoch": 7.3479579929988335, "grad_norm": 0.025634765625, "learning_rate": 3.273511420839927e-05, "loss": 0.0021, "step": 15743 }, { "epoch": 7.348424737456242, "grad_norm": 0.0380859375, "learning_rate": 3.272425711540435e-05, "loss": 0.003, "step": 15744 }, { "epoch": 7.348891481913652, "grad_norm": 0.00762939453125, "learning_rate": 3.271340147093086e-05, "loss": 0.0001, "step": 15745 }, { "epoch": 7.349358226371062, "grad_norm": 0.06298828125, "learning_rate": 3.270254727521246e-05, "loss": 0.0003, "step": 15746 }, { "epoch": 7.349824970828472, "grad_norm": 0.01495361328125, "learning_rate": 3.269169452848288e-05, "loss": 0.0002, "step": 15747 }, { "epoch": 7.350291715285881, "grad_norm": 0.07177734375, "learning_rate": 3.268084323097582e-05, "loss": 0.0003, "step": 15748 }, { "epoch": 7.3507584597432905, "grad_norm": 0.0341796875, "learning_rate": 3.266999338292487e-05, "loss": 0.0018, "step": 15749 }, { "epoch": 7.3512252042007, "grad_norm": 0.051025390625, "learning_rate": 3.2659144984563695e-05, "loss": 0.0027, "step": 15750 }, { "epoch": 7.35169194865811, "grad_norm": 0.03759765625, "learning_rate": 3.2648298036125825e-05, "loss": 0.0002, "step": 15751 }, { "epoch": 7.352158693115519, "grad_norm": 0.0081787109375, "learning_rate": 3.2637452537844816e-05, "loss": 0.0002, "step": 15752 }, { "epoch": 7.352625437572929, "grad_norm": 0.2392578125, "learning_rate": 3.262660848995423e-05, "loss": 0.0007, "step": 15753 }, { "epoch": 7.3530921820303385, "grad_norm": 0.1083984375, "learning_rate": 3.261576589268748e-05, "loss": 0.0059, "step": 15754 }, { "epoch": 7.353558926487748, "grad_norm": 0.048828125, "learning_rate": 3.2604924746278055e-05, "loss": 0.003, "step": 15755 }, { "epoch": 7.354025670945157, "grad_norm": 0.0081787109375, "learning_rate": 3.259408505095941e-05, "loss": 0.0001, "step": 15756 }, { "epoch": 7.354492415402567, "grad_norm": 0.0130615234375, "learning_rate": 3.2583246806964896e-05, "loss": 0.0002, "step": 15757 }, { "epoch": 7.354959159859977, "grad_norm": 0.0322265625, "learning_rate": 3.2572410014527856e-05, "loss": 0.0002, "step": 15758 }, { "epoch": 7.355425904317387, "grad_norm": 0.02197265625, "learning_rate": 3.256157467388163e-05, "loss": 0.0002, "step": 15759 }, { "epoch": 7.3558926487747955, "grad_norm": 0.037353515625, "learning_rate": 3.255074078525957e-05, "loss": 0.0035, "step": 15760 }, { "epoch": 7.356359393232205, "grad_norm": 0.0322265625, "learning_rate": 3.2539908348894855e-05, "loss": 0.0002, "step": 15761 }, { "epoch": 7.356826137689615, "grad_norm": 0.006866455078125, "learning_rate": 3.25290773650208e-05, "loss": 0.0001, "step": 15762 }, { "epoch": 7.357292882147025, "grad_norm": 0.005645751953125, "learning_rate": 3.251824783387053e-05, "loss": 0.0001, "step": 15763 }, { "epoch": 7.357759626604434, "grad_norm": 0.0101318359375, "learning_rate": 3.250741975567726e-05, "loss": 0.0002, "step": 15764 }, { "epoch": 7.358226371061844, "grad_norm": 0.0634765625, "learning_rate": 3.249659313067417e-05, "loss": 0.0025, "step": 15765 }, { "epoch": 7.358693115519253, "grad_norm": 0.007598876953125, "learning_rate": 3.2485767959094284e-05, "loss": 0.0001, "step": 15766 }, { "epoch": 7.359159859976662, "grad_norm": 0.01470947265625, "learning_rate": 3.2474944241170714e-05, "loss": 0.0002, "step": 15767 }, { "epoch": 7.359626604434072, "grad_norm": 0.0091552734375, "learning_rate": 3.246412197713656e-05, "loss": 0.0002, "step": 15768 }, { "epoch": 7.360093348891482, "grad_norm": 0.007659912109375, "learning_rate": 3.245330116722476e-05, "loss": 0.0002, "step": 15769 }, { "epoch": 7.360560093348892, "grad_norm": 0.00927734375, "learning_rate": 3.244248181166833e-05, "loss": 0.0002, "step": 15770 }, { "epoch": 7.3610268378063015, "grad_norm": 0.0128173828125, "learning_rate": 3.243166391070024e-05, "loss": 0.0002, "step": 15771 }, { "epoch": 7.36149358226371, "grad_norm": 0.009765625, "learning_rate": 3.242084746455337e-05, "loss": 0.0001, "step": 15772 }, { "epoch": 7.36196032672112, "grad_norm": 0.05078125, "learning_rate": 3.241003247346064e-05, "loss": 0.0003, "step": 15773 }, { "epoch": 7.36242707117853, "grad_norm": 0.006805419921875, "learning_rate": 3.239921893765494e-05, "loss": 0.0002, "step": 15774 }, { "epoch": 7.362893815635939, "grad_norm": 0.09228515625, "learning_rate": 3.238840685736901e-05, "loss": 0.0003, "step": 15775 }, { "epoch": 7.363360560093349, "grad_norm": 0.007049560546875, "learning_rate": 3.237759623283572e-05, "loss": 0.0001, "step": 15776 }, { "epoch": 7.3638273045507585, "grad_norm": 0.1513671875, "learning_rate": 3.236678706428783e-05, "loss": 0.0029, "step": 15777 }, { "epoch": 7.364294049008168, "grad_norm": 0.01318359375, "learning_rate": 3.235597935195802e-05, "loss": 0.0002, "step": 15778 }, { "epoch": 7.364760793465577, "grad_norm": 0.048828125, "learning_rate": 3.234517309607903e-05, "loss": 0.0026, "step": 15779 }, { "epoch": 7.365227537922987, "grad_norm": 0.04296875, "learning_rate": 3.233436829688356e-05, "loss": 0.0017, "step": 15780 }, { "epoch": 7.365694282380397, "grad_norm": 0.08251953125, "learning_rate": 3.232356495460419e-05, "loss": 0.0025, "step": 15781 }, { "epoch": 7.366161026837807, "grad_norm": 0.04296875, "learning_rate": 3.231276306947359e-05, "loss": 0.0018, "step": 15782 }, { "epoch": 7.3666277712952155, "grad_norm": 0.05712890625, "learning_rate": 3.230196264172426e-05, "loss": 0.0021, "step": 15783 }, { "epoch": 7.367094515752625, "grad_norm": 0.00927734375, "learning_rate": 3.229116367158879e-05, "loss": 0.0001, "step": 15784 }, { "epoch": 7.367561260210035, "grad_norm": 0.0113525390625, "learning_rate": 3.2280366159299715e-05, "loss": 0.0002, "step": 15785 }, { "epoch": 7.368028004667445, "grad_norm": 0.01123046875, "learning_rate": 3.2269570105089465e-05, "loss": 0.0002, "step": 15786 }, { "epoch": 7.368494749124854, "grad_norm": 0.01055908203125, "learning_rate": 3.225877550919052e-05, "loss": 0.0002, "step": 15787 }, { "epoch": 7.368961493582264, "grad_norm": 0.07080078125, "learning_rate": 3.2247982371835325e-05, "loss": 0.0043, "step": 15788 }, { "epoch": 7.369428238039673, "grad_norm": 0.01336669921875, "learning_rate": 3.2237190693256226e-05, "loss": 0.0002, "step": 15789 }, { "epoch": 7.369894982497083, "grad_norm": 0.0242919921875, "learning_rate": 3.222640047368558e-05, "loss": 0.0002, "step": 15790 }, { "epoch": 7.370361726954492, "grad_norm": 0.037109375, "learning_rate": 3.2215611713355767e-05, "loss": 0.0002, "step": 15791 }, { "epoch": 7.370828471411902, "grad_norm": 0.0301513671875, "learning_rate": 3.2204824412499014e-05, "loss": 0.0003, "step": 15792 }, { "epoch": 7.371295215869312, "grad_norm": 0.008544921875, "learning_rate": 3.2194038571347604e-05, "loss": 0.0001, "step": 15793 }, { "epoch": 7.3717619603267215, "grad_norm": 0.0196533203125, "learning_rate": 3.2183254190133806e-05, "loss": 0.0002, "step": 15794 }, { "epoch": 7.37222870478413, "grad_norm": 0.01458740234375, "learning_rate": 3.217247126908977e-05, "loss": 0.0002, "step": 15795 }, { "epoch": 7.37269544924154, "grad_norm": 0.050537109375, "learning_rate": 3.216168980844767e-05, "loss": 0.002, "step": 15796 }, { "epoch": 7.37316219369895, "grad_norm": 0.0059814453125, "learning_rate": 3.215090980843969e-05, "loss": 0.0001, "step": 15797 }, { "epoch": 7.37362893815636, "grad_norm": 0.0400390625, "learning_rate": 3.214013126929787e-05, "loss": 0.002, "step": 15798 }, { "epoch": 7.374095682613769, "grad_norm": 0.007110595703125, "learning_rate": 3.2129354191254346e-05, "loss": 0.0001, "step": 15799 }, { "epoch": 7.3745624270711785, "grad_norm": 0.00946044921875, "learning_rate": 3.21185785745411e-05, "loss": 0.0001, "step": 15800 }, { "epoch": 7.375029171528588, "grad_norm": 0.1689453125, "learning_rate": 3.210780441939017e-05, "loss": 0.0023, "step": 15801 }, { "epoch": 7.375495915985998, "grad_norm": 0.0299072265625, "learning_rate": 3.2097031726033585e-05, "loss": 0.0002, "step": 15802 }, { "epoch": 7.375962660443407, "grad_norm": 0.078125, "learning_rate": 3.2086260494703205e-05, "loss": 0.0026, "step": 15803 }, { "epoch": 7.376429404900817, "grad_norm": 0.01446533203125, "learning_rate": 3.207549072563099e-05, "loss": 0.0002, "step": 15804 }, { "epoch": 7.376896149358227, "grad_norm": 0.01416015625, "learning_rate": 3.2064722419048854e-05, "loss": 0.0002, "step": 15805 }, { "epoch": 7.377362893815636, "grad_norm": 0.0196533203125, "learning_rate": 3.2053955575188624e-05, "loss": 0.0002, "step": 15806 }, { "epoch": 7.377829638273045, "grad_norm": 0.052978515625, "learning_rate": 3.204319019428208e-05, "loss": 0.0021, "step": 15807 }, { "epoch": 7.378296382730455, "grad_norm": 0.006927490234375, "learning_rate": 3.203242627656105e-05, "loss": 0.0001, "step": 15808 }, { "epoch": 7.378763127187865, "grad_norm": 0.007293701171875, "learning_rate": 3.202166382225732e-05, "loss": 0.0002, "step": 15809 }, { "epoch": 7.379229871645274, "grad_norm": 0.0302734375, "learning_rate": 3.2010902831602576e-05, "loss": 0.0021, "step": 15810 }, { "epoch": 7.379696616102684, "grad_norm": 0.0203857421875, "learning_rate": 3.2000143304828546e-05, "loss": 0.0002, "step": 15811 }, { "epoch": 7.380163360560093, "grad_norm": 0.006622314453125, "learning_rate": 3.198938524216685e-05, "loss": 0.0001, "step": 15812 }, { "epoch": 7.380630105017503, "grad_norm": 0.01153564453125, "learning_rate": 3.197862864384915e-05, "loss": 0.0002, "step": 15813 }, { "epoch": 7.381096849474912, "grad_norm": 0.004913330078125, "learning_rate": 3.196787351010708e-05, "loss": 0.0001, "step": 15814 }, { "epoch": 7.381563593932322, "grad_norm": 0.0299072265625, "learning_rate": 3.195711984117215e-05, "loss": 0.0022, "step": 15815 }, { "epoch": 7.382030338389732, "grad_norm": 0.01263427734375, "learning_rate": 3.194636763727591e-05, "loss": 0.0002, "step": 15816 }, { "epoch": 7.3824970828471415, "grad_norm": 0.0064697265625, "learning_rate": 3.1935616898649926e-05, "loss": 0.0001, "step": 15817 }, { "epoch": 7.38296382730455, "grad_norm": 0.05224609375, "learning_rate": 3.192486762552559e-05, "loss": 0.0023, "step": 15818 }, { "epoch": 7.38343057176196, "grad_norm": 0.02685546875, "learning_rate": 3.191411981813438e-05, "loss": 0.0002, "step": 15819 }, { "epoch": 7.38389731621937, "grad_norm": 0.032470703125, "learning_rate": 3.1903373476707754e-05, "loss": 0.0019, "step": 15820 }, { "epoch": 7.38436406067678, "grad_norm": 0.00848388671875, "learning_rate": 3.1892628601477015e-05, "loss": 0.0002, "step": 15821 }, { "epoch": 7.384830805134189, "grad_norm": 0.0184326171875, "learning_rate": 3.188188519267356e-05, "loss": 0.0003, "step": 15822 }, { "epoch": 7.3852975495915985, "grad_norm": 0.01031494140625, "learning_rate": 3.1871143250528715e-05, "loss": 0.0002, "step": 15823 }, { "epoch": 7.385764294049008, "grad_norm": 0.0084228515625, "learning_rate": 3.1860402775273715e-05, "loss": 0.0001, "step": 15824 }, { "epoch": 7.386231038506418, "grad_norm": 0.007659912109375, "learning_rate": 3.184966376713985e-05, "loss": 0.0002, "step": 15825 }, { "epoch": 7.386697782963827, "grad_norm": 0.0130615234375, "learning_rate": 3.1838926226358356e-05, "loss": 0.0002, "step": 15826 }, { "epoch": 7.387164527421237, "grad_norm": 0.00921630859375, "learning_rate": 3.182819015316039e-05, "loss": 0.0002, "step": 15827 }, { "epoch": 7.3876312718786465, "grad_norm": 0.06005859375, "learning_rate": 3.181745554777712e-05, "loss": 0.0051, "step": 15828 }, { "epoch": 7.388098016336056, "grad_norm": 0.01434326171875, "learning_rate": 3.180672241043972e-05, "loss": 0.0002, "step": 15829 }, { "epoch": 7.388564760793465, "grad_norm": 0.01153564453125, "learning_rate": 3.1795990741379225e-05, "loss": 0.0002, "step": 15830 }, { "epoch": 7.389031505250875, "grad_norm": 0.047119140625, "learning_rate": 3.1785260540826735e-05, "loss": 0.0003, "step": 15831 }, { "epoch": 7.389498249708285, "grad_norm": 0.0279541015625, "learning_rate": 3.1774531809013254e-05, "loss": 0.0002, "step": 15832 }, { "epoch": 7.389964994165695, "grad_norm": 0.06591796875, "learning_rate": 3.1763804546169785e-05, "loss": 0.0003, "step": 15833 }, { "epoch": 7.3904317386231035, "grad_norm": 0.017578125, "learning_rate": 3.1753078752527366e-05, "loss": 0.0002, "step": 15834 }, { "epoch": 7.390898483080513, "grad_norm": 0.023681640625, "learning_rate": 3.174235442831684e-05, "loss": 0.0002, "step": 15835 }, { "epoch": 7.391365227537923, "grad_norm": 0.01141357421875, "learning_rate": 3.173163157376915e-05, "loss": 0.0002, "step": 15836 }, { "epoch": 7.391831971995333, "grad_norm": 0.06640625, "learning_rate": 3.1720910189115214e-05, "loss": 0.0017, "step": 15837 }, { "epoch": 7.392298716452742, "grad_norm": 0.029052734375, "learning_rate": 3.171019027458581e-05, "loss": 0.0018, "step": 15838 }, { "epoch": 7.392765460910152, "grad_norm": 0.0101318359375, "learning_rate": 3.169947183041177e-05, "loss": 0.0002, "step": 15839 }, { "epoch": 7.393232205367561, "grad_norm": 0.01190185546875, "learning_rate": 3.168875485682393e-05, "loss": 0.0002, "step": 15840 }, { "epoch": 7.393698949824971, "grad_norm": 0.0458984375, "learning_rate": 3.1678039354052946e-05, "loss": 0.0016, "step": 15841 }, { "epoch": 7.39416569428238, "grad_norm": 0.0120849609375, "learning_rate": 3.1667325322329575e-05, "loss": 0.0002, "step": 15842 }, { "epoch": 7.39463243873979, "grad_norm": 0.01190185546875, "learning_rate": 3.165661276188455e-05, "loss": 0.0002, "step": 15843 }, { "epoch": 7.3950991831972, "grad_norm": 0.0107421875, "learning_rate": 3.164590167294844e-05, "loss": 0.0002, "step": 15844 }, { "epoch": 7.3955659276546095, "grad_norm": 0.00885009765625, "learning_rate": 3.1635192055751915e-05, "loss": 0.0001, "step": 15845 }, { "epoch": 7.396032672112018, "grad_norm": 0.0184326171875, "learning_rate": 3.162448391052559e-05, "loss": 0.0002, "step": 15846 }, { "epoch": 7.396499416569428, "grad_norm": 0.00994873046875, "learning_rate": 3.161377723749995e-05, "loss": 0.0001, "step": 15847 }, { "epoch": 7.396966161026838, "grad_norm": 0.08984375, "learning_rate": 3.160307203690557e-05, "loss": 0.0003, "step": 15848 }, { "epoch": 7.397432905484248, "grad_norm": 0.025390625, "learning_rate": 3.1592368308972965e-05, "loss": 0.0016, "step": 15849 }, { "epoch": 7.397899649941657, "grad_norm": 0.009765625, "learning_rate": 3.1581666053932536e-05, "loss": 0.0001, "step": 15850 }, { "epoch": 7.3983663943990665, "grad_norm": 0.05126953125, "learning_rate": 3.157096527201478e-05, "loss": 0.0026, "step": 15851 }, { "epoch": 7.398833138856476, "grad_norm": 0.0185546875, "learning_rate": 3.156026596345003e-05, "loss": 0.0002, "step": 15852 }, { "epoch": 7.399299883313885, "grad_norm": 0.04638671875, "learning_rate": 3.1549568128468734e-05, "loss": 0.002, "step": 15853 }, { "epoch": 7.399766627771295, "grad_norm": 0.04248046875, "learning_rate": 3.153887176730114e-05, "loss": 0.0032, "step": 15854 }, { "epoch": 7.400233372228705, "grad_norm": 0.06396484375, "learning_rate": 3.1528176880177626e-05, "loss": 0.0003, "step": 15855 }, { "epoch": 7.400700116686115, "grad_norm": 0.03955078125, "learning_rate": 3.151748346732841e-05, "loss": 0.0011, "step": 15856 }, { "epoch": 7.4011668611435235, "grad_norm": 0.0274658203125, "learning_rate": 3.150679152898376e-05, "loss": 0.0002, "step": 15857 }, { "epoch": 7.401633605600933, "grad_norm": 0.01141357421875, "learning_rate": 3.149610106537391e-05, "loss": 0.0001, "step": 15858 }, { "epoch": 7.402100350058343, "grad_norm": 0.0078125, "learning_rate": 3.148541207672897e-05, "loss": 0.0001, "step": 15859 }, { "epoch": 7.402567094515753, "grad_norm": 0.0306396484375, "learning_rate": 3.147472456327914e-05, "loss": 0.0022, "step": 15860 }, { "epoch": 7.403033838973162, "grad_norm": 0.00885009765625, "learning_rate": 3.146403852525455e-05, "loss": 0.0001, "step": 15861 }, { "epoch": 7.403500583430572, "grad_norm": 0.042236328125, "learning_rate": 3.145335396288521e-05, "loss": 0.0017, "step": 15862 }, { "epoch": 7.403967327887981, "grad_norm": 0.016845703125, "learning_rate": 3.144267087640126e-05, "loss": 0.0002, "step": 15863 }, { "epoch": 7.404434072345391, "grad_norm": 0.021484375, "learning_rate": 3.143198926603262e-05, "loss": 0.0002, "step": 15864 }, { "epoch": 7.4049008168028, "grad_norm": 0.01336669921875, "learning_rate": 3.142130913200935e-05, "loss": 0.0001, "step": 15865 }, { "epoch": 7.40536756126021, "grad_norm": 0.0284423828125, "learning_rate": 3.1410630474561406e-05, "loss": 0.0002, "step": 15866 }, { "epoch": 7.40583430571762, "grad_norm": 0.0084228515625, "learning_rate": 3.139995329391866e-05, "loss": 0.0002, "step": 15867 }, { "epoch": 7.4063010501750295, "grad_norm": 0.06103515625, "learning_rate": 3.138927759031103e-05, "loss": 0.0003, "step": 15868 }, { "epoch": 7.406767794632438, "grad_norm": 0.028564453125, "learning_rate": 3.1378603363968404e-05, "loss": 0.0008, "step": 15869 }, { "epoch": 7.407234539089848, "grad_norm": 0.04296875, "learning_rate": 3.1367930615120555e-05, "loss": 0.0003, "step": 15870 }, { "epoch": 7.407701283547258, "grad_norm": 0.006988525390625, "learning_rate": 3.135725934399731e-05, "loss": 0.0001, "step": 15871 }, { "epoch": 7.408168028004668, "grad_norm": 0.08984375, "learning_rate": 3.134658955082848e-05, "loss": 0.006, "step": 15872 }, { "epoch": 7.408634772462077, "grad_norm": 0.01409912109375, "learning_rate": 3.1335921235843703e-05, "loss": 0.0001, "step": 15873 }, { "epoch": 7.4091015169194865, "grad_norm": 0.703125, "learning_rate": 3.132525439927272e-05, "loss": 0.0016, "step": 15874 }, { "epoch": 7.409568261376896, "grad_norm": 0.0400390625, "learning_rate": 3.131458904134525e-05, "loss": 0.0024, "step": 15875 }, { "epoch": 7.410035005834306, "grad_norm": 0.042236328125, "learning_rate": 3.1303925162290834e-05, "loss": 0.0006, "step": 15876 }, { "epoch": 7.410501750291715, "grad_norm": 0.00732421875, "learning_rate": 3.129326276233915e-05, "loss": 0.0001, "step": 15877 }, { "epoch": 7.410968494749125, "grad_norm": 0.0341796875, "learning_rate": 3.1282601841719774e-05, "loss": 0.002, "step": 15878 }, { "epoch": 7.411435239206535, "grad_norm": 0.0128173828125, "learning_rate": 3.127194240066219e-05, "loss": 0.0002, "step": 15879 }, { "epoch": 7.411901983663944, "grad_norm": 0.01287841796875, "learning_rate": 3.126128443939598e-05, "loss": 0.0002, "step": 15880 }, { "epoch": 7.412368728121353, "grad_norm": 0.0091552734375, "learning_rate": 3.125062795815055e-05, "loss": 0.0001, "step": 15881 }, { "epoch": 7.412835472578763, "grad_norm": 0.05224609375, "learning_rate": 3.123997295715537e-05, "loss": 0.0002, "step": 15882 }, { "epoch": 7.413302217036173, "grad_norm": 0.039794921875, "learning_rate": 3.12293194366399e-05, "loss": 0.0027, "step": 15883 }, { "epoch": 7.413768961493583, "grad_norm": 0.00762939453125, "learning_rate": 3.1218667396833455e-05, "loss": 0.0002, "step": 15884 }, { "epoch": 7.414235705950992, "grad_norm": 0.006683349609375, "learning_rate": 3.120801683796541e-05, "loss": 0.0001, "step": 15885 }, { "epoch": 7.414702450408401, "grad_norm": 0.00946044921875, "learning_rate": 3.1197367760265125e-05, "loss": 0.0001, "step": 15886 }, { "epoch": 7.415169194865811, "grad_norm": 0.0230712890625, "learning_rate": 3.118672016396181e-05, "loss": 0.0002, "step": 15887 }, { "epoch": 7.41563593932322, "grad_norm": 0.0341796875, "learning_rate": 3.117607404928478e-05, "loss": 0.0002, "step": 15888 }, { "epoch": 7.41610268378063, "grad_norm": 0.11767578125, "learning_rate": 3.116542941646324e-05, "loss": 0.0059, "step": 15889 }, { "epoch": 7.41656942823804, "grad_norm": 0.025146484375, "learning_rate": 3.115478626572637e-05, "loss": 0.0002, "step": 15890 }, { "epoch": 7.4170361726954495, "grad_norm": 0.0137939453125, "learning_rate": 3.114414459730333e-05, "loss": 0.0002, "step": 15891 }, { "epoch": 7.417502917152859, "grad_norm": 0.00872802734375, "learning_rate": 3.1133504411423276e-05, "loss": 0.0002, "step": 15892 }, { "epoch": 7.417969661610268, "grad_norm": 0.0194091796875, "learning_rate": 3.112286570831526e-05, "loss": 0.0002, "step": 15893 }, { "epoch": 7.418436406067678, "grad_norm": 0.08056640625, "learning_rate": 3.1112228488208364e-05, "loss": 0.0003, "step": 15894 }, { "epoch": 7.418903150525088, "grad_norm": 0.0107421875, "learning_rate": 3.110159275133165e-05, "loss": 0.0002, "step": 15895 }, { "epoch": 7.419369894982497, "grad_norm": 0.0240478515625, "learning_rate": 3.109095849791406e-05, "loss": 0.0002, "step": 15896 }, { "epoch": 7.4198366394399065, "grad_norm": 0.0167236328125, "learning_rate": 3.108032572818458e-05, "loss": 0.0002, "step": 15897 }, { "epoch": 7.420303383897316, "grad_norm": 0.03369140625, "learning_rate": 3.10696944423722e-05, "loss": 0.0002, "step": 15898 }, { "epoch": 7.420770128354726, "grad_norm": 0.018310546875, "learning_rate": 3.105906464070574e-05, "loss": 0.0002, "step": 15899 }, { "epoch": 7.421236872812135, "grad_norm": 0.03125, "learning_rate": 3.1048436323414145e-05, "loss": 0.0028, "step": 15900 }, { "epoch": 7.421703617269545, "grad_norm": 0.0128173828125, "learning_rate": 3.103780949072621e-05, "loss": 0.0002, "step": 15901 }, { "epoch": 7.4221703617269545, "grad_norm": 0.0196533203125, "learning_rate": 3.1027184142870724e-05, "loss": 0.0002, "step": 15902 }, { "epoch": 7.422637106184364, "grad_norm": 0.0068359375, "learning_rate": 3.101656028007649e-05, "loss": 0.0001, "step": 15903 }, { "epoch": 7.423103850641773, "grad_norm": 0.02294921875, "learning_rate": 3.100593790257228e-05, "loss": 0.0002, "step": 15904 }, { "epoch": 7.423570595099183, "grad_norm": 0.01336669921875, "learning_rate": 3.099531701058676e-05, "loss": 0.0002, "step": 15905 }, { "epoch": 7.424037339556593, "grad_norm": 0.0103759765625, "learning_rate": 3.098469760434861e-05, "loss": 0.0002, "step": 15906 }, { "epoch": 7.424504084014003, "grad_norm": 0.026611328125, "learning_rate": 3.097407968408653e-05, "loss": 0.0002, "step": 15907 }, { "epoch": 7.4249708284714115, "grad_norm": 0.033203125, "learning_rate": 3.096346325002908e-05, "loss": 0.0002, "step": 15908 }, { "epoch": 7.425437572928821, "grad_norm": 0.04248046875, "learning_rate": 3.095284830240485e-05, "loss": 0.0025, "step": 15909 }, { "epoch": 7.425904317386231, "grad_norm": 0.01458740234375, "learning_rate": 3.094223484144244e-05, "loss": 0.0002, "step": 15910 }, { "epoch": 7.426371061843641, "grad_norm": 0.0206298828125, "learning_rate": 3.0931622867370304e-05, "loss": 0.0002, "step": 15911 }, { "epoch": 7.42683780630105, "grad_norm": 0.00872802734375, "learning_rate": 3.0921012380416995e-05, "loss": 0.0002, "step": 15912 }, { "epoch": 7.42730455075846, "grad_norm": 0.046875, "learning_rate": 3.0910403380810905e-05, "loss": 0.0025, "step": 15913 }, { "epoch": 7.427771295215869, "grad_norm": 0.029052734375, "learning_rate": 3.0899795868780476e-05, "loss": 0.0002, "step": 15914 }, { "epoch": 7.428238039673279, "grad_norm": 0.01202392578125, "learning_rate": 3.088918984455415e-05, "loss": 0.0002, "step": 15915 }, { "epoch": 7.428704784130688, "grad_norm": 0.00958251953125, "learning_rate": 3.087858530836021e-05, "loss": 0.0001, "step": 15916 }, { "epoch": 7.429171528588098, "grad_norm": 0.0172119140625, "learning_rate": 3.086798226042702e-05, "loss": 0.0002, "step": 15917 }, { "epoch": 7.429638273045508, "grad_norm": 0.030517578125, "learning_rate": 3.085738070098291e-05, "loss": 0.0002, "step": 15918 }, { "epoch": 7.4301050175029175, "grad_norm": 0.01318359375, "learning_rate": 3.084678063025607e-05, "loss": 0.0002, "step": 15919 }, { "epoch": 7.430571761960326, "grad_norm": 0.01171875, "learning_rate": 3.083618204847478e-05, "loss": 0.0002, "step": 15920 }, { "epoch": 7.431038506417736, "grad_norm": 0.02783203125, "learning_rate": 3.082558495586725e-05, "loss": 0.0003, "step": 15921 }, { "epoch": 7.431505250875146, "grad_norm": 0.00836181640625, "learning_rate": 3.0814989352661604e-05, "loss": 0.0001, "step": 15922 }, { "epoch": 7.431971995332556, "grad_norm": 0.01446533203125, "learning_rate": 3.080439523908599e-05, "loss": 0.0002, "step": 15923 }, { "epoch": 7.432438739789965, "grad_norm": 0.01043701171875, "learning_rate": 3.0793802615368564e-05, "loss": 0.0002, "step": 15924 }, { "epoch": 7.4329054842473745, "grad_norm": 0.0184326171875, "learning_rate": 3.0783211481737314e-05, "loss": 0.0001, "step": 15925 }, { "epoch": 7.433372228704784, "grad_norm": 0.01007080078125, "learning_rate": 3.0772621838420325e-05, "loss": 0.0002, "step": 15926 }, { "epoch": 7.433838973162194, "grad_norm": 0.09765625, "learning_rate": 3.076203368564562e-05, "loss": 0.0026, "step": 15927 }, { "epoch": 7.434305717619603, "grad_norm": 0.01055908203125, "learning_rate": 3.0751447023641124e-05, "loss": 0.0002, "step": 15928 }, { "epoch": 7.434772462077013, "grad_norm": 0.01007080078125, "learning_rate": 3.0740861852634826e-05, "loss": 0.0002, "step": 15929 }, { "epoch": 7.435239206534423, "grad_norm": 0.0155029296875, "learning_rate": 3.0730278172854596e-05, "loss": 0.0002, "step": 15930 }, { "epoch": 7.4357059509918315, "grad_norm": 0.0174560546875, "learning_rate": 3.071969598452834e-05, "loss": 0.0002, "step": 15931 }, { "epoch": 7.436172695449241, "grad_norm": 0.0234375, "learning_rate": 3.0709115287883915e-05, "loss": 0.002, "step": 15932 }, { "epoch": 7.436639439906651, "grad_norm": 0.06298828125, "learning_rate": 3.06985360831491e-05, "loss": 0.0003, "step": 15933 }, { "epoch": 7.437106184364061, "grad_norm": 0.0634765625, "learning_rate": 3.0687958370551686e-05, "loss": 0.0069, "step": 15934 }, { "epoch": 7.437572928821471, "grad_norm": 0.017333984375, "learning_rate": 3.067738215031946e-05, "loss": 0.0002, "step": 15935 }, { "epoch": 7.43803967327888, "grad_norm": 0.0126953125, "learning_rate": 3.0666807422680074e-05, "loss": 0.0002, "step": 15936 }, { "epoch": 7.438506417736289, "grad_norm": 0.032958984375, "learning_rate": 3.065623418786127e-05, "loss": 0.002, "step": 15937 }, { "epoch": 7.438973162193699, "grad_norm": 0.185546875, "learning_rate": 3.0645662446090694e-05, "loss": 0.0007, "step": 15938 }, { "epoch": 7.439439906651108, "grad_norm": 0.01092529296875, "learning_rate": 3.063509219759594e-05, "loss": 0.0002, "step": 15939 }, { "epoch": 7.439906651108518, "grad_norm": 0.05078125, "learning_rate": 3.06245234426046e-05, "loss": 0.0002, "step": 15940 }, { "epoch": 7.440373395565928, "grad_norm": 0.00958251953125, "learning_rate": 3.061395618134428e-05, "loss": 0.0002, "step": 15941 }, { "epoch": 7.4408401400233375, "grad_norm": 0.0140380859375, "learning_rate": 3.060339041404244e-05, "loss": 0.0002, "step": 15942 }, { "epoch": 7.441306884480746, "grad_norm": 0.0159912109375, "learning_rate": 3.05928261409266e-05, "loss": 0.0002, "step": 15943 }, { "epoch": 7.441773628938156, "grad_norm": 0.036376953125, "learning_rate": 3.0582263362224254e-05, "loss": 0.0003, "step": 15944 }, { "epoch": 7.442240373395566, "grad_norm": 0.0172119140625, "learning_rate": 3.057170207816277e-05, "loss": 0.0002, "step": 15945 }, { "epoch": 7.442707117852976, "grad_norm": 0.011962890625, "learning_rate": 3.0561142288969555e-05, "loss": 0.0002, "step": 15946 }, { "epoch": 7.443173862310385, "grad_norm": 0.04833984375, "learning_rate": 3.055058399487204e-05, "loss": 0.0026, "step": 15947 }, { "epoch": 7.4436406067677945, "grad_norm": 0.01422119140625, "learning_rate": 3.054002719609747e-05, "loss": 0.0001, "step": 15948 }, { "epoch": 7.444107351225204, "grad_norm": 0.03271484375, "learning_rate": 3.0529471892873205e-05, "loss": 0.0002, "step": 15949 }, { "epoch": 7.444574095682614, "grad_norm": 0.0186767578125, "learning_rate": 3.0518918085426504e-05, "loss": 0.0002, "step": 15950 }, { "epoch": 7.445040840140023, "grad_norm": 0.01214599609375, "learning_rate": 3.050836577398455e-05, "loss": 0.0002, "step": 15951 }, { "epoch": 7.445507584597433, "grad_norm": 0.0107421875, "learning_rate": 3.0497814958774584e-05, "loss": 0.0002, "step": 15952 }, { "epoch": 7.445974329054843, "grad_norm": 0.00933837890625, "learning_rate": 3.04872656400238e-05, "loss": 0.0002, "step": 15953 }, { "epoch": 7.446441073512252, "grad_norm": 0.006103515625, "learning_rate": 3.047671781795929e-05, "loss": 0.0002, "step": 15954 }, { "epoch": 7.446907817969661, "grad_norm": 0.015869140625, "learning_rate": 3.046617149280817e-05, "loss": 0.0002, "step": 15955 }, { "epoch": 7.447374562427071, "grad_norm": 0.0130615234375, "learning_rate": 3.0455626664797578e-05, "loss": 0.0001, "step": 15956 }, { "epoch": 7.447841306884481, "grad_norm": 0.0162353515625, "learning_rate": 3.0445083334154455e-05, "loss": 0.0002, "step": 15957 }, { "epoch": 7.448308051341891, "grad_norm": 0.0096435546875, "learning_rate": 3.0434541501105874e-05, "loss": 0.0001, "step": 15958 }, { "epoch": 7.4487747957993, "grad_norm": 0.0380859375, "learning_rate": 3.0424001165878824e-05, "loss": 0.0015, "step": 15959 }, { "epoch": 7.449241540256709, "grad_norm": 0.01318359375, "learning_rate": 3.0413462328700194e-05, "loss": 0.0002, "step": 15960 }, { "epoch": 7.449708284714119, "grad_norm": 0.0164794921875, "learning_rate": 3.0402924989796954e-05, "loss": 0.0002, "step": 15961 }, { "epoch": 7.450175029171529, "grad_norm": 0.0169677734375, "learning_rate": 3.0392389149395928e-05, "loss": 0.0002, "step": 15962 }, { "epoch": 7.450641773628938, "grad_norm": 0.0830078125, "learning_rate": 3.0381854807723997e-05, "loss": 0.0048, "step": 15963 }, { "epoch": 7.451108518086348, "grad_norm": 0.01434326171875, "learning_rate": 3.0371321965008014e-05, "loss": 0.0002, "step": 15964 }, { "epoch": 7.4515752625437575, "grad_norm": 0.05126953125, "learning_rate": 3.036079062147468e-05, "loss": 0.0033, "step": 15965 }, { "epoch": 7.452042007001167, "grad_norm": 0.005828857421875, "learning_rate": 3.0350260777350793e-05, "loss": 0.0001, "step": 15966 }, { "epoch": 7.452508751458576, "grad_norm": 0.0244140625, "learning_rate": 3.03397324328631e-05, "loss": 0.0002, "step": 15967 }, { "epoch": 7.452975495915986, "grad_norm": 0.011474609375, "learning_rate": 3.032920558823823e-05, "loss": 0.0002, "step": 15968 }, { "epoch": 7.453442240373396, "grad_norm": 0.0106201171875, "learning_rate": 3.0318680243702868e-05, "loss": 0.0002, "step": 15969 }, { "epoch": 7.4539089848308056, "grad_norm": 0.009521484375, "learning_rate": 3.0308156399483657e-05, "loss": 0.0002, "step": 15970 }, { "epoch": 7.4543757292882145, "grad_norm": 0.005767822265625, "learning_rate": 3.0297634055807134e-05, "loss": 0.0001, "step": 15971 }, { "epoch": 7.454842473745624, "grad_norm": 0.038818359375, "learning_rate": 3.0287113212899887e-05, "loss": 0.0002, "step": 15972 }, { "epoch": 7.455309218203034, "grad_norm": 0.025390625, "learning_rate": 3.0276593870988467e-05, "loss": 0.0025, "step": 15973 }, { "epoch": 7.455775962660443, "grad_norm": 0.02001953125, "learning_rate": 3.0266076030299316e-05, "loss": 0.0002, "step": 15974 }, { "epoch": 7.456242707117853, "grad_norm": 0.043212890625, "learning_rate": 3.025555969105891e-05, "loss": 0.0002, "step": 15975 }, { "epoch": 7.4567094515752625, "grad_norm": 0.1337890625, "learning_rate": 3.024504485349373e-05, "loss": 0.0003, "step": 15976 }, { "epoch": 7.457176196032672, "grad_norm": 0.006988525390625, "learning_rate": 3.023453151783009e-05, "loss": 0.0002, "step": 15977 }, { "epoch": 7.457642940490082, "grad_norm": 0.037353515625, "learning_rate": 3.0224019684294424e-05, "loss": 0.0002, "step": 15978 }, { "epoch": 7.458109684947491, "grad_norm": 0.0198974609375, "learning_rate": 3.0213509353113002e-05, "loss": 0.0002, "step": 15979 }, { "epoch": 7.458576429404901, "grad_norm": 0.0140380859375, "learning_rate": 3.020300052451216e-05, "loss": 0.0002, "step": 15980 }, { "epoch": 7.459043173862311, "grad_norm": 0.050537109375, "learning_rate": 3.0192493198718175e-05, "loss": 0.0002, "step": 15981 }, { "epoch": 7.4595099183197195, "grad_norm": 0.0194091796875, "learning_rate": 3.0181987375957245e-05, "loss": 0.0003, "step": 15982 }, { "epoch": 7.459976662777129, "grad_norm": 0.037353515625, "learning_rate": 3.0171483056455584e-05, "loss": 0.0003, "step": 15983 }, { "epoch": 7.460443407234539, "grad_norm": 0.08740234375, "learning_rate": 3.0160980240439406e-05, "loss": 0.0003, "step": 15984 }, { "epoch": 7.460910151691949, "grad_norm": 0.040771484375, "learning_rate": 3.015047892813477e-05, "loss": 0.0021, "step": 15985 }, { "epoch": 7.461376896149358, "grad_norm": 0.013671875, "learning_rate": 3.0139979119767835e-05, "loss": 0.0001, "step": 15986 }, { "epoch": 7.461843640606768, "grad_norm": 0.006103515625, "learning_rate": 3.0129480815564683e-05, "loss": 0.0001, "step": 15987 }, { "epoch": 7.462310385064177, "grad_norm": 0.01007080078125, "learning_rate": 3.0118984015751306e-05, "loss": 0.0001, "step": 15988 }, { "epoch": 7.462777129521587, "grad_norm": 0.01385498046875, "learning_rate": 3.010848872055373e-05, "loss": 0.0002, "step": 15989 }, { "epoch": 7.463243873978996, "grad_norm": 0.01177978515625, "learning_rate": 3.009799493019797e-05, "loss": 0.0002, "step": 15990 }, { "epoch": 7.463710618436406, "grad_norm": 0.01336669921875, "learning_rate": 3.0087502644909915e-05, "loss": 0.0002, "step": 15991 }, { "epoch": 7.464177362893816, "grad_norm": 0.051513671875, "learning_rate": 3.0077011864915483e-05, "loss": 0.0015, "step": 15992 }, { "epoch": 7.4646441073512255, "grad_norm": 0.043212890625, "learning_rate": 3.0066522590440604e-05, "loss": 0.0033, "step": 15993 }, { "epoch": 7.465110851808634, "grad_norm": 0.01239013671875, "learning_rate": 3.0056034821711065e-05, "loss": 0.0001, "step": 15994 }, { "epoch": 7.465577596266044, "grad_norm": 0.033935546875, "learning_rate": 3.0045548558952685e-05, "loss": 0.002, "step": 15995 }, { "epoch": 7.466044340723454, "grad_norm": 0.0057373046875, "learning_rate": 3.0035063802391338e-05, "loss": 0.0001, "step": 15996 }, { "epoch": 7.466511085180864, "grad_norm": 0.01611328125, "learning_rate": 3.0024580552252623e-05, "loss": 0.0002, "step": 15997 }, { "epoch": 7.466977829638273, "grad_norm": 0.04736328125, "learning_rate": 3.0014098808762324e-05, "loss": 0.0016, "step": 15998 }, { "epoch": 7.4674445740956825, "grad_norm": 0.009521484375, "learning_rate": 3.0003618572146163e-05, "loss": 0.0001, "step": 15999 }, { "epoch": 7.467911318553092, "grad_norm": 0.019287109375, "learning_rate": 2.9993139842629737e-05, "loss": 0.0002, "step": 16000 }, { "epoch": 7.468378063010502, "grad_norm": 0.051025390625, "learning_rate": 2.9982662620438674e-05, "loss": 0.002, "step": 16001 }, { "epoch": 7.468844807467911, "grad_norm": 0.04736328125, "learning_rate": 2.9972186905798595e-05, "loss": 0.0021, "step": 16002 }, { "epoch": 7.469311551925321, "grad_norm": 0.0172119140625, "learning_rate": 2.9961712698935007e-05, "loss": 0.0001, "step": 16003 }, { "epoch": 7.469778296382731, "grad_norm": 0.0419921875, "learning_rate": 2.9951240000073443e-05, "loss": 0.0002, "step": 16004 }, { "epoch": 7.47024504084014, "grad_norm": 0.06005859375, "learning_rate": 2.9940768809439445e-05, "loss": 0.0021, "step": 16005 }, { "epoch": 7.470711785297549, "grad_norm": 0.0458984375, "learning_rate": 2.99302991272584e-05, "loss": 0.0003, "step": 16006 }, { "epoch": 7.471178529754959, "grad_norm": 0.021484375, "learning_rate": 2.9919830953755744e-05, "loss": 0.0002, "step": 16007 }, { "epoch": 7.471645274212369, "grad_norm": 0.0272216796875, "learning_rate": 2.9909364289156916e-05, "loss": 0.0002, "step": 16008 }, { "epoch": 7.472112018669779, "grad_norm": 0.0206298828125, "learning_rate": 2.989889913368722e-05, "loss": 0.0002, "step": 16009 }, { "epoch": 7.472578763127188, "grad_norm": 0.046875, "learning_rate": 2.988843548757203e-05, "loss": 0.0002, "step": 16010 }, { "epoch": 7.473045507584597, "grad_norm": 0.01397705078125, "learning_rate": 2.987797335103658e-05, "loss": 0.0002, "step": 16011 }, { "epoch": 7.473512252042007, "grad_norm": 0.023681640625, "learning_rate": 2.986751272430617e-05, "loss": 0.0002, "step": 16012 }, { "epoch": 7.473978996499417, "grad_norm": 0.0157470703125, "learning_rate": 2.9857053607606055e-05, "loss": 0.0002, "step": 16013 }, { "epoch": 7.474445740956826, "grad_norm": 0.018798828125, "learning_rate": 2.9846596001161366e-05, "loss": 0.0002, "step": 16014 }, { "epoch": 7.474912485414236, "grad_norm": 0.0098876953125, "learning_rate": 2.98361399051973e-05, "loss": 0.0001, "step": 16015 }, { "epoch": 7.4753792298716455, "grad_norm": 0.09375, "learning_rate": 2.9825685319939035e-05, "loss": 0.0002, "step": 16016 }, { "epoch": 7.475845974329054, "grad_norm": 0.022705078125, "learning_rate": 2.981523224561158e-05, "loss": 0.0002, "step": 16017 }, { "epoch": 7.476312718786464, "grad_norm": 0.07861328125, "learning_rate": 2.9804780682440048e-05, "loss": 0.0003, "step": 16018 }, { "epoch": 7.476779463243874, "grad_norm": 0.029052734375, "learning_rate": 2.9794330630649503e-05, "loss": 0.0002, "step": 16019 }, { "epoch": 7.477246207701284, "grad_norm": 0.287109375, "learning_rate": 2.9783882090464886e-05, "loss": 0.0009, "step": 16020 }, { "epoch": 7.477712952158694, "grad_norm": 0.06689453125, "learning_rate": 2.97734350621112e-05, "loss": 0.0013, "step": 16021 }, { "epoch": 7.4781796966161025, "grad_norm": 0.02001953125, "learning_rate": 2.976298954581339e-05, "loss": 0.0002, "step": 16022 }, { "epoch": 7.478646441073512, "grad_norm": 0.0267333984375, "learning_rate": 2.9752545541796327e-05, "loss": 0.0001, "step": 16023 }, { "epoch": 7.479113185530922, "grad_norm": 0.05810546875, "learning_rate": 2.9742103050284886e-05, "loss": 0.0024, "step": 16024 }, { "epoch": 7.479579929988331, "grad_norm": 0.0157470703125, "learning_rate": 2.973166207150396e-05, "loss": 0.0001, "step": 16025 }, { "epoch": 7.480046674445741, "grad_norm": 0.0078125, "learning_rate": 2.9721222605678278e-05, "loss": 0.0001, "step": 16026 }, { "epoch": 7.480513418903151, "grad_norm": 0.0625, "learning_rate": 2.9710784653032654e-05, "loss": 0.0002, "step": 16027 }, { "epoch": 7.48098016336056, "grad_norm": 0.0205078125, "learning_rate": 2.970034821379185e-05, "loss": 0.0002, "step": 16028 }, { "epoch": 7.481446907817969, "grad_norm": 0.07373046875, "learning_rate": 2.9689913288180515e-05, "loss": 0.0003, "step": 16029 }, { "epoch": 7.481913652275379, "grad_norm": 0.01397705078125, "learning_rate": 2.96794798764234e-05, "loss": 0.0002, "step": 16030 }, { "epoch": 7.482380396732789, "grad_norm": 0.0146484375, "learning_rate": 2.9669047978745058e-05, "loss": 0.0002, "step": 16031 }, { "epoch": 7.482847141190199, "grad_norm": 0.01202392578125, "learning_rate": 2.9658617595370162e-05, "loss": 0.0001, "step": 16032 }, { "epoch": 7.483313885647608, "grad_norm": 0.00958251953125, "learning_rate": 2.9648188726523295e-05, "loss": 0.0002, "step": 16033 }, { "epoch": 7.483780630105017, "grad_norm": 0.009521484375, "learning_rate": 2.963776137242895e-05, "loss": 0.0001, "step": 16034 }, { "epoch": 7.484247374562427, "grad_norm": 0.008544921875, "learning_rate": 2.9627335533311672e-05, "loss": 0.0002, "step": 16035 }, { "epoch": 7.484714119019837, "grad_norm": 0.0091552734375, "learning_rate": 2.961691120939598e-05, "loss": 0.0002, "step": 16036 }, { "epoch": 7.485180863477246, "grad_norm": 0.01361083984375, "learning_rate": 2.960648840090624e-05, "loss": 0.0002, "step": 16037 }, { "epoch": 7.485647607934656, "grad_norm": 0.0634765625, "learning_rate": 2.9596067108066905e-05, "loss": 0.0003, "step": 16038 }, { "epoch": 7.4861143523920655, "grad_norm": 0.007110595703125, "learning_rate": 2.9585647331102406e-05, "loss": 0.0001, "step": 16039 }, { "epoch": 7.486581096849475, "grad_norm": 0.25390625, "learning_rate": 2.9575229070237e-05, "loss": 0.0011, "step": 16040 }, { "epoch": 7.487047841306884, "grad_norm": 0.1650390625, "learning_rate": 2.9564812325695056e-05, "loss": 0.0003, "step": 16041 }, { "epoch": 7.487514585764294, "grad_norm": 0.01214599609375, "learning_rate": 2.955439709770088e-05, "loss": 0.0002, "step": 16042 }, { "epoch": 7.487981330221704, "grad_norm": 0.0179443359375, "learning_rate": 2.9543983386478668e-05, "loss": 0.0002, "step": 16043 }, { "epoch": 7.4884480746791136, "grad_norm": 0.0277099609375, "learning_rate": 2.9533571192252696e-05, "loss": 0.0013, "step": 16044 }, { "epoch": 7.4889148191365225, "grad_norm": 0.048828125, "learning_rate": 2.9523160515247116e-05, "loss": 0.0003, "step": 16045 }, { "epoch": 7.489381563593932, "grad_norm": 0.00848388671875, "learning_rate": 2.9512751355686064e-05, "loss": 0.0002, "step": 16046 }, { "epoch": 7.489848308051342, "grad_norm": 0.03515625, "learning_rate": 2.9502343713793667e-05, "loss": 0.002, "step": 16047 }, { "epoch": 7.490315052508752, "grad_norm": 0.0556640625, "learning_rate": 2.9491937589794072e-05, "loss": 0.004, "step": 16048 }, { "epoch": 7.490781796966161, "grad_norm": 0.01544189453125, "learning_rate": 2.9481532983911252e-05, "loss": 0.0001, "step": 16049 }, { "epoch": 7.4912485414235706, "grad_norm": 0.048828125, "learning_rate": 2.9471129896369275e-05, "loss": 0.0002, "step": 16050 }, { "epoch": 7.49171528588098, "grad_norm": 0.0228271484375, "learning_rate": 2.946072832739215e-05, "loss": 0.0002, "step": 16051 }, { "epoch": 7.49218203033839, "grad_norm": 0.0101318359375, "learning_rate": 2.945032827720379e-05, "loss": 0.0002, "step": 16052 }, { "epoch": 7.492648774795799, "grad_norm": 0.007354736328125, "learning_rate": 2.943992974602813e-05, "loss": 0.0001, "step": 16053 }, { "epoch": 7.493115519253209, "grad_norm": 0.0098876953125, "learning_rate": 2.9429532734089094e-05, "loss": 0.0002, "step": 16054 }, { "epoch": 7.493582263710619, "grad_norm": 0.01251220703125, "learning_rate": 2.9419137241610496e-05, "loss": 0.0002, "step": 16055 }, { "epoch": 7.494049008168028, "grad_norm": 0.08447265625, "learning_rate": 2.9408743268816175e-05, "loss": 0.002, "step": 16056 }, { "epoch": 7.494515752625437, "grad_norm": 0.00592041015625, "learning_rate": 2.9398350815929975e-05, "loss": 0.0001, "step": 16057 }, { "epoch": 7.494982497082847, "grad_norm": 0.037109375, "learning_rate": 2.9387959883175574e-05, "loss": 0.0003, "step": 16058 }, { "epoch": 7.495449241540257, "grad_norm": 0.02197265625, "learning_rate": 2.937757047077678e-05, "loss": 0.0002, "step": 16059 }, { "epoch": 7.495915985997666, "grad_norm": 0.019775390625, "learning_rate": 2.936718257895722e-05, "loss": 0.0002, "step": 16060 }, { "epoch": 7.496382730455076, "grad_norm": 0.016357421875, "learning_rate": 2.9356796207940597e-05, "loss": 0.0001, "step": 16061 }, { "epoch": 7.496849474912485, "grad_norm": 0.050537109375, "learning_rate": 2.9346411357950544e-05, "loss": 0.0021, "step": 16062 }, { "epoch": 7.497316219369895, "grad_norm": 0.051513671875, "learning_rate": 2.9336028029210627e-05, "loss": 0.0002, "step": 16063 }, { "epoch": 7.497782963827304, "grad_norm": 0.078125, "learning_rate": 2.9325646221944426e-05, "loss": 0.0033, "step": 16064 }, { "epoch": 7.498249708284714, "grad_norm": 0.01025390625, "learning_rate": 2.9315265936375504e-05, "loss": 0.0001, "step": 16065 }, { "epoch": 7.498716452742124, "grad_norm": 0.0162353515625, "learning_rate": 2.9304887172727313e-05, "loss": 0.0002, "step": 16066 }, { "epoch": 7.4991831971995335, "grad_norm": 0.04052734375, "learning_rate": 2.9294509931223323e-05, "loss": 0.0017, "step": 16067 }, { "epoch": 7.499649941656942, "grad_norm": 0.0093994140625, "learning_rate": 2.928413421208702e-05, "loss": 0.0002, "step": 16068 }, { "epoch": 7.500116686114352, "grad_norm": 0.01104736328125, "learning_rate": 2.927376001554174e-05, "loss": 0.0001, "step": 16069 }, { "epoch": 7.500583430571762, "grad_norm": 0.0098876953125, "learning_rate": 2.926338734181088e-05, "loss": 0.0002, "step": 16070 }, { "epoch": 7.501050175029172, "grad_norm": 0.2578125, "learning_rate": 2.92530161911178e-05, "loss": 0.0008, "step": 16071 }, { "epoch": 7.501516919486581, "grad_norm": 0.046142578125, "learning_rate": 2.924264656368575e-05, "loss": 0.0002, "step": 16072 }, { "epoch": 7.5019836639439905, "grad_norm": 0.032470703125, "learning_rate": 2.9232278459738026e-05, "loss": 0.0012, "step": 16073 }, { "epoch": 7.5024504084014, "grad_norm": 0.01025390625, "learning_rate": 2.92219118794979e-05, "loss": 0.0001, "step": 16074 }, { "epoch": 7.50291715285881, "grad_norm": 0.12158203125, "learning_rate": 2.9211546823188494e-05, "loss": 0.0027, "step": 16075 }, { "epoch": 7.503383897316219, "grad_norm": 0.00885009765625, "learning_rate": 2.920118329103304e-05, "loss": 0.0002, "step": 16076 }, { "epoch": 7.503850641773629, "grad_norm": 0.00970458984375, "learning_rate": 2.9190821283254688e-05, "loss": 0.0001, "step": 16077 }, { "epoch": 7.504317386231039, "grad_norm": 0.018310546875, "learning_rate": 2.9180460800076493e-05, "loss": 0.0002, "step": 16078 }, { "epoch": 7.504784130688448, "grad_norm": 0.0159912109375, "learning_rate": 2.9170101841721576e-05, "loss": 0.0002, "step": 16079 }, { "epoch": 7.505250875145857, "grad_norm": 0.00579833984375, "learning_rate": 2.9159744408412926e-05, "loss": 0.0001, "step": 16080 }, { "epoch": 7.505250875145857, "eval_loss": 2.4192376136779785, "eval_runtime": 83.0787, "eval_samples_per_second": 21.714, "eval_steps_per_second": 2.72, "step": 16080 }, { "epoch": 7.505717619603267, "grad_norm": 0.050537109375, "learning_rate": 2.9149388500373587e-05, "loss": 0.0019, "step": 16081 }, { "epoch": 7.506184364060677, "grad_norm": 0.0111083984375, "learning_rate": 2.9139034117826535e-05, "loss": 0.0002, "step": 16082 }, { "epoch": 7.506651108518087, "grad_norm": 0.043701171875, "learning_rate": 2.9128681260994682e-05, "loss": 0.0034, "step": 16083 }, { "epoch": 7.507117852975496, "grad_norm": 0.00848388671875, "learning_rate": 2.911832993010095e-05, "loss": 0.0002, "step": 16084 }, { "epoch": 7.507584597432905, "grad_norm": 0.015869140625, "learning_rate": 2.910798012536826e-05, "loss": 0.0002, "step": 16085 }, { "epoch": 7.508051341890315, "grad_norm": 0.01904296875, "learning_rate": 2.909763184701937e-05, "loss": 0.0002, "step": 16086 }, { "epoch": 7.508518086347725, "grad_norm": 0.005889892578125, "learning_rate": 2.908728509527714e-05, "loss": 0.0002, "step": 16087 }, { "epoch": 7.508984830805134, "grad_norm": 0.035888671875, "learning_rate": 2.907693987036437e-05, "loss": 0.0024, "step": 16088 }, { "epoch": 7.509451575262544, "grad_norm": 0.0108642578125, "learning_rate": 2.9066596172503745e-05, "loss": 0.0001, "step": 16089 }, { "epoch": 7.5099183197199535, "grad_norm": 0.01531982421875, "learning_rate": 2.9056254001918004e-05, "loss": 0.0002, "step": 16090 }, { "epoch": 7.510385064177363, "grad_norm": 0.00848388671875, "learning_rate": 2.9045913358829867e-05, "loss": 0.0002, "step": 16091 }, { "epoch": 7.510851808634772, "grad_norm": 0.01239013671875, "learning_rate": 2.9035574243461927e-05, "loss": 0.0001, "step": 16092 }, { "epoch": 7.511318553092182, "grad_norm": 0.01385498046875, "learning_rate": 2.9025236656036792e-05, "loss": 0.0002, "step": 16093 }, { "epoch": 7.511785297549592, "grad_norm": 0.0103759765625, "learning_rate": 2.9014900596777082e-05, "loss": 0.0001, "step": 16094 }, { "epoch": 7.512252042007001, "grad_norm": 0.01171875, "learning_rate": 2.9004566065905293e-05, "loss": 0.0002, "step": 16095 }, { "epoch": 7.5127187864644105, "grad_norm": 0.00701904296875, "learning_rate": 2.8994233063643973e-05, "loss": 0.0001, "step": 16096 }, { "epoch": 7.51318553092182, "grad_norm": 0.0233154296875, "learning_rate": 2.8983901590215622e-05, "loss": 0.0002, "step": 16097 }, { "epoch": 7.51365227537923, "grad_norm": 0.0142822265625, "learning_rate": 2.8973571645842633e-05, "loss": 0.0002, "step": 16098 }, { "epoch": 7.51411901983664, "grad_norm": 0.0223388671875, "learning_rate": 2.896324323074745e-05, "loss": 0.0002, "step": 16099 }, { "epoch": 7.514585764294049, "grad_norm": 0.09716796875, "learning_rate": 2.895291634515249e-05, "loss": 0.0022, "step": 16100 }, { "epoch": 7.515052508751459, "grad_norm": 0.01708984375, "learning_rate": 2.8942590989280037e-05, "loss": 0.0002, "step": 16101 }, { "epoch": 7.515519253208868, "grad_norm": 0.0218505859375, "learning_rate": 2.8932267163352446e-05, "loss": 0.0002, "step": 16102 }, { "epoch": 7.515985997666277, "grad_norm": 0.00848388671875, "learning_rate": 2.892194486759201e-05, "loss": 0.0002, "step": 16103 }, { "epoch": 7.516452742123687, "grad_norm": 0.01104736328125, "learning_rate": 2.8911624102220948e-05, "loss": 0.0002, "step": 16104 }, { "epoch": 7.516919486581097, "grad_norm": 0.0084228515625, "learning_rate": 2.8901304867461486e-05, "loss": 0.0002, "step": 16105 }, { "epoch": 7.517386231038507, "grad_norm": 0.00811767578125, "learning_rate": 2.8890987163535844e-05, "loss": 0.0001, "step": 16106 }, { "epoch": 7.5178529754959165, "grad_norm": 0.0125732421875, "learning_rate": 2.8880670990666124e-05, "loss": 0.0002, "step": 16107 }, { "epoch": 7.518319719953325, "grad_norm": 0.0233154296875, "learning_rate": 2.887035634907449e-05, "loss": 0.0002, "step": 16108 }, { "epoch": 7.518786464410735, "grad_norm": 0.0260009765625, "learning_rate": 2.886004323898297e-05, "loss": 0.0002, "step": 16109 }, { "epoch": 7.519253208868145, "grad_norm": 0.02392578125, "learning_rate": 2.8849731660613654e-05, "loss": 0.0002, "step": 16110 }, { "epoch": 7.519719953325554, "grad_norm": 0.01495361328125, "learning_rate": 2.883942161418859e-05, "loss": 0.0002, "step": 16111 }, { "epoch": 7.520186697782964, "grad_norm": 0.00665283203125, "learning_rate": 2.8829113099929705e-05, "loss": 0.0002, "step": 16112 }, { "epoch": 7.5206534422403735, "grad_norm": 0.01025390625, "learning_rate": 2.8818806118058973e-05, "loss": 0.0001, "step": 16113 }, { "epoch": 7.521120186697783, "grad_norm": 0.026611328125, "learning_rate": 2.8808500668798354e-05, "loss": 0.0002, "step": 16114 }, { "epoch": 7.521586931155192, "grad_norm": 0.050537109375, "learning_rate": 2.8798196752369677e-05, "loss": 0.0009, "step": 16115 }, { "epoch": 7.522053675612602, "grad_norm": 0.05810546875, "learning_rate": 2.8787894368994816e-05, "loss": 0.0018, "step": 16116 }, { "epoch": 7.522520420070012, "grad_norm": 0.00823974609375, "learning_rate": 2.877759351889564e-05, "loss": 0.0001, "step": 16117 }, { "epoch": 7.5229871645274216, "grad_norm": 0.1669921875, "learning_rate": 2.876729420229386e-05, "loss": 0.0006, "step": 16118 }, { "epoch": 7.5234539089848305, "grad_norm": 0.032470703125, "learning_rate": 2.8756996419411276e-05, "loss": 0.0028, "step": 16119 }, { "epoch": 7.52392065344224, "grad_norm": 0.10888671875, "learning_rate": 2.8746700170469633e-05, "loss": 0.0017, "step": 16120 }, { "epoch": 7.52438739789965, "grad_norm": 0.01373291015625, "learning_rate": 2.873640545569056e-05, "loss": 0.0002, "step": 16121 }, { "epoch": 7.52485414235706, "grad_norm": 0.01287841796875, "learning_rate": 2.8726112275295746e-05, "loss": 0.0002, "step": 16122 }, { "epoch": 7.525320886814469, "grad_norm": 0.05419921875, "learning_rate": 2.871582062950685e-05, "loss": 0.0003, "step": 16123 }, { "epoch": 7.5257876312718786, "grad_norm": 0.0576171875, "learning_rate": 2.87055305185454e-05, "loss": 0.0002, "step": 16124 }, { "epoch": 7.526254375729288, "grad_norm": 0.007568359375, "learning_rate": 2.8695241942632967e-05, "loss": 0.0001, "step": 16125 }, { "epoch": 7.526721120186698, "grad_norm": 0.00830078125, "learning_rate": 2.8684954901991123e-05, "loss": 0.0001, "step": 16126 }, { "epoch": 7.527187864644107, "grad_norm": 0.01708984375, "learning_rate": 2.8674669396841302e-05, "loss": 0.0002, "step": 16127 }, { "epoch": 7.527654609101517, "grad_norm": 0.07275390625, "learning_rate": 2.8664385427405017e-05, "loss": 0.0002, "step": 16128 }, { "epoch": 7.528121353558927, "grad_norm": 0.047119140625, "learning_rate": 2.865410299390362e-05, "loss": 0.0022, "step": 16129 }, { "epoch": 7.528588098016336, "grad_norm": 0.006591796875, "learning_rate": 2.864382209655856e-05, "loss": 0.0002, "step": 16130 }, { "epoch": 7.529054842473745, "grad_norm": 0.0419921875, "learning_rate": 2.8633542735591202e-05, "loss": 0.002, "step": 16131 }, { "epoch": 7.529521586931155, "grad_norm": 0.06494140625, "learning_rate": 2.8623264911222814e-05, "loss": 0.0023, "step": 16132 }, { "epoch": 7.529988331388565, "grad_norm": 0.01165771484375, "learning_rate": 2.8612988623674742e-05, "loss": 0.0002, "step": 16133 }, { "epoch": 7.530455075845975, "grad_norm": 0.019775390625, "learning_rate": 2.8602713873168253e-05, "loss": 0.0002, "step": 16134 }, { "epoch": 7.530921820303384, "grad_norm": 0.01190185546875, "learning_rate": 2.8592440659924525e-05, "loss": 0.0002, "step": 16135 }, { "epoch": 7.531388564760793, "grad_norm": 0.025146484375, "learning_rate": 2.858216898416477e-05, "loss": 0.0002, "step": 16136 }, { "epoch": 7.531855309218203, "grad_norm": 0.03173828125, "learning_rate": 2.857189884611019e-05, "loss": 0.0023, "step": 16137 }, { "epoch": 7.532322053675612, "grad_norm": 0.0186767578125, "learning_rate": 2.856163024598185e-05, "loss": 0.0002, "step": 16138 }, { "epoch": 7.532788798133022, "grad_norm": 0.0089111328125, "learning_rate": 2.8551363184000867e-05, "loss": 0.0002, "step": 16139 }, { "epoch": 7.533255542590432, "grad_norm": 0.007049560546875, "learning_rate": 2.8541097660388372e-05, "loss": 0.0002, "step": 16140 }, { "epoch": 7.5337222870478415, "grad_norm": 0.0140380859375, "learning_rate": 2.8530833675365276e-05, "loss": 0.0002, "step": 16141 }, { "epoch": 7.534189031505251, "grad_norm": 0.01129150390625, "learning_rate": 2.8520571229152614e-05, "loss": 0.0002, "step": 16142 }, { "epoch": 7.53465577596266, "grad_norm": 0.041015625, "learning_rate": 2.85103103219714e-05, "loss": 0.0027, "step": 16143 }, { "epoch": 7.53512252042007, "grad_norm": 0.007537841796875, "learning_rate": 2.85000509540425e-05, "loss": 0.0001, "step": 16144 }, { "epoch": 7.53558926487748, "grad_norm": 0.037109375, "learning_rate": 2.8489793125586828e-05, "loss": 0.0002, "step": 16145 }, { "epoch": 7.536056009334889, "grad_norm": 0.017578125, "learning_rate": 2.847953683682528e-05, "loss": 0.0001, "step": 16146 }, { "epoch": 7.5365227537922985, "grad_norm": 0.00872802734375, "learning_rate": 2.846928208797862e-05, "loss": 0.0001, "step": 16147 }, { "epoch": 7.536989498249708, "grad_norm": 0.0220947265625, "learning_rate": 2.845902887926769e-05, "loss": 0.0002, "step": 16148 }, { "epoch": 7.537456242707118, "grad_norm": 0.0230712890625, "learning_rate": 2.844877721091328e-05, "loss": 0.0004, "step": 16149 }, { "epoch": 7.537922987164528, "grad_norm": 0.00830078125, "learning_rate": 2.8438527083136046e-05, "loss": 0.0002, "step": 16150 }, { "epoch": 7.538389731621937, "grad_norm": 0.037841796875, "learning_rate": 2.8428278496156725e-05, "loss": 0.0021, "step": 16151 }, { "epoch": 7.538856476079347, "grad_norm": 0.011474609375, "learning_rate": 2.8418031450196002e-05, "loss": 0.0002, "step": 16152 }, { "epoch": 7.539323220536756, "grad_norm": 0.008056640625, "learning_rate": 2.8407785945474464e-05, "loss": 0.0002, "step": 16153 }, { "epoch": 7.539789964994165, "grad_norm": 0.0966796875, "learning_rate": 2.8397541982212717e-05, "loss": 0.0049, "step": 16154 }, { "epoch": 7.540256709451575, "grad_norm": 0.01324462890625, "learning_rate": 2.8387299560631374e-05, "loss": 0.0002, "step": 16155 }, { "epoch": 7.540723453908985, "grad_norm": 0.011474609375, "learning_rate": 2.8377058680950896e-05, "loss": 0.0001, "step": 16156 }, { "epoch": 7.541190198366395, "grad_norm": 0.08349609375, "learning_rate": 2.8366819343391805e-05, "loss": 0.0003, "step": 16157 }, { "epoch": 7.541656942823804, "grad_norm": 0.05224609375, "learning_rate": 2.835658154817461e-05, "loss": 0.0029, "step": 16158 }, { "epoch": 7.542123687281213, "grad_norm": 0.036865234375, "learning_rate": 2.834634529551967e-05, "loss": 0.0002, "step": 16159 }, { "epoch": 7.542590431738623, "grad_norm": 0.01953125, "learning_rate": 2.8336110585647445e-05, "loss": 0.0002, "step": 16160 }, { "epoch": 7.543057176196033, "grad_norm": 0.036865234375, "learning_rate": 2.8325877418778246e-05, "loss": 0.0021, "step": 16161 }, { "epoch": 7.543523920653442, "grad_norm": 0.0196533203125, "learning_rate": 2.831564579513243e-05, "loss": 0.0002, "step": 16162 }, { "epoch": 7.543990665110852, "grad_norm": 0.01348876953125, "learning_rate": 2.830541571493032e-05, "loss": 0.0001, "step": 16163 }, { "epoch": 7.5444574095682615, "grad_norm": 0.028564453125, "learning_rate": 2.8295187178392136e-05, "loss": 0.0006, "step": 16164 }, { "epoch": 7.544924154025671, "grad_norm": 0.007049560546875, "learning_rate": 2.8284960185738118e-05, "loss": 0.0002, "step": 16165 }, { "epoch": 7.54539089848308, "grad_norm": 0.025634765625, "learning_rate": 2.8274734737188514e-05, "loss": 0.0002, "step": 16166 }, { "epoch": 7.54585764294049, "grad_norm": 0.01806640625, "learning_rate": 2.826451083296342e-05, "loss": 0.0002, "step": 16167 }, { "epoch": 7.5463243873979, "grad_norm": 0.08349609375, "learning_rate": 2.8254288473283e-05, "loss": 0.0035, "step": 16168 }, { "epoch": 7.54679113185531, "grad_norm": 0.0201416015625, "learning_rate": 2.824406765836738e-05, "loss": 0.0002, "step": 16169 }, { "epoch": 7.5472578763127185, "grad_norm": 0.01202392578125, "learning_rate": 2.823384838843657e-05, "loss": 0.0002, "step": 16170 }, { "epoch": 7.547724620770128, "grad_norm": 0.05126953125, "learning_rate": 2.822363066371062e-05, "loss": 0.0003, "step": 16171 }, { "epoch": 7.548191365227538, "grad_norm": 0.01324462890625, "learning_rate": 2.8213414484409582e-05, "loss": 0.0002, "step": 16172 }, { "epoch": 7.548658109684947, "grad_norm": 0.00872802734375, "learning_rate": 2.8203199850753348e-05, "loss": 0.0002, "step": 16173 }, { "epoch": 7.549124854142357, "grad_norm": 0.017333984375, "learning_rate": 2.8192986762961882e-05, "loss": 0.0002, "step": 16174 }, { "epoch": 7.549591598599767, "grad_norm": 0.00897216796875, "learning_rate": 2.8182775221255108e-05, "loss": 0.0002, "step": 16175 }, { "epoch": 7.550058343057176, "grad_norm": 0.0052490234375, "learning_rate": 2.817256522585283e-05, "loss": 0.0001, "step": 16176 }, { "epoch": 7.550525087514586, "grad_norm": 0.10546875, "learning_rate": 2.8162356776974953e-05, "loss": 0.0056, "step": 16177 }, { "epoch": 7.550991831971995, "grad_norm": 0.07421875, "learning_rate": 2.8152149874841217e-05, "loss": 0.0002, "step": 16178 }, { "epoch": 7.551458576429405, "grad_norm": 0.00970458984375, "learning_rate": 2.8141944519671402e-05, "loss": 0.0001, "step": 16179 }, { "epoch": 7.551925320886815, "grad_norm": 0.041259765625, "learning_rate": 2.8131740711685285e-05, "loss": 0.0007, "step": 16180 }, { "epoch": 7.552392065344224, "grad_norm": 0.05810546875, "learning_rate": 2.8121538451102493e-05, "loss": 0.0036, "step": 16181 }, { "epoch": 7.552858809801633, "grad_norm": 0.031982421875, "learning_rate": 2.811133773814273e-05, "loss": 0.0001, "step": 16182 }, { "epoch": 7.553325554259043, "grad_norm": 0.010986328125, "learning_rate": 2.8101138573025654e-05, "loss": 0.0002, "step": 16183 }, { "epoch": 7.553792298716453, "grad_norm": 0.01416015625, "learning_rate": 2.8090940955970812e-05, "loss": 0.0002, "step": 16184 }, { "epoch": 7.554259043173863, "grad_norm": 0.038330078125, "learning_rate": 2.8080744887197797e-05, "loss": 0.0023, "step": 16185 }, { "epoch": 7.554725787631272, "grad_norm": 0.0252685546875, "learning_rate": 2.8070550366926174e-05, "loss": 0.0002, "step": 16186 }, { "epoch": 7.5551925320886815, "grad_norm": 0.0380859375, "learning_rate": 2.8060357395375402e-05, "loss": 0.0018, "step": 16187 }, { "epoch": 7.555659276546091, "grad_norm": 0.06787109375, "learning_rate": 2.805016597276493e-05, "loss": 0.0004, "step": 16188 }, { "epoch": 7.5561260210035, "grad_norm": 0.00921630859375, "learning_rate": 2.8039976099314237e-05, "loss": 0.0001, "step": 16189 }, { "epoch": 7.55659276546091, "grad_norm": 0.06298828125, "learning_rate": 2.8029787775242678e-05, "loss": 0.0041, "step": 16190 }, { "epoch": 7.55705950991832, "grad_norm": 0.007659912109375, "learning_rate": 2.8019601000769636e-05, "loss": 0.0001, "step": 16191 }, { "epoch": 7.55752625437573, "grad_norm": 0.01416015625, "learning_rate": 2.8009415776114478e-05, "loss": 0.0002, "step": 16192 }, { "epoch": 7.557992998833139, "grad_norm": 0.17578125, "learning_rate": 2.7999232101496443e-05, "loss": 0.0007, "step": 16193 }, { "epoch": 7.558459743290548, "grad_norm": 0.01531982421875, "learning_rate": 2.798904997713482e-05, "loss": 0.0001, "step": 16194 }, { "epoch": 7.558926487747958, "grad_norm": 0.012451171875, "learning_rate": 2.7978869403248885e-05, "loss": 0.0002, "step": 16195 }, { "epoch": 7.559393232205368, "grad_norm": 0.0108642578125, "learning_rate": 2.7968690380057773e-05, "loss": 0.0002, "step": 16196 }, { "epoch": 7.559859976662777, "grad_norm": 0.060546875, "learning_rate": 2.7958512907780676e-05, "loss": 0.0024, "step": 16197 }, { "epoch": 7.5603267211201866, "grad_norm": 0.013916015625, "learning_rate": 2.7948336986636757e-05, "loss": 0.0002, "step": 16198 }, { "epoch": 7.560793465577596, "grad_norm": 0.01123046875, "learning_rate": 2.7938162616845055e-05, "loss": 0.0001, "step": 16199 }, { "epoch": 7.561260210035006, "grad_norm": 0.1611328125, "learning_rate": 2.7927989798624666e-05, "loss": 0.0018, "step": 16200 }, { "epoch": 7.561726954492415, "grad_norm": 0.01129150390625, "learning_rate": 2.791781853219465e-05, "loss": 0.0002, "step": 16201 }, { "epoch": 7.562193698949825, "grad_norm": 0.0198974609375, "learning_rate": 2.7907648817773947e-05, "loss": 0.0002, "step": 16202 }, { "epoch": 7.562660443407235, "grad_norm": 0.51171875, "learning_rate": 2.7897480655581553e-05, "loss": 0.0039, "step": 16203 }, { "epoch": 7.563127187864644, "grad_norm": 0.00604248046875, "learning_rate": 2.7887314045836444e-05, "loss": 0.0001, "step": 16204 }, { "epoch": 7.563593932322053, "grad_norm": 0.00823974609375, "learning_rate": 2.787714898875743e-05, "loss": 0.0001, "step": 16205 }, { "epoch": 7.564060676779463, "grad_norm": 0.0732421875, "learning_rate": 2.7866985484563414e-05, "loss": 0.0004, "step": 16206 }, { "epoch": 7.564527421236873, "grad_norm": 0.020263671875, "learning_rate": 2.7856823533473277e-05, "loss": 0.0002, "step": 16207 }, { "epoch": 7.564994165694283, "grad_norm": 0.0274658203125, "learning_rate": 2.784666313570573e-05, "loss": 0.0002, "step": 16208 }, { "epoch": 7.565460910151692, "grad_norm": 0.1884765625, "learning_rate": 2.7836504291479627e-05, "loss": 0.0133, "step": 16209 }, { "epoch": 7.565927654609101, "grad_norm": 0.0123291015625, "learning_rate": 2.7826347001013607e-05, "loss": 0.0002, "step": 16210 }, { "epoch": 7.566394399066511, "grad_norm": 0.00811767578125, "learning_rate": 2.7816191264526426e-05, "loss": 0.0002, "step": 16211 }, { "epoch": 7.566861143523921, "grad_norm": 0.01318359375, "learning_rate": 2.7806037082236768e-05, "loss": 0.0001, "step": 16212 }, { "epoch": 7.56732788798133, "grad_norm": 0.00738525390625, "learning_rate": 2.7795884454363197e-05, "loss": 0.0002, "step": 16213 }, { "epoch": 7.56779463243874, "grad_norm": 0.2255859375, "learning_rate": 2.7785733381124346e-05, "loss": 0.0008, "step": 16214 }, { "epoch": 7.5682613768961495, "grad_norm": 0.010009765625, "learning_rate": 2.7775583862738807e-05, "loss": 0.0002, "step": 16215 }, { "epoch": 7.568728121353558, "grad_norm": 0.00860595703125, "learning_rate": 2.776543589942505e-05, "loss": 0.0002, "step": 16216 }, { "epoch": 7.569194865810968, "grad_norm": 0.0084228515625, "learning_rate": 2.7755289491401602e-05, "loss": 0.0002, "step": 16217 }, { "epoch": 7.569661610268378, "grad_norm": 0.0147705078125, "learning_rate": 2.7745144638886967e-05, "loss": 0.0002, "step": 16218 }, { "epoch": 7.570128354725788, "grad_norm": 0.07080078125, "learning_rate": 2.7735001342099508e-05, "loss": 0.008, "step": 16219 }, { "epoch": 7.570595099183198, "grad_norm": 0.2119140625, "learning_rate": 2.7724859601257634e-05, "loss": 0.0036, "step": 16220 }, { "epoch": 7.5710618436406065, "grad_norm": 0.0281982421875, "learning_rate": 2.771471941657977e-05, "loss": 0.0002, "step": 16221 }, { "epoch": 7.571528588098016, "grad_norm": 0.01080322265625, "learning_rate": 2.7704580788284163e-05, "loss": 0.0001, "step": 16222 }, { "epoch": 7.571995332555426, "grad_norm": 0.02734375, "learning_rate": 2.7694443716589136e-05, "loss": 0.0001, "step": 16223 }, { "epoch": 7.572462077012835, "grad_norm": 0.1552734375, "learning_rate": 2.7684308201713006e-05, "loss": 0.0004, "step": 16224 }, { "epoch": 7.572928821470245, "grad_norm": 0.019287109375, "learning_rate": 2.7674174243873917e-05, "loss": 0.0002, "step": 16225 }, { "epoch": 7.573395565927655, "grad_norm": 0.02392578125, "learning_rate": 2.7664041843290133e-05, "loss": 0.0022, "step": 16226 }, { "epoch": 7.573862310385064, "grad_norm": 0.021484375, "learning_rate": 2.7653911000179754e-05, "loss": 0.0002, "step": 16227 }, { "epoch": 7.574329054842474, "grad_norm": 0.0615234375, "learning_rate": 2.7643781714760942e-05, "loss": 0.0032, "step": 16228 }, { "epoch": 7.574795799299883, "grad_norm": 0.050048828125, "learning_rate": 2.763365398725183e-05, "loss": 0.0003, "step": 16229 }, { "epoch": 7.575262543757293, "grad_norm": 0.0194091796875, "learning_rate": 2.7623527817870397e-05, "loss": 0.0002, "step": 16230 }, { "epoch": 7.575729288214703, "grad_norm": 0.01019287109375, "learning_rate": 2.761340320683472e-05, "loss": 0.0002, "step": 16231 }, { "epoch": 7.576196032672112, "grad_norm": 0.0810546875, "learning_rate": 2.760328015436281e-05, "loss": 0.0037, "step": 16232 }, { "epoch": 7.576662777129521, "grad_norm": 0.02978515625, "learning_rate": 2.759315866067258e-05, "loss": 0.0025, "step": 16233 }, { "epoch": 7.577129521586931, "grad_norm": 0.013671875, "learning_rate": 2.758303872598198e-05, "loss": 0.0002, "step": 16234 }, { "epoch": 7.577596266044341, "grad_norm": 0.01806640625, "learning_rate": 2.7572920350508935e-05, "loss": 0.0002, "step": 16235 }, { "epoch": 7.57806301050175, "grad_norm": 0.130859375, "learning_rate": 2.7562803534471272e-05, "loss": 0.0003, "step": 16236 }, { "epoch": 7.57852975495916, "grad_norm": 0.0228271484375, "learning_rate": 2.755268827808679e-05, "loss": 0.0003, "step": 16237 }, { "epoch": 7.5789964994165695, "grad_norm": 0.00970458984375, "learning_rate": 2.7542574581573345e-05, "loss": 0.0001, "step": 16238 }, { "epoch": 7.579463243873979, "grad_norm": 0.01361083984375, "learning_rate": 2.753246244514862e-05, "loss": 0.0002, "step": 16239 }, { "epoch": 7.579929988331388, "grad_norm": 0.0093994140625, "learning_rate": 2.7522351869030395e-05, "loss": 0.0002, "step": 16240 }, { "epoch": 7.580396732788798, "grad_norm": 0.162109375, "learning_rate": 2.7512242853436376e-05, "loss": 0.0004, "step": 16241 }, { "epoch": 7.580863477246208, "grad_norm": 0.0120849609375, "learning_rate": 2.7502135398584162e-05, "loss": 0.0002, "step": 16242 }, { "epoch": 7.581330221703618, "grad_norm": 0.00927734375, "learning_rate": 2.7492029504691408e-05, "loss": 0.0001, "step": 16243 }, { "epoch": 7.5817969661610265, "grad_norm": 0.01806640625, "learning_rate": 2.7481925171975742e-05, "loss": 0.0002, "step": 16244 }, { "epoch": 7.582263710618436, "grad_norm": 0.08984375, "learning_rate": 2.7471822400654655e-05, "loss": 0.0003, "step": 16245 }, { "epoch": 7.582730455075846, "grad_norm": 0.01116943359375, "learning_rate": 2.746172119094571e-05, "loss": 0.0002, "step": 16246 }, { "epoch": 7.583197199533256, "grad_norm": 0.0223388671875, "learning_rate": 2.745162154306642e-05, "loss": 0.0002, "step": 16247 }, { "epoch": 7.583663943990665, "grad_norm": 0.01251220703125, "learning_rate": 2.7441523457234176e-05, "loss": 0.0001, "step": 16248 }, { "epoch": 7.584130688448075, "grad_norm": 0.01263427734375, "learning_rate": 2.7431426933666438e-05, "loss": 0.0002, "step": 16249 }, { "epoch": 7.584597432905484, "grad_norm": 0.08349609375, "learning_rate": 2.742133197258062e-05, "loss": 0.0004, "step": 16250 }, { "epoch": 7.585064177362894, "grad_norm": 0.0162353515625, "learning_rate": 2.7411238574194033e-05, "loss": 0.0002, "step": 16251 }, { "epoch": 7.585530921820303, "grad_norm": 0.015869140625, "learning_rate": 2.7401146738724017e-05, "loss": 0.0002, "step": 16252 }, { "epoch": 7.585997666277713, "grad_norm": 0.0654296875, "learning_rate": 2.7391056466387887e-05, "loss": 0.0019, "step": 16253 }, { "epoch": 7.586464410735123, "grad_norm": 0.00823974609375, "learning_rate": 2.7380967757402842e-05, "loss": 0.0002, "step": 16254 }, { "epoch": 7.5869311551925325, "grad_norm": 0.01202392578125, "learning_rate": 2.7370880611986127e-05, "loss": 0.0002, "step": 16255 }, { "epoch": 7.587397899649941, "grad_norm": 0.012939453125, "learning_rate": 2.736079503035498e-05, "loss": 0.0002, "step": 16256 }, { "epoch": 7.587864644107351, "grad_norm": 0.01422119140625, "learning_rate": 2.7350711012726472e-05, "loss": 0.0002, "step": 16257 }, { "epoch": 7.588331388564761, "grad_norm": 0.05078125, "learning_rate": 2.7340628559317794e-05, "loss": 0.0025, "step": 16258 }, { "epoch": 7.58879813302217, "grad_norm": 0.011474609375, "learning_rate": 2.7330547670345964e-05, "loss": 0.0002, "step": 16259 }, { "epoch": 7.58926487747958, "grad_norm": 0.05126953125, "learning_rate": 2.7320468346028073e-05, "loss": 0.003, "step": 16260 }, { "epoch": 7.5897316219369895, "grad_norm": 0.00634765625, "learning_rate": 2.731039058658117e-05, "loss": 0.0001, "step": 16261 }, { "epoch": 7.590198366394399, "grad_norm": 0.0123291015625, "learning_rate": 2.7300314392222183e-05, "loss": 0.0002, "step": 16262 }, { "epoch": 7.590665110851809, "grad_norm": 0.044921875, "learning_rate": 2.7290239763168078e-05, "loss": 0.0002, "step": 16263 }, { "epoch": 7.591131855309218, "grad_norm": 0.033447265625, "learning_rate": 2.728016669963582e-05, "loss": 0.0002, "step": 16264 }, { "epoch": 7.591598599766628, "grad_norm": 0.0068359375, "learning_rate": 2.7270095201842237e-05, "loss": 0.0001, "step": 16265 }, { "epoch": 7.592065344224038, "grad_norm": 0.005828857421875, "learning_rate": 2.7260025270004185e-05, "loss": 0.0001, "step": 16266 }, { "epoch": 7.5925320886814465, "grad_norm": 0.01287841796875, "learning_rate": 2.7249956904338536e-05, "loss": 0.0002, "step": 16267 }, { "epoch": 7.592998833138856, "grad_norm": 0.01043701171875, "learning_rate": 2.7239890105061995e-05, "loss": 0.0002, "step": 16268 }, { "epoch": 7.593465577596266, "grad_norm": 0.01239013671875, "learning_rate": 2.7229824872391352e-05, "loss": 0.0002, "step": 16269 }, { "epoch": 7.593932322053676, "grad_norm": 0.011474609375, "learning_rate": 2.721976120654335e-05, "loss": 0.0002, "step": 16270 }, { "epoch": 7.594399066511086, "grad_norm": 0.04052734375, "learning_rate": 2.72096991077346e-05, "loss": 0.003, "step": 16271 }, { "epoch": 7.594865810968495, "grad_norm": 0.007659912109375, "learning_rate": 2.7199638576181796e-05, "loss": 0.0001, "step": 16272 }, { "epoch": 7.595332555425904, "grad_norm": 0.00994873046875, "learning_rate": 2.718957961210158e-05, "loss": 0.0002, "step": 16273 }, { "epoch": 7.595799299883314, "grad_norm": 0.0189208984375, "learning_rate": 2.7179522215710472e-05, "loss": 0.0002, "step": 16274 }, { "epoch": 7.596266044340723, "grad_norm": 0.0255126953125, "learning_rate": 2.7169466387225074e-05, "loss": 0.0002, "step": 16275 }, { "epoch": 7.596732788798133, "grad_norm": 0.02197265625, "learning_rate": 2.7159412126861838e-05, "loss": 0.0002, "step": 16276 }, { "epoch": 7.597199533255543, "grad_norm": 0.0108642578125, "learning_rate": 2.7149359434837285e-05, "loss": 0.0001, "step": 16277 }, { "epoch": 7.597666277712952, "grad_norm": 0.01226806640625, "learning_rate": 2.7139308311367873e-05, "loss": 0.0002, "step": 16278 }, { "epoch": 7.598133022170361, "grad_norm": 0.01031494140625, "learning_rate": 2.7129258756669975e-05, "loss": 0.0001, "step": 16279 }, { "epoch": 7.598599766627771, "grad_norm": 0.017578125, "learning_rate": 2.711921077095998e-05, "loss": 0.0002, "step": 16280 }, { "epoch": 7.599066511085181, "grad_norm": 0.04541015625, "learning_rate": 2.7109164354454265e-05, "loss": 0.0004, "step": 16281 }, { "epoch": 7.599533255542591, "grad_norm": 0.0174560546875, "learning_rate": 2.7099119507369088e-05, "loss": 0.0002, "step": 16282 }, { "epoch": 7.6, "grad_norm": 0.00982666015625, "learning_rate": 2.7089076229920784e-05, "loss": 0.0001, "step": 16283 }, { "epoch": 7.600466744457409, "grad_norm": 0.05908203125, "learning_rate": 2.7079034522325543e-05, "loss": 0.0025, "step": 16284 }, { "epoch": 7.600933488914819, "grad_norm": 0.03271484375, "learning_rate": 2.706899438479962e-05, "loss": 0.0009, "step": 16285 }, { "epoch": 7.601400233372229, "grad_norm": 0.0179443359375, "learning_rate": 2.7058955817559128e-05, "loss": 0.0002, "step": 16286 }, { "epoch": 7.601866977829638, "grad_norm": 0.01171875, "learning_rate": 2.704891882082029e-05, "loss": 0.0002, "step": 16287 }, { "epoch": 7.602333722287048, "grad_norm": 0.00775146484375, "learning_rate": 2.703888339479913e-05, "loss": 0.0001, "step": 16288 }, { "epoch": 7.6028004667444575, "grad_norm": 0.01251220703125, "learning_rate": 2.7028849539711764e-05, "loss": 0.0002, "step": 16289 }, { "epoch": 7.603267211201867, "grad_norm": 0.0076904296875, "learning_rate": 2.7018817255774266e-05, "loss": 0.0002, "step": 16290 }, { "epoch": 7.603733955659276, "grad_norm": 0.01300048828125, "learning_rate": 2.7008786543202568e-05, "loss": 0.0002, "step": 16291 }, { "epoch": 7.604200700116686, "grad_norm": 0.029296875, "learning_rate": 2.6998757402212693e-05, "loss": 0.0022, "step": 16292 }, { "epoch": 7.604667444574096, "grad_norm": 0.0084228515625, "learning_rate": 2.6988729833020587e-05, "loss": 0.0001, "step": 16293 }, { "epoch": 7.605134189031506, "grad_norm": 0.012939453125, "learning_rate": 2.6978703835842112e-05, "loss": 0.0002, "step": 16294 }, { "epoch": 7.6056009334889145, "grad_norm": 0.033935546875, "learning_rate": 2.696867941089316e-05, "loss": 0.0021, "step": 16295 }, { "epoch": 7.606067677946324, "grad_norm": 0.056640625, "learning_rate": 2.6958656558389594e-05, "loss": 0.0003, "step": 16296 }, { "epoch": 7.606534422403734, "grad_norm": 0.197265625, "learning_rate": 2.6948635278547175e-05, "loss": 0.0011, "step": 16297 }, { "epoch": 7.607001166861144, "grad_norm": 0.020751953125, "learning_rate": 2.6938615571581683e-05, "loss": 0.0002, "step": 16298 }, { "epoch": 7.607467911318553, "grad_norm": 0.103515625, "learning_rate": 2.692859743770888e-05, "loss": 0.0003, "step": 16299 }, { "epoch": 7.607934655775963, "grad_norm": 0.12255859375, "learning_rate": 2.691858087714444e-05, "loss": 0.0003, "step": 16300 }, { "epoch": 7.608401400233372, "grad_norm": 0.01080322265625, "learning_rate": 2.690856589010402e-05, "loss": 0.0001, "step": 16301 }, { "epoch": 7.608868144690781, "grad_norm": 0.028076171875, "learning_rate": 2.689855247680331e-05, "loss": 0.0002, "step": 16302 }, { "epoch": 7.609334889148191, "grad_norm": 0.00677490234375, "learning_rate": 2.6888540637457848e-05, "loss": 0.0001, "step": 16303 }, { "epoch": 7.609801633605601, "grad_norm": 0.01397705078125, "learning_rate": 2.6878530372283217e-05, "loss": 0.0002, "step": 16304 }, { "epoch": 7.610268378063011, "grad_norm": 0.0228271484375, "learning_rate": 2.6868521681494997e-05, "loss": 0.0003, "step": 16305 }, { "epoch": 7.6107351225204205, "grad_norm": 0.07958984375, "learning_rate": 2.6858514565308602e-05, "loss": 0.0023, "step": 16306 }, { "epoch": 7.611201866977829, "grad_norm": 0.01202392578125, "learning_rate": 2.684850902393957e-05, "loss": 0.0001, "step": 16307 }, { "epoch": 7.611668611435239, "grad_norm": 0.330078125, "learning_rate": 2.6838505057603293e-05, "loss": 0.0019, "step": 16308 }, { "epoch": 7.612135355892649, "grad_norm": 0.0303955078125, "learning_rate": 2.6828502666515154e-05, "loss": 0.0019, "step": 16309 }, { "epoch": 7.612602100350058, "grad_norm": 0.2236328125, "learning_rate": 2.6818501850890576e-05, "loss": 0.006, "step": 16310 }, { "epoch": 7.613068844807468, "grad_norm": 0.01055908203125, "learning_rate": 2.680850261094482e-05, "loss": 0.0001, "step": 16311 }, { "epoch": 7.6135355892648775, "grad_norm": 0.02587890625, "learning_rate": 2.679850494689321e-05, "loss": 0.0002, "step": 16312 }, { "epoch": 7.614002333722287, "grad_norm": 0.0084228515625, "learning_rate": 2.6788508858951034e-05, "loss": 0.0002, "step": 16313 }, { "epoch": 7.614469078179697, "grad_norm": 0.064453125, "learning_rate": 2.6778514347333462e-05, "loss": 0.0042, "step": 16314 }, { "epoch": 7.614935822637106, "grad_norm": 0.0233154296875, "learning_rate": 2.676852141225572e-05, "loss": 0.0001, "step": 16315 }, { "epoch": 7.615402567094516, "grad_norm": 0.047119140625, "learning_rate": 2.6758530053932997e-05, "loss": 0.0019, "step": 16316 }, { "epoch": 7.615869311551926, "grad_norm": 0.04248046875, "learning_rate": 2.6748540272580348e-05, "loss": 0.002, "step": 16317 }, { "epoch": 7.6163360560093345, "grad_norm": 0.0098876953125, "learning_rate": 2.6738552068412903e-05, "loss": 0.0002, "step": 16318 }, { "epoch": 7.616802800466744, "grad_norm": 0.059814453125, "learning_rate": 2.6728565441645748e-05, "loss": 0.0055, "step": 16319 }, { "epoch": 7.617269544924154, "grad_norm": 0.006195068359375, "learning_rate": 2.6718580392493842e-05, "loss": 0.0001, "step": 16320 }, { "epoch": 7.617736289381564, "grad_norm": 0.039794921875, "learning_rate": 2.6708596921172213e-05, "loss": 0.0019, "step": 16321 }, { "epoch": 7.618203033838973, "grad_norm": 0.016357421875, "learning_rate": 2.6698615027895834e-05, "loss": 0.0002, "step": 16322 }, { "epoch": 7.618669778296383, "grad_norm": 0.01190185546875, "learning_rate": 2.6688634712879578e-05, "loss": 0.0002, "step": 16323 }, { "epoch": 7.619136522753792, "grad_norm": 0.00640869140625, "learning_rate": 2.667865597633835e-05, "loss": 0.0001, "step": 16324 }, { "epoch": 7.619603267211202, "grad_norm": 0.037109375, "learning_rate": 2.6668678818487047e-05, "loss": 0.0002, "step": 16325 }, { "epoch": 7.620070011668611, "grad_norm": 0.00787353515625, "learning_rate": 2.6658703239540417e-05, "loss": 0.0001, "step": 16326 }, { "epoch": 7.620536756126021, "grad_norm": 0.030517578125, "learning_rate": 2.664872923971331e-05, "loss": 0.0002, "step": 16327 }, { "epoch": 7.621003500583431, "grad_norm": 0.00799560546875, "learning_rate": 2.6638756819220423e-05, "loss": 0.0001, "step": 16328 }, { "epoch": 7.6214702450408405, "grad_norm": 0.00653076171875, "learning_rate": 2.6628785978276495e-05, "loss": 0.0001, "step": 16329 }, { "epoch": 7.621936989498249, "grad_norm": 0.01025390625, "learning_rate": 2.661881671709624e-05, "loss": 0.0001, "step": 16330 }, { "epoch": 7.622403733955659, "grad_norm": 0.01361083984375, "learning_rate": 2.6608849035894277e-05, "loss": 0.0002, "step": 16331 }, { "epoch": 7.622870478413069, "grad_norm": 0.01416015625, "learning_rate": 2.6598882934885193e-05, "loss": 0.0002, "step": 16332 }, { "epoch": 7.623337222870479, "grad_norm": 0.02783203125, "learning_rate": 2.6588918414283602e-05, "loss": 0.0002, "step": 16333 }, { "epoch": 7.623803967327888, "grad_norm": 0.01251220703125, "learning_rate": 2.657895547430408e-05, "loss": 0.0002, "step": 16334 }, { "epoch": 7.6242707117852975, "grad_norm": 0.059814453125, "learning_rate": 2.6568994115161084e-05, "loss": 0.0003, "step": 16335 }, { "epoch": 7.624737456242707, "grad_norm": 0.036376953125, "learning_rate": 2.655903433706911e-05, "loss": 0.0002, "step": 16336 }, { "epoch": 7.625204200700117, "grad_norm": 0.0419921875, "learning_rate": 2.6549076140242636e-05, "loss": 0.002, "step": 16337 }, { "epoch": 7.625670945157526, "grad_norm": 0.0164794921875, "learning_rate": 2.6539119524896028e-05, "loss": 0.0002, "step": 16338 }, { "epoch": 7.626137689614936, "grad_norm": 0.01708984375, "learning_rate": 2.652916449124371e-05, "loss": 0.0002, "step": 16339 }, { "epoch": 7.626604434072346, "grad_norm": 0.2216796875, "learning_rate": 2.651921103949997e-05, "loss": 0.0004, "step": 16340 }, { "epoch": 7.627071178529755, "grad_norm": 0.015869140625, "learning_rate": 2.650925916987913e-05, "loss": 0.0001, "step": 16341 }, { "epoch": 7.627537922987164, "grad_norm": 0.0103759765625, "learning_rate": 2.649930888259553e-05, "loss": 0.0002, "step": 16342 }, { "epoch": 7.628004667444574, "grad_norm": 0.01318359375, "learning_rate": 2.648936017786332e-05, "loss": 0.0001, "step": 16343 }, { "epoch": 7.628471411901984, "grad_norm": 0.008544921875, "learning_rate": 2.647941305589674e-05, "loss": 0.0002, "step": 16344 }, { "epoch": 7.628938156359393, "grad_norm": 0.03466796875, "learning_rate": 2.6469467516910006e-05, "loss": 0.003, "step": 16345 }, { "epoch": 7.629404900816803, "grad_norm": 0.022705078125, "learning_rate": 2.6459523561117195e-05, "loss": 0.0002, "step": 16346 }, { "epoch": 7.629871645274212, "grad_norm": 0.00811767578125, "learning_rate": 2.6449581188732432e-05, "loss": 0.0001, "step": 16347 }, { "epoch": 7.630338389731622, "grad_norm": 0.0169677734375, "learning_rate": 2.643964039996982e-05, "loss": 0.0002, "step": 16348 }, { "epoch": 7.630805134189032, "grad_norm": 0.040771484375, "learning_rate": 2.6429701195043344e-05, "loss": 0.0014, "step": 16349 }, { "epoch": 7.631271878646441, "grad_norm": 0.0064697265625, "learning_rate": 2.6419763574167022e-05, "loss": 0.0001, "step": 16350 }, { "epoch": 7.631738623103851, "grad_norm": 0.0118408203125, "learning_rate": 2.6409827537554853e-05, "loss": 0.0002, "step": 16351 }, { "epoch": 7.6322053675612604, "grad_norm": 0.037841796875, "learning_rate": 2.6399893085420734e-05, "loss": 0.0031, "step": 16352 }, { "epoch": 7.632672112018669, "grad_norm": 0.064453125, "learning_rate": 2.638996021797856e-05, "loss": 0.0042, "step": 16353 }, { "epoch": 7.633138856476079, "grad_norm": 0.01385498046875, "learning_rate": 2.6380028935442257e-05, "loss": 0.0002, "step": 16354 }, { "epoch": 7.633605600933489, "grad_norm": 0.08447265625, "learning_rate": 2.637009923802558e-05, "loss": 0.0069, "step": 16355 }, { "epoch": 7.634072345390899, "grad_norm": 0.01300048828125, "learning_rate": 2.636017112594239e-05, "loss": 0.0002, "step": 16356 }, { "epoch": 7.6345390898483085, "grad_norm": 0.03271484375, "learning_rate": 2.6350244599406383e-05, "loss": 0.0008, "step": 16357 }, { "epoch": 7.635005834305717, "grad_norm": 0.0283203125, "learning_rate": 2.634031965863134e-05, "loss": 0.0002, "step": 16358 }, { "epoch": 7.635472578763127, "grad_norm": 0.0106201171875, "learning_rate": 2.6330396303830962e-05, "loss": 0.0002, "step": 16359 }, { "epoch": 7.635939323220537, "grad_norm": 0.0257568359375, "learning_rate": 2.6320474535218854e-05, "loss": 0.0002, "step": 16360 }, { "epoch": 7.636406067677946, "grad_norm": 0.03955078125, "learning_rate": 2.6310554353008688e-05, "loss": 0.0002, "step": 16361 }, { "epoch": 7.636872812135356, "grad_norm": 0.06982421875, "learning_rate": 2.6300635757414072e-05, "loss": 0.0003, "step": 16362 }, { "epoch": 7.6373395565927655, "grad_norm": 0.01556396484375, "learning_rate": 2.6290718748648513e-05, "loss": 0.0002, "step": 16363 }, { "epoch": 7.637806301050175, "grad_norm": 0.01092529296875, "learning_rate": 2.6280803326925562e-05, "loss": 0.0001, "step": 16364 }, { "epoch": 7.638273045507584, "grad_norm": 0.0810546875, "learning_rate": 2.6270889492458728e-05, "loss": 0.0002, "step": 16365 }, { "epoch": 7.638739789964994, "grad_norm": 0.057861328125, "learning_rate": 2.6260977245461426e-05, "loss": 0.0026, "step": 16366 }, { "epoch": 7.639206534422404, "grad_norm": 0.01068115234375, "learning_rate": 2.6251066586147098e-05, "loss": 0.0001, "step": 16367 }, { "epoch": 7.639673278879814, "grad_norm": 0.22265625, "learning_rate": 2.6241157514729163e-05, "loss": 0.0017, "step": 16368 }, { "epoch": 7.6401400233372225, "grad_norm": 0.0306396484375, "learning_rate": 2.6231250031420918e-05, "loss": 0.0002, "step": 16369 }, { "epoch": 7.640606767794632, "grad_norm": 0.00994873046875, "learning_rate": 2.62213441364357e-05, "loss": 0.0001, "step": 16370 }, { "epoch": 7.641073512252042, "grad_norm": 0.031494140625, "learning_rate": 2.6211439829986827e-05, "loss": 0.0002, "step": 16371 }, { "epoch": 7.641540256709452, "grad_norm": 0.0341796875, "learning_rate": 2.62015371122875e-05, "loss": 0.0011, "step": 16372 }, { "epoch": 7.642007001166861, "grad_norm": 0.009033203125, "learning_rate": 2.6191635983550966e-05, "loss": 0.0001, "step": 16373 }, { "epoch": 7.642473745624271, "grad_norm": 0.04150390625, "learning_rate": 2.6181736443990424e-05, "loss": 0.0026, "step": 16374 }, { "epoch": 7.64294049008168, "grad_norm": 0.01214599609375, "learning_rate": 2.6171838493818968e-05, "loss": 0.0002, "step": 16375 }, { "epoch": 7.64340723453909, "grad_norm": 0.08154296875, "learning_rate": 2.6161942133249774e-05, "loss": 0.0003, "step": 16376 }, { "epoch": 7.643873978996499, "grad_norm": 0.0537109375, "learning_rate": 2.615204736249587e-05, "loss": 0.0035, "step": 16377 }, { "epoch": 7.644340723453909, "grad_norm": 0.1337890625, "learning_rate": 2.6142154181770305e-05, "loss": 0.0004, "step": 16378 }, { "epoch": 7.644807467911319, "grad_norm": 0.03271484375, "learning_rate": 2.6132262591286148e-05, "loss": 0.0002, "step": 16379 }, { "epoch": 7.6452742123687285, "grad_norm": 0.01080322265625, "learning_rate": 2.612237259125634e-05, "loss": 0.0002, "step": 16380 }, { "epoch": 7.645740956826137, "grad_norm": 0.01190185546875, "learning_rate": 2.611248418189378e-05, "loss": 0.0002, "step": 16381 }, { "epoch": 7.646207701283547, "grad_norm": 0.010009765625, "learning_rate": 2.6102597363411417e-05, "loss": 0.0002, "step": 16382 }, { "epoch": 7.646674445740957, "grad_norm": 0.0140380859375, "learning_rate": 2.6092712136022146e-05, "loss": 0.0002, "step": 16383 }, { "epoch": 7.647141190198367, "grad_norm": 0.011474609375, "learning_rate": 2.608282849993877e-05, "loss": 0.0001, "step": 16384 }, { "epoch": 7.647607934655776, "grad_norm": 0.02294921875, "learning_rate": 2.6072946455374092e-05, "loss": 0.0002, "step": 16385 }, { "epoch": 7.6480746791131855, "grad_norm": 0.0191650390625, "learning_rate": 2.6063066002540948e-05, "loss": 0.0002, "step": 16386 }, { "epoch": 7.648541423570595, "grad_norm": 0.05615234375, "learning_rate": 2.605318714165198e-05, "loss": 0.0036, "step": 16387 }, { "epoch": 7.649008168028004, "grad_norm": 0.0167236328125, "learning_rate": 2.6043309872919985e-05, "loss": 0.0002, "step": 16388 }, { "epoch": 7.649474912485414, "grad_norm": 0.008544921875, "learning_rate": 2.603343419655754e-05, "loss": 0.0001, "step": 16389 }, { "epoch": 7.649941656942824, "grad_norm": 0.0084228515625, "learning_rate": 2.6023560112777334e-05, "loss": 0.0002, "step": 16390 }, { "epoch": 7.650408401400234, "grad_norm": 0.0086669921875, "learning_rate": 2.601368762179198e-05, "loss": 0.0002, "step": 16391 }, { "epoch": 7.650875145857643, "grad_norm": 0.039306640625, "learning_rate": 2.6003816723814e-05, "loss": 0.0002, "step": 16392 }, { "epoch": 7.651341890315052, "grad_norm": 0.0093994140625, "learning_rate": 2.599394741905594e-05, "loss": 0.0002, "step": 16393 }, { "epoch": 7.651808634772462, "grad_norm": 0.0079345703125, "learning_rate": 2.5984079707730336e-05, "loss": 0.0002, "step": 16394 }, { "epoch": 7.652275379229872, "grad_norm": 0.11376953125, "learning_rate": 2.597421359004958e-05, "loss": 0.0003, "step": 16395 }, { "epoch": 7.652742123687281, "grad_norm": 0.01251220703125, "learning_rate": 2.5964349066226147e-05, "loss": 0.0001, "step": 16396 }, { "epoch": 7.653208868144691, "grad_norm": 0.0135498046875, "learning_rate": 2.5954486136472446e-05, "loss": 0.0001, "step": 16397 }, { "epoch": 7.6536756126021, "grad_norm": 0.00726318359375, "learning_rate": 2.5944624801000794e-05, "loss": 0.0001, "step": 16398 }, { "epoch": 7.65414235705951, "grad_norm": 0.01953125, "learning_rate": 2.5934765060023525e-05, "loss": 0.0002, "step": 16399 }, { "epoch": 7.65460910151692, "grad_norm": 0.006103515625, "learning_rate": 2.5924906913752976e-05, "loss": 0.0001, "step": 16400 }, { "epoch": 7.655075845974329, "grad_norm": 0.08642578125, "learning_rate": 2.5915050362401337e-05, "loss": 0.0003, "step": 16401 }, { "epoch": 7.655542590431739, "grad_norm": 0.0272216796875, "learning_rate": 2.590519540618086e-05, "loss": 0.0002, "step": 16402 }, { "epoch": 7.6560093348891485, "grad_norm": 0.080078125, "learning_rate": 2.589534204530377e-05, "loss": 0.0028, "step": 16403 }, { "epoch": 7.656476079346557, "grad_norm": 0.01287841796875, "learning_rate": 2.588549027998216e-05, "loss": 0.0002, "step": 16404 }, { "epoch": 7.656942823803967, "grad_norm": 0.0048828125, "learning_rate": 2.58756401104282e-05, "loss": 0.0001, "step": 16405 }, { "epoch": 7.657409568261377, "grad_norm": 0.0089111328125, "learning_rate": 2.586579153685392e-05, "loss": 0.0002, "step": 16406 }, { "epoch": 7.657876312718787, "grad_norm": 0.016357421875, "learning_rate": 2.5855944559471402e-05, "loss": 0.0002, "step": 16407 }, { "epoch": 7.658343057176196, "grad_norm": 0.011474609375, "learning_rate": 2.584609917849269e-05, "loss": 0.0002, "step": 16408 }, { "epoch": 7.6588098016336055, "grad_norm": 0.0084228515625, "learning_rate": 2.5836255394129715e-05, "loss": 0.0001, "step": 16409 }, { "epoch": 7.659276546091015, "grad_norm": 0.01055908203125, "learning_rate": 2.5826413206594446e-05, "loss": 0.0002, "step": 16410 }, { "epoch": 7.659743290548425, "grad_norm": 0.01251220703125, "learning_rate": 2.5816572616098823e-05, "loss": 0.0002, "step": 16411 }, { "epoch": 7.660210035005834, "grad_norm": 0.021728515625, "learning_rate": 2.580673362285467e-05, "loss": 0.0003, "step": 16412 }, { "epoch": 7.660676779463244, "grad_norm": 0.0096435546875, "learning_rate": 2.5796896227073862e-05, "loss": 0.0002, "step": 16413 }, { "epoch": 7.661143523920654, "grad_norm": 0.032470703125, "learning_rate": 2.5787060428968235e-05, "loss": 0.0026, "step": 16414 }, { "epoch": 7.661610268378063, "grad_norm": 0.0150146484375, "learning_rate": 2.5777226228749517e-05, "loss": 0.0001, "step": 16415 }, { "epoch": 7.662077012835472, "grad_norm": 0.006103515625, "learning_rate": 2.5767393626629466e-05, "loss": 0.0001, "step": 16416 }, { "epoch": 7.662543757292882, "grad_norm": 0.01409912109375, "learning_rate": 2.575756262281982e-05, "loss": 0.0002, "step": 16417 }, { "epoch": 7.663010501750292, "grad_norm": 0.00543212890625, "learning_rate": 2.5747733217532187e-05, "loss": 0.0001, "step": 16418 }, { "epoch": 7.663477246207702, "grad_norm": 0.06689453125, "learning_rate": 2.5737905410978258e-05, "loss": 0.0002, "step": 16419 }, { "epoch": 7.663943990665111, "grad_norm": 0.02978515625, "learning_rate": 2.5728079203369637e-05, "loss": 0.0025, "step": 16420 }, { "epoch": 7.66441073512252, "grad_norm": 0.0113525390625, "learning_rate": 2.5718254594917858e-05, "loss": 0.0002, "step": 16421 }, { "epoch": 7.66487747957993, "grad_norm": 0.017822265625, "learning_rate": 2.570843158583447e-05, "loss": 0.0002, "step": 16422 }, { "epoch": 7.665344224037339, "grad_norm": 0.0091552734375, "learning_rate": 2.5698610176331005e-05, "loss": 0.0001, "step": 16423 }, { "epoch": 7.665810968494749, "grad_norm": 0.038818359375, "learning_rate": 2.5688790366618875e-05, "loss": 0.0003, "step": 16424 }, { "epoch": 7.666277712952159, "grad_norm": 0.02587890625, "learning_rate": 2.5678972156909563e-05, "loss": 0.0002, "step": 16425 }, { "epoch": 7.6667444574095684, "grad_norm": 0.00640869140625, "learning_rate": 2.5669155547414448e-05, "loss": 0.0001, "step": 16426 }, { "epoch": 7.667211201866978, "grad_norm": 0.08349609375, "learning_rate": 2.5659340538344844e-05, "loss": 0.0003, "step": 16427 }, { "epoch": 7.667677946324387, "grad_norm": 0.03173828125, "learning_rate": 2.5649527129912133e-05, "loss": 0.0013, "step": 16428 }, { "epoch": 7.668144690781797, "grad_norm": 0.007415771484375, "learning_rate": 2.563971532232763e-05, "loss": 0.0002, "step": 16429 }, { "epoch": 7.668611435239207, "grad_norm": 0.0308837890625, "learning_rate": 2.5629905115802523e-05, "loss": 0.0003, "step": 16430 }, { "epoch": 7.669078179696616, "grad_norm": 0.0167236328125, "learning_rate": 2.5620096510548063e-05, "loss": 0.0002, "step": 16431 }, { "epoch": 7.6695449241540254, "grad_norm": 0.08642578125, "learning_rate": 2.5610289506775497e-05, "loss": 0.0052, "step": 16432 }, { "epoch": 7.670011668611435, "grad_norm": 0.033203125, "learning_rate": 2.5600484104695898e-05, "loss": 0.0002, "step": 16433 }, { "epoch": 7.670478413068845, "grad_norm": 0.0260009765625, "learning_rate": 2.559068030452042e-05, "loss": 0.0002, "step": 16434 }, { "epoch": 7.670945157526255, "grad_norm": 0.01318359375, "learning_rate": 2.5580878106460194e-05, "loss": 0.0002, "step": 16435 }, { "epoch": 7.671411901983664, "grad_norm": 0.009765625, "learning_rate": 2.5571077510726193e-05, "loss": 0.0001, "step": 16436 }, { "epoch": 7.6718786464410735, "grad_norm": 0.00872802734375, "learning_rate": 2.556127851752951e-05, "loss": 0.0002, "step": 16437 }, { "epoch": 7.672345390898483, "grad_norm": 0.023681640625, "learning_rate": 2.5551481127081057e-05, "loss": 0.0002, "step": 16438 }, { "epoch": 7.672812135355892, "grad_norm": 0.052490234375, "learning_rate": 2.5541685339591825e-05, "loss": 0.0004, "step": 16439 }, { "epoch": 7.673278879813302, "grad_norm": 0.0218505859375, "learning_rate": 2.5531891155272748e-05, "loss": 0.0001, "step": 16440 }, { "epoch": 7.673745624270712, "grad_norm": 0.1904296875, "learning_rate": 2.5522098574334642e-05, "loss": 0.0051, "step": 16441 }, { "epoch": 7.674212368728122, "grad_norm": 0.0849609375, "learning_rate": 2.5512307596988383e-05, "loss": 0.0022, "step": 16442 }, { "epoch": 7.674679113185531, "grad_norm": 0.0115966796875, "learning_rate": 2.5502518223444826e-05, "loss": 0.0002, "step": 16443 }, { "epoch": 7.67514585764294, "grad_norm": 0.021484375, "learning_rate": 2.5492730453914683e-05, "loss": 0.0002, "step": 16444 }, { "epoch": 7.67561260210035, "grad_norm": 0.0086669921875, "learning_rate": 2.5482944288608713e-05, "loss": 0.0002, "step": 16445 }, { "epoch": 7.67607934655776, "grad_norm": 0.1904296875, "learning_rate": 2.547315972773766e-05, "loss": 0.0005, "step": 16446 }, { "epoch": 7.676546091015169, "grad_norm": 0.0517578125, "learning_rate": 2.546337677151214e-05, "loss": 0.0002, "step": 16447 }, { "epoch": 7.677012835472579, "grad_norm": 0.0634765625, "learning_rate": 2.545359542014282e-05, "loss": 0.0031, "step": 16448 }, { "epoch": 7.677479579929988, "grad_norm": 0.0361328125, "learning_rate": 2.5443815673840334e-05, "loss": 0.0023, "step": 16449 }, { "epoch": 7.677946324387398, "grad_norm": 0.010009765625, "learning_rate": 2.543403753281518e-05, "loss": 0.0001, "step": 16450 }, { "epoch": 7.678413068844807, "grad_norm": 0.0224609375, "learning_rate": 2.5424260997277927e-05, "loss": 0.0002, "step": 16451 }, { "epoch": 7.678879813302217, "grad_norm": 0.0634765625, "learning_rate": 2.5414486067439114e-05, "loss": 0.0033, "step": 16452 }, { "epoch": 7.679346557759627, "grad_norm": 0.01708984375, "learning_rate": 2.540471274350914e-05, "loss": 0.0002, "step": 16453 }, { "epoch": 7.6798133022170365, "grad_norm": 0.00994873046875, "learning_rate": 2.5394941025698494e-05, "loss": 0.0001, "step": 16454 }, { "epoch": 7.680280046674445, "grad_norm": 0.142578125, "learning_rate": 2.5385170914217517e-05, "loss": 0.0004, "step": 16455 }, { "epoch": 7.680746791131855, "grad_norm": 0.036865234375, "learning_rate": 2.5375402409276593e-05, "loss": 0.0002, "step": 16456 }, { "epoch": 7.681213535589265, "grad_norm": 0.031494140625, "learning_rate": 2.5365635511086093e-05, "loss": 0.0002, "step": 16457 }, { "epoch": 7.681680280046675, "grad_norm": 0.00927734375, "learning_rate": 2.5355870219856224e-05, "loss": 0.0002, "step": 16458 }, { "epoch": 7.682147024504084, "grad_norm": 0.01324462890625, "learning_rate": 2.53461065357973e-05, "loss": 0.0002, "step": 16459 }, { "epoch": 7.6826137689614935, "grad_norm": 0.0185546875, "learning_rate": 2.5336344459119556e-05, "loss": 0.0001, "step": 16460 }, { "epoch": 7.683080513418903, "grad_norm": 0.01220703125, "learning_rate": 2.5326583990033125e-05, "loss": 0.0002, "step": 16461 }, { "epoch": 7.683547257876313, "grad_norm": 0.0576171875, "learning_rate": 2.531682512874819e-05, "loss": 0.0024, "step": 16462 }, { "epoch": 7.684014002333722, "grad_norm": 0.01104736328125, "learning_rate": 2.530706787547491e-05, "loss": 0.0002, "step": 16463 }, { "epoch": 7.684480746791132, "grad_norm": 0.038330078125, "learning_rate": 2.5297312230423298e-05, "loss": 0.0002, "step": 16464 }, { "epoch": 7.684947491248542, "grad_norm": 0.01544189453125, "learning_rate": 2.528755819380344e-05, "loss": 0.0002, "step": 16465 }, { "epoch": 7.6854142357059505, "grad_norm": 0.009765625, "learning_rate": 2.527780576582538e-05, "loss": 0.0001, "step": 16466 }, { "epoch": 7.68588098016336, "grad_norm": 0.017822265625, "learning_rate": 2.5268054946699025e-05, "loss": 0.0002, "step": 16467 }, { "epoch": 7.68634772462077, "grad_norm": 0.1884765625, "learning_rate": 2.525830573663438e-05, "loss": 0.0003, "step": 16468 }, { "epoch": 7.68681446907818, "grad_norm": 0.010986328125, "learning_rate": 2.524855813584136e-05, "loss": 0.0002, "step": 16469 }, { "epoch": 7.68728121353559, "grad_norm": 0.01483154296875, "learning_rate": 2.52388121445298e-05, "loss": 0.0002, "step": 16470 }, { "epoch": 7.687747957992999, "grad_norm": 0.00885009765625, "learning_rate": 2.5229067762909553e-05, "loss": 0.0001, "step": 16471 }, { "epoch": 7.688214702450408, "grad_norm": 0.0184326171875, "learning_rate": 2.5219324991190474e-05, "loss": 0.0002, "step": 16472 }, { "epoch": 7.688681446907818, "grad_norm": 0.00982666015625, "learning_rate": 2.5209583829582273e-05, "loss": 0.0001, "step": 16473 }, { "epoch": 7.689148191365227, "grad_norm": 0.0390625, "learning_rate": 2.5199844278294727e-05, "loss": 0.0035, "step": 16474 }, { "epoch": 7.689614935822637, "grad_norm": 0.036865234375, "learning_rate": 2.5190106337537545e-05, "loss": 0.0024, "step": 16475 }, { "epoch": 7.690081680280047, "grad_norm": 0.0478515625, "learning_rate": 2.5180370007520336e-05, "loss": 0.0043, "step": 16476 }, { "epoch": 7.6905484247374565, "grad_norm": 0.008056640625, "learning_rate": 2.5170635288452772e-05, "loss": 0.0001, "step": 16477 }, { "epoch": 7.691015169194866, "grad_norm": 0.049560546875, "learning_rate": 2.5160902180544487e-05, "loss": 0.004, "step": 16478 }, { "epoch": 7.691481913652275, "grad_norm": 0.028564453125, "learning_rate": 2.515117068400499e-05, "loss": 0.0003, "step": 16479 }, { "epoch": 7.691948658109685, "grad_norm": 0.030029296875, "learning_rate": 2.5141440799043824e-05, "loss": 0.0016, "step": 16480 }, { "epoch": 7.692415402567095, "grad_norm": 0.019775390625, "learning_rate": 2.5131712525870534e-05, "loss": 0.0002, "step": 16481 }, { "epoch": 7.692882147024504, "grad_norm": 0.0242919921875, "learning_rate": 2.512198586469451e-05, "loss": 0.0002, "step": 16482 }, { "epoch": 7.6933488914819135, "grad_norm": 0.07421875, "learning_rate": 2.5112260815725208e-05, "loss": 0.0019, "step": 16483 }, { "epoch": 7.693815635939323, "grad_norm": 0.047119140625, "learning_rate": 2.5102537379172043e-05, "loss": 0.0025, "step": 16484 }, { "epoch": 7.694282380396733, "grad_norm": 0.01123046875, "learning_rate": 2.509281555524433e-05, "loss": 0.0001, "step": 16485 }, { "epoch": 7.694749124854142, "grad_norm": 0.01171875, "learning_rate": 2.508309534415143e-05, "loss": 0.0001, "step": 16486 }, { "epoch": 7.695215869311552, "grad_norm": 0.01226806640625, "learning_rate": 2.5073376746102582e-05, "loss": 0.0001, "step": 16487 }, { "epoch": 7.695682613768962, "grad_norm": 0.00799560546875, "learning_rate": 2.5063659761307067e-05, "loss": 0.0001, "step": 16488 }, { "epoch": 7.696149358226371, "grad_norm": 0.1201171875, "learning_rate": 2.5053944389974126e-05, "loss": 0.0025, "step": 16489 }, { "epoch": 7.69661610268378, "grad_norm": 0.01544189453125, "learning_rate": 2.5044230632312892e-05, "loss": 0.0001, "step": 16490 }, { "epoch": 7.69708284714119, "grad_norm": 0.00848388671875, "learning_rate": 2.5034518488532532e-05, "loss": 0.0001, "step": 16491 }, { "epoch": 7.6975495915986, "grad_norm": 0.035888671875, "learning_rate": 2.50248079588422e-05, "loss": 0.0004, "step": 16492 }, { "epoch": 7.69801633605601, "grad_norm": 0.03955078125, "learning_rate": 2.5015099043450907e-05, "loss": 0.0003, "step": 16493 }, { "epoch": 7.698483080513419, "grad_norm": 0.025390625, "learning_rate": 2.5005391742567728e-05, "loss": 0.0026, "step": 16494 }, { "epoch": 7.698949824970828, "grad_norm": 0.007080078125, "learning_rate": 2.49956860564017e-05, "loss": 0.0002, "step": 16495 }, { "epoch": 7.699416569428238, "grad_norm": 0.021240234375, "learning_rate": 2.4985981985161743e-05, "loss": 0.0002, "step": 16496 }, { "epoch": 7.699883313885648, "grad_norm": 0.010498046875, "learning_rate": 2.4976279529056822e-05, "loss": 0.0001, "step": 16497 }, { "epoch": 7.700350058343057, "grad_norm": 0.05810546875, "learning_rate": 2.4966578688295872e-05, "loss": 0.0034, "step": 16498 }, { "epoch": 7.700816802800467, "grad_norm": 0.0096435546875, "learning_rate": 2.4956879463087712e-05, "loss": 0.0002, "step": 16499 }, { "epoch": 7.7012835472578764, "grad_norm": 0.023193359375, "learning_rate": 2.494718185364119e-05, "loss": 0.0002, "step": 16500 }, { "epoch": 7.701750291715286, "grad_norm": 0.0262451171875, "learning_rate": 2.4937485860165145e-05, "loss": 0.0002, "step": 16501 }, { "epoch": 7.702217036172695, "grad_norm": 0.0062255859375, "learning_rate": 2.492779148286829e-05, "loss": 0.0001, "step": 16502 }, { "epoch": 7.702683780630105, "grad_norm": 0.01153564453125, "learning_rate": 2.491809872195937e-05, "loss": 0.0002, "step": 16503 }, { "epoch": 7.703150525087515, "grad_norm": 0.0869140625, "learning_rate": 2.4908407577647118e-05, "loss": 0.0101, "step": 16504 }, { "epoch": 7.7036172695449245, "grad_norm": 0.00567626953125, "learning_rate": 2.4898718050140148e-05, "loss": 0.0001, "step": 16505 }, { "epoch": 7.7040840140023334, "grad_norm": 0.0067138671875, "learning_rate": 2.488903013964713e-05, "loss": 0.0002, "step": 16506 }, { "epoch": 7.704550758459743, "grad_norm": 0.00787353515625, "learning_rate": 2.48793438463766e-05, "loss": 0.0001, "step": 16507 }, { "epoch": 7.705017502917153, "grad_norm": 0.0262451171875, "learning_rate": 2.486965917053714e-05, "loss": 0.0002, "step": 16508 }, { "epoch": 7.705484247374562, "grad_norm": 0.01263427734375, "learning_rate": 2.4859976112337315e-05, "loss": 0.0001, "step": 16509 }, { "epoch": 7.705950991831972, "grad_norm": 0.0101318359375, "learning_rate": 2.485029467198553e-05, "loss": 0.0002, "step": 16510 }, { "epoch": 7.7064177362893815, "grad_norm": 0.0089111328125, "learning_rate": 2.4840614849690292e-05, "loss": 0.0002, "step": 16511 }, { "epoch": 7.706884480746791, "grad_norm": 0.011474609375, "learning_rate": 2.483093664566004e-05, "loss": 0.0001, "step": 16512 }, { "epoch": 7.707351225204201, "grad_norm": 0.0108642578125, "learning_rate": 2.482126006010308e-05, "loss": 0.0002, "step": 16513 }, { "epoch": 7.70781796966161, "grad_norm": 0.009765625, "learning_rate": 2.4811585093227818e-05, "loss": 0.0002, "step": 16514 }, { "epoch": 7.70828471411902, "grad_norm": 0.11181640625, "learning_rate": 2.480191174524258e-05, "loss": 0.0004, "step": 16515 }, { "epoch": 7.70875145857643, "grad_norm": 0.01544189453125, "learning_rate": 2.479224001635557e-05, "loss": 0.0002, "step": 16516 }, { "epoch": 7.7092182030338385, "grad_norm": 0.00897216796875, "learning_rate": 2.478256990677509e-05, "loss": 0.0001, "step": 16517 }, { "epoch": 7.709684947491248, "grad_norm": 0.6640625, "learning_rate": 2.4772901416709372e-05, "loss": 0.0068, "step": 16518 }, { "epoch": 7.710151691948658, "grad_norm": 0.048828125, "learning_rate": 2.476323454636651e-05, "loss": 0.0016, "step": 16519 }, { "epoch": 7.710618436406068, "grad_norm": 0.09130859375, "learning_rate": 2.4753569295954682e-05, "loss": 0.0003, "step": 16520 }, { "epoch": 7.711085180863478, "grad_norm": 0.04248046875, "learning_rate": 2.4743905665682034e-05, "loss": 0.0043, "step": 16521 }, { "epoch": 7.711551925320887, "grad_norm": 0.0262451171875, "learning_rate": 2.4734243655756596e-05, "loss": 0.0002, "step": 16522 }, { "epoch": 7.712018669778296, "grad_norm": 0.00836181640625, "learning_rate": 2.472458326638636e-05, "loss": 0.0001, "step": 16523 }, { "epoch": 7.712485414235706, "grad_norm": 0.013671875, "learning_rate": 2.47149244977794e-05, "loss": 0.0002, "step": 16524 }, { "epoch": 7.712952158693115, "grad_norm": 0.01495361328125, "learning_rate": 2.4705267350143625e-05, "loss": 0.0002, "step": 16525 }, { "epoch": 7.713418903150525, "grad_norm": 0.0159912109375, "learning_rate": 2.469561182368697e-05, "loss": 0.0002, "step": 16526 }, { "epoch": 7.713885647607935, "grad_norm": 0.01153564453125, "learning_rate": 2.4685957918617385e-05, "loss": 0.0002, "step": 16527 }, { "epoch": 7.7143523920653445, "grad_norm": 0.01019287109375, "learning_rate": 2.467630563514266e-05, "loss": 0.0001, "step": 16528 }, { "epoch": 7.714819136522753, "grad_norm": 0.0068359375, "learning_rate": 2.466665497347064e-05, "loss": 0.0001, "step": 16529 }, { "epoch": 7.715285880980163, "grad_norm": 0.009033203125, "learning_rate": 2.4657005933809153e-05, "loss": 0.0002, "step": 16530 }, { "epoch": 7.715752625437573, "grad_norm": 0.037841796875, "learning_rate": 2.4647358516365903e-05, "loss": 0.0002, "step": 16531 }, { "epoch": 7.716219369894983, "grad_norm": 0.08447265625, "learning_rate": 2.4637712721348615e-05, "loss": 0.0004, "step": 16532 }, { "epoch": 7.716686114352392, "grad_norm": 0.040283203125, "learning_rate": 2.4628068548965033e-05, "loss": 0.0002, "step": 16533 }, { "epoch": 7.7171528588098015, "grad_norm": 0.028076171875, "learning_rate": 2.4618425999422722e-05, "loss": 0.0023, "step": 16534 }, { "epoch": 7.717619603267211, "grad_norm": 0.005767822265625, "learning_rate": 2.4608785072929364e-05, "loss": 0.0001, "step": 16535 }, { "epoch": 7.718086347724621, "grad_norm": 0.00634765625, "learning_rate": 2.459914576969249e-05, "loss": 0.0001, "step": 16536 }, { "epoch": 7.71855309218203, "grad_norm": 0.009521484375, "learning_rate": 2.4589508089919657e-05, "loss": 0.0001, "step": 16537 }, { "epoch": 7.71901983663944, "grad_norm": 0.006378173828125, "learning_rate": 2.457987203381842e-05, "loss": 0.0001, "step": 16538 }, { "epoch": 7.71948658109685, "grad_norm": 0.01708984375, "learning_rate": 2.457023760159619e-05, "loss": 0.0002, "step": 16539 }, { "epoch": 7.719953325554259, "grad_norm": 0.01953125, "learning_rate": 2.456060479346043e-05, "loss": 0.0002, "step": 16540 }, { "epoch": 7.720420070011668, "grad_norm": 0.015869140625, "learning_rate": 2.4550973609618576e-05, "loss": 0.0002, "step": 16541 }, { "epoch": 7.720886814469078, "grad_norm": 0.048583984375, "learning_rate": 2.454134405027796e-05, "loss": 0.0003, "step": 16542 }, { "epoch": 7.721353558926488, "grad_norm": 0.01123046875, "learning_rate": 2.4531716115645908e-05, "loss": 0.0002, "step": 16543 }, { "epoch": 7.721820303383898, "grad_norm": 0.005950927734375, "learning_rate": 2.4522089805929783e-05, "loss": 0.0001, "step": 16544 }, { "epoch": 7.722287047841307, "grad_norm": 0.1083984375, "learning_rate": 2.451246512133678e-05, "loss": 0.0024, "step": 16545 }, { "epoch": 7.722753792298716, "grad_norm": 0.0106201171875, "learning_rate": 2.450284206207416e-05, "loss": 0.0002, "step": 16546 }, { "epoch": 7.723220536756126, "grad_norm": 0.028076171875, "learning_rate": 2.4493220628349135e-05, "loss": 0.0001, "step": 16547 }, { "epoch": 7.723687281213536, "grad_norm": 0.044189453125, "learning_rate": 2.4483600820368825e-05, "loss": 0.0018, "step": 16548 }, { "epoch": 7.724154025670945, "grad_norm": 0.0257568359375, "learning_rate": 2.4473982638340377e-05, "loss": 0.0002, "step": 16549 }, { "epoch": 7.724620770128355, "grad_norm": 0.0150146484375, "learning_rate": 2.44643660824709e-05, "loss": 0.0001, "step": 16550 }, { "epoch": 7.7250875145857645, "grad_norm": 0.04638671875, "learning_rate": 2.4454751152967402e-05, "loss": 0.0002, "step": 16551 }, { "epoch": 7.725554259043173, "grad_norm": 0.458984375, "learning_rate": 2.4445137850036938e-05, "loss": 0.0019, "step": 16552 }, { "epoch": 7.726021003500583, "grad_norm": 0.04443359375, "learning_rate": 2.4435526173886514e-05, "loss": 0.0003, "step": 16553 }, { "epoch": 7.726487747957993, "grad_norm": 0.007080078125, "learning_rate": 2.4425916124723026e-05, "loss": 0.0001, "step": 16554 }, { "epoch": 7.726954492415403, "grad_norm": 0.0081787109375, "learning_rate": 2.441630770275344e-05, "loss": 0.0001, "step": 16555 }, { "epoch": 7.727421236872813, "grad_norm": 0.056884765625, "learning_rate": 2.4406700908184577e-05, "loss": 0.002, "step": 16556 }, { "epoch": 7.7278879813302215, "grad_norm": 0.08984375, "learning_rate": 2.439709574122332e-05, "loss": 0.0029, "step": 16557 }, { "epoch": 7.728354725787631, "grad_norm": 0.056396484375, "learning_rate": 2.43874922020765e-05, "loss": 0.0003, "step": 16558 }, { "epoch": 7.728821470245041, "grad_norm": 0.01141357421875, "learning_rate": 2.4377890290950832e-05, "loss": 0.0002, "step": 16559 }, { "epoch": 7.72928821470245, "grad_norm": 0.0380859375, "learning_rate": 2.4368290008053106e-05, "loss": 0.0003, "step": 16560 }, { "epoch": 7.72975495915986, "grad_norm": 0.014404296875, "learning_rate": 2.4358691353590034e-05, "loss": 0.0002, "step": 16561 }, { "epoch": 7.73022170361727, "grad_norm": 0.0390625, "learning_rate": 2.434909432776823e-05, "loss": 0.0018, "step": 16562 }, { "epoch": 7.730688448074679, "grad_norm": 0.01312255859375, "learning_rate": 2.433949893079436e-05, "loss": 0.0002, "step": 16563 }, { "epoch": 7.731155192532089, "grad_norm": 0.06591796875, "learning_rate": 2.4329905162875057e-05, "loss": 0.0003, "step": 16564 }, { "epoch": 7.731621936989498, "grad_norm": 0.0076904296875, "learning_rate": 2.432031302421682e-05, "loss": 0.0001, "step": 16565 }, { "epoch": 7.732088681446908, "grad_norm": 0.046142578125, "learning_rate": 2.4310722515026218e-05, "loss": 0.0003, "step": 16566 }, { "epoch": 7.732555425904318, "grad_norm": 0.01031494140625, "learning_rate": 2.4301133635509766e-05, "loss": 0.0001, "step": 16567 }, { "epoch": 7.733022170361727, "grad_norm": 0.1962890625, "learning_rate": 2.4291546385873864e-05, "loss": 0.0044, "step": 16568 }, { "epoch": 7.733488914819136, "grad_norm": 0.04345703125, "learning_rate": 2.4281960766324974e-05, "loss": 0.0008, "step": 16569 }, { "epoch": 7.733955659276546, "grad_norm": 0.0172119140625, "learning_rate": 2.427237677706954e-05, "loss": 0.0002, "step": 16570 }, { "epoch": 7.734422403733956, "grad_norm": 0.00799560546875, "learning_rate": 2.4262794418313785e-05, "loss": 0.0001, "step": 16571 }, { "epoch": 7.734889148191365, "grad_norm": 0.013916015625, "learning_rate": 2.425321369026411e-05, "loss": 0.0002, "step": 16572 }, { "epoch": 7.735355892648775, "grad_norm": 0.006011962890625, "learning_rate": 2.4243634593126807e-05, "loss": 0.0001, "step": 16573 }, { "epoch": 7.7358226371061845, "grad_norm": 0.005706787109375, "learning_rate": 2.423405712710808e-05, "loss": 0.0001, "step": 16574 }, { "epoch": 7.736289381563594, "grad_norm": 0.0101318359375, "learning_rate": 2.4224481292414157e-05, "loss": 0.0002, "step": 16575 }, { "epoch": 7.736756126021003, "grad_norm": 0.01806640625, "learning_rate": 2.4214907089251272e-05, "loss": 0.0002, "step": 16576 }, { "epoch": 7.737222870478413, "grad_norm": 0.00860595703125, "learning_rate": 2.4205334517825484e-05, "loss": 0.0002, "step": 16577 }, { "epoch": 7.737689614935823, "grad_norm": 0.00738525390625, "learning_rate": 2.4195763578342934e-05, "loss": 0.0002, "step": 16578 }, { "epoch": 7.7381563593932325, "grad_norm": 0.018798828125, "learning_rate": 2.4186194271009742e-05, "loss": 0.0002, "step": 16579 }, { "epoch": 7.7386231038506414, "grad_norm": 0.0118408203125, "learning_rate": 2.417662659603187e-05, "loss": 0.0001, "step": 16580 }, { "epoch": 7.739089848308051, "grad_norm": 0.0091552734375, "learning_rate": 2.416706055361535e-05, "loss": 0.0001, "step": 16581 }, { "epoch": 7.739556592765461, "grad_norm": 0.1767578125, "learning_rate": 2.4157496143966185e-05, "loss": 0.0009, "step": 16582 }, { "epoch": 7.740023337222871, "grad_norm": 0.015380859375, "learning_rate": 2.4147933367290254e-05, "loss": 0.0002, "step": 16583 }, { "epoch": 7.74049008168028, "grad_norm": 0.0380859375, "learning_rate": 2.4138372223793504e-05, "loss": 0.0003, "step": 16584 }, { "epoch": 7.7409568261376895, "grad_norm": 0.044189453125, "learning_rate": 2.4128812713681736e-05, "loss": 0.0016, "step": 16585 }, { "epoch": 7.741423570595099, "grad_norm": 0.060791015625, "learning_rate": 2.411925483716081e-05, "loss": 0.0024, "step": 16586 }, { "epoch": 7.741890315052509, "grad_norm": 0.0062255859375, "learning_rate": 2.4109698594436557e-05, "loss": 0.0001, "step": 16587 }, { "epoch": 7.742357059509918, "grad_norm": 0.01129150390625, "learning_rate": 2.4100143985714664e-05, "loss": 0.0001, "step": 16588 }, { "epoch": 7.742823803967328, "grad_norm": 0.02392578125, "learning_rate": 2.409059101120088e-05, "loss": 0.0002, "step": 16589 }, { "epoch": 7.743290548424738, "grad_norm": 0.05712890625, "learning_rate": 2.4081039671100913e-05, "loss": 0.0002, "step": 16590 }, { "epoch": 7.743757292882147, "grad_norm": 0.05029296875, "learning_rate": 2.4071489965620387e-05, "loss": 0.0002, "step": 16591 }, { "epoch": 7.744224037339556, "grad_norm": 0.036376953125, "learning_rate": 2.406194189496491e-05, "loss": 0.0027, "step": 16592 }, { "epoch": 7.744690781796966, "grad_norm": 0.0174560546875, "learning_rate": 2.4052395459340116e-05, "loss": 0.0001, "step": 16593 }, { "epoch": 7.745157526254376, "grad_norm": 0.02880859375, "learning_rate": 2.4042850658951477e-05, "loss": 0.0002, "step": 16594 }, { "epoch": 7.745624270711785, "grad_norm": 0.016357421875, "learning_rate": 2.403330749400454e-05, "loss": 0.0001, "step": 16595 }, { "epoch": 7.746091015169195, "grad_norm": 0.01025390625, "learning_rate": 2.4023765964704814e-05, "loss": 0.0001, "step": 16596 }, { "epoch": 7.746557759626604, "grad_norm": 0.0208740234375, "learning_rate": 2.4014226071257672e-05, "loss": 0.0002, "step": 16597 }, { "epoch": 7.747024504084014, "grad_norm": 0.053955078125, "learning_rate": 2.4004687813868553e-05, "loss": 0.003, "step": 16598 }, { "epoch": 7.747491248541424, "grad_norm": 0.016357421875, "learning_rate": 2.3995151192742848e-05, "loss": 0.0002, "step": 16599 }, { "epoch": 7.747957992998833, "grad_norm": 0.1142578125, "learning_rate": 2.3985616208085848e-05, "loss": 0.0028, "step": 16600 }, { "epoch": 7.748424737456243, "grad_norm": 0.0101318359375, "learning_rate": 2.3976082860102855e-05, "loss": 0.0002, "step": 16601 }, { "epoch": 7.7488914819136525, "grad_norm": 0.083984375, "learning_rate": 2.3966551148999195e-05, "loss": 0.0035, "step": 16602 }, { "epoch": 7.749358226371061, "grad_norm": 0.0111083984375, "learning_rate": 2.3957021074980002e-05, "loss": 0.0001, "step": 16603 }, { "epoch": 7.749824970828471, "grad_norm": 0.00946044921875, "learning_rate": 2.3947492638250556e-05, "loss": 0.0001, "step": 16604 }, { "epoch": 7.750291715285881, "grad_norm": 0.006927490234375, "learning_rate": 2.393796583901594e-05, "loss": 0.0001, "step": 16605 }, { "epoch": 7.750758459743291, "grad_norm": 0.01043701171875, "learning_rate": 2.3928440677481324e-05, "loss": 0.0002, "step": 16606 }, { "epoch": 7.751225204200701, "grad_norm": 0.006072998046875, "learning_rate": 2.3918917153851794e-05, "loss": 0.0001, "step": 16607 }, { "epoch": 7.7516919486581095, "grad_norm": 0.03466796875, "learning_rate": 2.390939526833238e-05, "loss": 0.0002, "step": 16608 }, { "epoch": 7.752158693115519, "grad_norm": 0.0169677734375, "learning_rate": 2.3899875021128105e-05, "loss": 0.0002, "step": 16609 }, { "epoch": 7.752625437572929, "grad_norm": 0.029541015625, "learning_rate": 2.389035641244397e-05, "loss": 0.0002, "step": 16610 }, { "epoch": 7.753092182030338, "grad_norm": 0.012451171875, "learning_rate": 2.3880839442484892e-05, "loss": 0.0002, "step": 16611 }, { "epoch": 7.753558926487748, "grad_norm": 0.00787353515625, "learning_rate": 2.3871324111455796e-05, "loss": 0.0002, "step": 16612 }, { "epoch": 7.754025670945158, "grad_norm": 0.00994873046875, "learning_rate": 2.3861810419561582e-05, "loss": 0.0002, "step": 16613 }, { "epoch": 7.754492415402567, "grad_norm": 0.01495361328125, "learning_rate": 2.3852298367007043e-05, "loss": 0.0002, "step": 16614 }, { "epoch": 7.754959159859976, "grad_norm": 0.0206298828125, "learning_rate": 2.3842787953997002e-05, "loss": 0.0002, "step": 16615 }, { "epoch": 7.755425904317386, "grad_norm": 0.04638671875, "learning_rate": 2.383327918073627e-05, "loss": 0.0002, "step": 16616 }, { "epoch": 7.755425904317386, "eval_loss": 2.4245951175689697, "eval_runtime": 84.8559, "eval_samples_per_second": 21.26, "eval_steps_per_second": 2.663, "step": 16616 }, { "epoch": 7.755892648774796, "grad_norm": 0.0247802734375, "learning_rate": 2.3823772047429505e-05, "loss": 0.0002, "step": 16617 }, { "epoch": 7.756359393232206, "grad_norm": 0.03271484375, "learning_rate": 2.3814266554281485e-05, "loss": 0.0002, "step": 16618 }, { "epoch": 7.756826137689615, "grad_norm": 0.00616455078125, "learning_rate": 2.380476270149683e-05, "loss": 0.0001, "step": 16619 }, { "epoch": 7.757292882147024, "grad_norm": 0.010498046875, "learning_rate": 2.3795260489280145e-05, "loss": 0.0001, "step": 16620 }, { "epoch": 7.757759626604434, "grad_norm": 0.05615234375, "learning_rate": 2.378575991783606e-05, "loss": 0.0021, "step": 16621 }, { "epoch": 7.758226371061844, "grad_norm": 0.0274658203125, "learning_rate": 2.3776260987369148e-05, "loss": 0.0002, "step": 16622 }, { "epoch": 7.758693115519253, "grad_norm": 0.0118408203125, "learning_rate": 2.3766763698083884e-05, "loss": 0.0002, "step": 16623 }, { "epoch": 7.759159859976663, "grad_norm": 0.018798828125, "learning_rate": 2.3757268050184777e-05, "loss": 0.0003, "step": 16624 }, { "epoch": 7.7596266044340725, "grad_norm": 0.01385498046875, "learning_rate": 2.3747774043876316e-05, "loss": 0.0002, "step": 16625 }, { "epoch": 7.760093348891482, "grad_norm": 0.00665283203125, "learning_rate": 2.373828167936284e-05, "loss": 0.0001, "step": 16626 }, { "epoch": 7.760560093348891, "grad_norm": 0.0126953125, "learning_rate": 2.372879095684879e-05, "loss": 0.0001, "step": 16627 }, { "epoch": 7.761026837806301, "grad_norm": 0.0281982421875, "learning_rate": 2.3719301876538514e-05, "loss": 0.0002, "step": 16628 }, { "epoch": 7.761493582263711, "grad_norm": 0.07080078125, "learning_rate": 2.370981443863628e-05, "loss": 0.0004, "step": 16629 }, { "epoch": 7.761960326721121, "grad_norm": 0.00933837890625, "learning_rate": 2.3700328643346383e-05, "loss": 0.0001, "step": 16630 }, { "epoch": 7.7624270711785295, "grad_norm": 0.1416015625, "learning_rate": 2.3690844490873086e-05, "loss": 0.0005, "step": 16631 }, { "epoch": 7.762893815635939, "grad_norm": 0.0054931640625, "learning_rate": 2.368136198142056e-05, "loss": 0.0001, "step": 16632 }, { "epoch": 7.763360560093349, "grad_norm": 0.123046875, "learning_rate": 2.3671881115193005e-05, "loss": 0.0003, "step": 16633 }, { "epoch": 7.763827304550759, "grad_norm": 0.05078125, "learning_rate": 2.3662401892394525e-05, "loss": 0.0038, "step": 16634 }, { "epoch": 7.764294049008168, "grad_norm": 0.0146484375, "learning_rate": 2.365292431322922e-05, "loss": 0.0002, "step": 16635 }, { "epoch": 7.764760793465578, "grad_norm": 0.02587890625, "learning_rate": 2.364344837790119e-05, "loss": 0.0002, "step": 16636 }, { "epoch": 7.765227537922987, "grad_norm": 0.0118408203125, "learning_rate": 2.3633974086614408e-05, "loss": 0.0002, "step": 16637 }, { "epoch": 7.765694282380396, "grad_norm": 0.013671875, "learning_rate": 2.3624501439572888e-05, "loss": 0.0001, "step": 16638 }, { "epoch": 7.766161026837806, "grad_norm": 0.05615234375, "learning_rate": 2.3615030436980624e-05, "loss": 0.0018, "step": 16639 }, { "epoch": 7.766627771295216, "grad_norm": 0.0177001953125, "learning_rate": 2.3605561079041482e-05, "loss": 0.0002, "step": 16640 }, { "epoch": 7.767094515752626, "grad_norm": 0.01202392578125, "learning_rate": 2.3596093365959372e-05, "loss": 0.0002, "step": 16641 }, { "epoch": 7.7675612602100355, "grad_norm": 0.0157470703125, "learning_rate": 2.358662729793817e-05, "loss": 0.0002, "step": 16642 }, { "epoch": 7.768028004667444, "grad_norm": 0.017333984375, "learning_rate": 2.3577162875181625e-05, "loss": 0.0002, "step": 16643 }, { "epoch": 7.768494749124854, "grad_norm": 0.015869140625, "learning_rate": 2.3567700097893564e-05, "loss": 0.0002, "step": 16644 }, { "epoch": 7.768961493582264, "grad_norm": 0.00830078125, "learning_rate": 2.3558238966277756e-05, "loss": 0.0002, "step": 16645 }, { "epoch": 7.769428238039673, "grad_norm": 0.0162353515625, "learning_rate": 2.354877948053784e-05, "loss": 0.0002, "step": 16646 }, { "epoch": 7.769894982497083, "grad_norm": 0.03076171875, "learning_rate": 2.3539321640877533e-05, "loss": 0.0002, "step": 16647 }, { "epoch": 7.7703617269544925, "grad_norm": 0.01007080078125, "learning_rate": 2.3529865447500487e-05, "loss": 0.0001, "step": 16648 }, { "epoch": 7.770828471411902, "grad_norm": 0.0115966796875, "learning_rate": 2.3520410900610268e-05, "loss": 0.0001, "step": 16649 }, { "epoch": 7.771295215869312, "grad_norm": 0.01397705078125, "learning_rate": 2.3510958000410443e-05, "loss": 0.0002, "step": 16650 }, { "epoch": 7.771761960326721, "grad_norm": 0.016845703125, "learning_rate": 2.3501506747104596e-05, "loss": 0.0001, "step": 16651 }, { "epoch": 7.772228704784131, "grad_norm": 0.02294921875, "learning_rate": 2.3492057140896152e-05, "loss": 0.0002, "step": 16652 }, { "epoch": 7.7726954492415405, "grad_norm": 0.19921875, "learning_rate": 2.3482609181988636e-05, "loss": 0.0003, "step": 16653 }, { "epoch": 7.7731621936989495, "grad_norm": 0.02587890625, "learning_rate": 2.3473162870585407e-05, "loss": 0.0002, "step": 16654 }, { "epoch": 7.773628938156359, "grad_norm": 0.03515625, "learning_rate": 2.3463718206889894e-05, "loss": 0.0024, "step": 16655 }, { "epoch": 7.774095682613769, "grad_norm": 0.046142578125, "learning_rate": 2.3454275191105478e-05, "loss": 0.0002, "step": 16656 }, { "epoch": 7.774562427071179, "grad_norm": 0.01177978515625, "learning_rate": 2.344483382343542e-05, "loss": 0.0002, "step": 16657 }, { "epoch": 7.775029171528588, "grad_norm": 0.0228271484375, "learning_rate": 2.343539410408302e-05, "loss": 0.0002, "step": 16658 }, { "epoch": 7.7754959159859975, "grad_norm": 0.00994873046875, "learning_rate": 2.342595603325156e-05, "loss": 0.0002, "step": 16659 }, { "epoch": 7.775962660443407, "grad_norm": 0.01422119140625, "learning_rate": 2.3416519611144216e-05, "loss": 0.0002, "step": 16660 }, { "epoch": 7.776429404900817, "grad_norm": 0.007354736328125, "learning_rate": 2.3407084837964166e-05, "loss": 0.0001, "step": 16661 }, { "epoch": 7.776896149358226, "grad_norm": 0.007537841796875, "learning_rate": 2.339765171391458e-05, "loss": 0.0001, "step": 16662 }, { "epoch": 7.777362893815636, "grad_norm": 0.00897216796875, "learning_rate": 2.3388220239198532e-05, "loss": 0.0002, "step": 16663 }, { "epoch": 7.777829638273046, "grad_norm": 0.06396484375, "learning_rate": 2.3378790414019093e-05, "loss": 0.0033, "step": 16664 }, { "epoch": 7.778296382730455, "grad_norm": 0.01007080078125, "learning_rate": 2.3369362238579372e-05, "loss": 0.0001, "step": 16665 }, { "epoch": 7.778763127187864, "grad_norm": 0.00701904296875, "learning_rate": 2.3359935713082248e-05, "loss": 0.0001, "step": 16666 }, { "epoch": 7.779229871645274, "grad_norm": 0.00958251953125, "learning_rate": 2.3350510837730733e-05, "loss": 0.0002, "step": 16667 }, { "epoch": 7.779696616102684, "grad_norm": 0.00982666015625, "learning_rate": 2.334108761272781e-05, "loss": 0.0002, "step": 16668 }, { "epoch": 7.780163360560094, "grad_norm": 0.034423828125, "learning_rate": 2.333166603827628e-05, "loss": 0.0002, "step": 16669 }, { "epoch": 7.780630105017503, "grad_norm": 0.011962890625, "learning_rate": 2.332224611457905e-05, "loss": 0.0001, "step": 16670 }, { "epoch": 7.781096849474912, "grad_norm": 0.1298828125, "learning_rate": 2.331282784183897e-05, "loss": 0.0037, "step": 16671 }, { "epoch": 7.781563593932322, "grad_norm": 0.0283203125, "learning_rate": 2.3303411220258752e-05, "loss": 0.0002, "step": 16672 }, { "epoch": 7.782030338389731, "grad_norm": 0.01263427734375, "learning_rate": 2.3293996250041195e-05, "loss": 0.0002, "step": 16673 }, { "epoch": 7.782497082847141, "grad_norm": 0.00946044921875, "learning_rate": 2.328458293138904e-05, "loss": 0.0001, "step": 16674 }, { "epoch": 7.782963827304551, "grad_norm": 0.02001953125, "learning_rate": 2.327517126450489e-05, "loss": 0.0002, "step": 16675 }, { "epoch": 7.7834305717619605, "grad_norm": 0.0098876953125, "learning_rate": 2.3265761249591445e-05, "loss": 0.0001, "step": 16676 }, { "epoch": 7.78389731621937, "grad_norm": 0.056640625, "learning_rate": 2.3256352886851317e-05, "loss": 0.0002, "step": 16677 }, { "epoch": 7.784364060676779, "grad_norm": 0.04345703125, "learning_rate": 2.3246946176487038e-05, "loss": 0.0022, "step": 16678 }, { "epoch": 7.784830805134189, "grad_norm": 0.032470703125, "learning_rate": 2.3237541118701155e-05, "loss": 0.0003, "step": 16679 }, { "epoch": 7.785297549591599, "grad_norm": 0.0279541015625, "learning_rate": 2.322813771369622e-05, "loss": 0.0002, "step": 16680 }, { "epoch": 7.785764294049008, "grad_norm": 0.0194091796875, "learning_rate": 2.3218735961674633e-05, "loss": 0.0002, "step": 16681 }, { "epoch": 7.7862310385064175, "grad_norm": 0.0068359375, "learning_rate": 2.3209335862838844e-05, "loss": 0.0001, "step": 16682 }, { "epoch": 7.786697782963827, "grad_norm": 0.00518798828125, "learning_rate": 2.3199937417391283e-05, "loss": 0.0001, "step": 16683 }, { "epoch": 7.787164527421237, "grad_norm": 0.02880859375, "learning_rate": 2.3190540625534252e-05, "loss": 0.0001, "step": 16684 }, { "epoch": 7.787631271878647, "grad_norm": 0.0086669921875, "learning_rate": 2.3181145487470135e-05, "loss": 0.0001, "step": 16685 }, { "epoch": 7.788098016336056, "grad_norm": 0.026611328125, "learning_rate": 2.317175200340116e-05, "loss": 0.0002, "step": 16686 }, { "epoch": 7.788564760793466, "grad_norm": 0.01171875, "learning_rate": 2.3162360173529608e-05, "loss": 0.0002, "step": 16687 }, { "epoch": 7.789031505250875, "grad_norm": 0.0269775390625, "learning_rate": 2.3152969998057718e-05, "loss": 0.0002, "step": 16688 }, { "epoch": 7.789498249708284, "grad_norm": 0.043212890625, "learning_rate": 2.3143581477187627e-05, "loss": 0.0003, "step": 16689 }, { "epoch": 7.789964994165694, "grad_norm": 0.07275390625, "learning_rate": 2.3134194611121484e-05, "loss": 0.0002, "step": 16690 }, { "epoch": 7.790431738623104, "grad_norm": 0.00872802734375, "learning_rate": 2.3124809400061454e-05, "loss": 0.0001, "step": 16691 }, { "epoch": 7.790898483080514, "grad_norm": 0.0123291015625, "learning_rate": 2.311542584420955e-05, "loss": 0.0002, "step": 16692 }, { "epoch": 7.791365227537923, "grad_norm": 0.0439453125, "learning_rate": 2.310604394376782e-05, "loss": 0.0003, "step": 16693 }, { "epoch": 7.791831971995332, "grad_norm": 0.0625, "learning_rate": 2.3096663698938314e-05, "loss": 0.0017, "step": 16694 }, { "epoch": 7.792298716452742, "grad_norm": 0.01007080078125, "learning_rate": 2.308728510992294e-05, "loss": 0.0002, "step": 16695 }, { "epoch": 7.792765460910152, "grad_norm": 0.1455078125, "learning_rate": 2.3077908176923648e-05, "loss": 0.0003, "step": 16696 }, { "epoch": 7.793232205367561, "grad_norm": 0.059814453125, "learning_rate": 2.306853290014237e-05, "loss": 0.0003, "step": 16697 }, { "epoch": 7.793698949824971, "grad_norm": 0.0201416015625, "learning_rate": 2.3059159279780907e-05, "loss": 0.0002, "step": 16698 }, { "epoch": 7.7941656942823805, "grad_norm": 0.012451171875, "learning_rate": 2.3049787316041103e-05, "loss": 0.0002, "step": 16699 }, { "epoch": 7.79463243873979, "grad_norm": 0.053955078125, "learning_rate": 2.304041700912479e-05, "loss": 0.0023, "step": 16700 }, { "epoch": 7.795099183197199, "grad_norm": 0.010986328125, "learning_rate": 2.3031048359233664e-05, "loss": 0.0002, "step": 16701 }, { "epoch": 7.795565927654609, "grad_norm": 0.0693359375, "learning_rate": 2.302168136656949e-05, "loss": 0.0024, "step": 16702 }, { "epoch": 7.796032672112019, "grad_norm": 0.01025390625, "learning_rate": 2.3012316031333904e-05, "loss": 0.0001, "step": 16703 }, { "epoch": 7.796499416569429, "grad_norm": 0.0126953125, "learning_rate": 2.3002952353728567e-05, "loss": 0.0002, "step": 16704 }, { "epoch": 7.7969661610268375, "grad_norm": 0.02392578125, "learning_rate": 2.299359033395512e-05, "loss": 0.0002, "step": 16705 }, { "epoch": 7.797432905484247, "grad_norm": 0.01019287109375, "learning_rate": 2.2984229972215088e-05, "loss": 0.0002, "step": 16706 }, { "epoch": 7.797899649941657, "grad_norm": 0.020751953125, "learning_rate": 2.2974871268710042e-05, "loss": 0.0002, "step": 16707 }, { "epoch": 7.798366394399067, "grad_norm": 0.0419921875, "learning_rate": 2.2965514223641493e-05, "loss": 0.0002, "step": 16708 }, { "epoch": 7.798833138856476, "grad_norm": 0.07275390625, "learning_rate": 2.2956158837210872e-05, "loss": 0.0007, "step": 16709 }, { "epoch": 7.799299883313886, "grad_norm": 0.039306640625, "learning_rate": 2.294680510961964e-05, "loss": 0.0002, "step": 16710 }, { "epoch": 7.799766627771295, "grad_norm": 0.0179443359375, "learning_rate": 2.2937453041069202e-05, "loss": 0.0002, "step": 16711 }, { "epoch": 7.800233372228705, "grad_norm": 0.00555419921875, "learning_rate": 2.2928102631760883e-05, "loss": 0.0001, "step": 16712 }, { "epoch": 7.800700116686114, "grad_norm": 0.006591796875, "learning_rate": 2.291875388189605e-05, "loss": 0.0001, "step": 16713 }, { "epoch": 7.801166861143524, "grad_norm": 0.0118408203125, "learning_rate": 2.290940679167597e-05, "loss": 0.0001, "step": 16714 }, { "epoch": 7.801633605600934, "grad_norm": 0.033447265625, "learning_rate": 2.290006136130187e-05, "loss": 0.0002, "step": 16715 }, { "epoch": 7.802100350058343, "grad_norm": 0.01007080078125, "learning_rate": 2.2890717590974996e-05, "loss": 0.0001, "step": 16716 }, { "epoch": 7.802567094515752, "grad_norm": 0.042724609375, "learning_rate": 2.288137548089655e-05, "loss": 0.0026, "step": 16717 }, { "epoch": 7.803033838973162, "grad_norm": 0.00958251953125, "learning_rate": 2.2872035031267625e-05, "loss": 0.0002, "step": 16718 }, { "epoch": 7.803500583430572, "grad_norm": 0.06640625, "learning_rate": 2.286269624228936e-05, "loss": 0.0003, "step": 16719 }, { "epoch": 7.803967327887982, "grad_norm": 0.021728515625, "learning_rate": 2.2853359114162865e-05, "loss": 0.0002, "step": 16720 }, { "epoch": 7.804434072345391, "grad_norm": 0.00921630859375, "learning_rate": 2.284402364708911e-05, "loss": 0.0001, "step": 16721 }, { "epoch": 7.8049008168028005, "grad_norm": 0.0269775390625, "learning_rate": 2.283468984126913e-05, "loss": 0.0002, "step": 16722 }, { "epoch": 7.80536756126021, "grad_norm": 0.017333984375, "learning_rate": 2.2825357696903925e-05, "loss": 0.0002, "step": 16723 }, { "epoch": 7.805834305717619, "grad_norm": 0.013427734375, "learning_rate": 2.2816027214194368e-05, "loss": 0.0002, "step": 16724 }, { "epoch": 7.806301050175029, "grad_norm": 0.044921875, "learning_rate": 2.2806698393341387e-05, "loss": 0.0002, "step": 16725 }, { "epoch": 7.806767794632439, "grad_norm": 0.0086669921875, "learning_rate": 2.2797371234545863e-05, "loss": 0.0002, "step": 16726 }, { "epoch": 7.8072345390898485, "grad_norm": 0.054931640625, "learning_rate": 2.2788045738008557e-05, "loss": 0.004, "step": 16727 }, { "epoch": 7.807701283547258, "grad_norm": 0.0107421875, "learning_rate": 2.27787219039303e-05, "loss": 0.0002, "step": 16728 }, { "epoch": 7.808168028004667, "grad_norm": 0.008056640625, "learning_rate": 2.2769399732511863e-05, "loss": 0.0001, "step": 16729 }, { "epoch": 7.808634772462077, "grad_norm": 0.0250244140625, "learning_rate": 2.2760079223953924e-05, "loss": 0.0002, "step": 16730 }, { "epoch": 7.809101516919487, "grad_norm": 0.005859375, "learning_rate": 2.275076037845717e-05, "loss": 0.0001, "step": 16731 }, { "epoch": 7.809568261376896, "grad_norm": 0.006683349609375, "learning_rate": 2.274144319622228e-05, "loss": 0.0001, "step": 16732 }, { "epoch": 7.8100350058343055, "grad_norm": 0.0390625, "learning_rate": 2.273212767744981e-05, "loss": 0.0002, "step": 16733 }, { "epoch": 7.810501750291715, "grad_norm": 0.015625, "learning_rate": 2.2722813822340393e-05, "loss": 0.0002, "step": 16734 }, { "epoch": 7.810968494749125, "grad_norm": 0.01348876953125, "learning_rate": 2.2713501631094503e-05, "loss": 0.0002, "step": 16735 }, { "epoch": 7.811435239206534, "grad_norm": 0.04150390625, "learning_rate": 2.2704191103912674e-05, "loss": 0.0037, "step": 16736 }, { "epoch": 7.811901983663944, "grad_norm": 0.010986328125, "learning_rate": 2.2694882240995408e-05, "loss": 0.0002, "step": 16737 }, { "epoch": 7.812368728121354, "grad_norm": 0.01324462890625, "learning_rate": 2.268557504254306e-05, "loss": 0.0002, "step": 16738 }, { "epoch": 7.812835472578763, "grad_norm": 0.1025390625, "learning_rate": 2.2676269508756066e-05, "loss": 0.0023, "step": 16739 }, { "epoch": 7.813302217036172, "grad_norm": 0.010986328125, "learning_rate": 2.2666965639834813e-05, "loss": 0.0001, "step": 16740 }, { "epoch": 7.813768961493582, "grad_norm": 0.007537841796875, "learning_rate": 2.2657663435979558e-05, "loss": 0.0001, "step": 16741 }, { "epoch": 7.814235705950992, "grad_norm": 0.0284423828125, "learning_rate": 2.264836289739063e-05, "loss": 0.0002, "step": 16742 }, { "epoch": 7.814702450408402, "grad_norm": 0.03759765625, "learning_rate": 2.2639064024268297e-05, "loss": 0.0019, "step": 16743 }, { "epoch": 7.815169194865811, "grad_norm": 0.0164794921875, "learning_rate": 2.2629766816812724e-05, "loss": 0.0002, "step": 16744 }, { "epoch": 7.81563593932322, "grad_norm": 0.041748046875, "learning_rate": 2.262047127522411e-05, "loss": 0.0002, "step": 16745 }, { "epoch": 7.81610268378063, "grad_norm": 0.0126953125, "learning_rate": 2.2611177399702632e-05, "loss": 0.0002, "step": 16746 }, { "epoch": 7.81656942823804, "grad_norm": 0.012939453125, "learning_rate": 2.2601885190448345e-05, "loss": 0.0002, "step": 16747 }, { "epoch": 7.817036172695449, "grad_norm": 0.0198974609375, "learning_rate": 2.2592594647661335e-05, "loss": 0.0002, "step": 16748 }, { "epoch": 7.817502917152859, "grad_norm": 0.03564453125, "learning_rate": 2.2583305771541686e-05, "loss": 0.0034, "step": 16749 }, { "epoch": 7.8179696616102685, "grad_norm": 0.00933837890625, "learning_rate": 2.2574018562289324e-05, "loss": 0.0001, "step": 16750 }, { "epoch": 7.818436406067678, "grad_norm": 0.041015625, "learning_rate": 2.2564733020104277e-05, "loss": 0.0002, "step": 16751 }, { "epoch": 7.818903150525087, "grad_norm": 0.01544189453125, "learning_rate": 2.2555449145186426e-05, "loss": 0.0002, "step": 16752 }, { "epoch": 7.819369894982497, "grad_norm": 0.039306640625, "learning_rate": 2.254616693773567e-05, "loss": 0.0031, "step": 16753 }, { "epoch": 7.819836639439907, "grad_norm": 0.05712890625, "learning_rate": 2.2536886397951907e-05, "loss": 0.0002, "step": 16754 }, { "epoch": 7.820303383897317, "grad_norm": 0.048828125, "learning_rate": 2.25276075260349e-05, "loss": 0.0002, "step": 16755 }, { "epoch": 7.8207701283547255, "grad_norm": 0.00927734375, "learning_rate": 2.2518330322184455e-05, "loss": 0.0001, "step": 16756 }, { "epoch": 7.821236872812135, "grad_norm": 0.125, "learning_rate": 2.250905478660036e-05, "loss": 0.0004, "step": 16757 }, { "epoch": 7.821703617269545, "grad_norm": 0.007354736328125, "learning_rate": 2.249978091948226e-05, "loss": 0.0002, "step": 16758 }, { "epoch": 7.822170361726954, "grad_norm": 0.015869140625, "learning_rate": 2.2490508721029867e-05, "loss": 0.0002, "step": 16759 }, { "epoch": 7.822637106184364, "grad_norm": 0.034423828125, "learning_rate": 2.2481238191442854e-05, "loss": 0.0019, "step": 16760 }, { "epoch": 7.823103850641774, "grad_norm": 0.0081787109375, "learning_rate": 2.2471969330920783e-05, "loss": 0.0002, "step": 16761 }, { "epoch": 7.823570595099183, "grad_norm": 0.008544921875, "learning_rate": 2.2462702139663207e-05, "loss": 0.0002, "step": 16762 }, { "epoch": 7.824037339556593, "grad_norm": 0.007049560546875, "learning_rate": 2.2453436617869704e-05, "loss": 0.0001, "step": 16763 }, { "epoch": 7.824504084014002, "grad_norm": 0.0126953125, "learning_rate": 2.2444172765739734e-05, "loss": 0.0001, "step": 16764 }, { "epoch": 7.824970828471412, "grad_norm": 0.041259765625, "learning_rate": 2.243491058347277e-05, "loss": 0.0002, "step": 16765 }, { "epoch": 7.825437572928822, "grad_norm": 0.049560546875, "learning_rate": 2.2425650071268255e-05, "loss": 0.0021, "step": 16766 }, { "epoch": 7.825904317386231, "grad_norm": 0.007354736328125, "learning_rate": 2.241639122932555e-05, "loss": 0.0002, "step": 16767 }, { "epoch": 7.82637106184364, "grad_norm": 0.007080078125, "learning_rate": 2.2407134057844026e-05, "loss": 0.0001, "step": 16768 }, { "epoch": 7.82683780630105, "grad_norm": 0.015380859375, "learning_rate": 2.2397878557023023e-05, "loss": 0.0001, "step": 16769 }, { "epoch": 7.82730455075846, "grad_norm": 0.0255126953125, "learning_rate": 2.238862472706176e-05, "loss": 0.002, "step": 16770 }, { "epoch": 7.82777129521587, "grad_norm": 0.0137939453125, "learning_rate": 2.2379372568159517e-05, "loss": 0.0001, "step": 16771 }, { "epoch": 7.828238039673279, "grad_norm": 0.013427734375, "learning_rate": 2.237012208051554e-05, "loss": 0.0002, "step": 16772 }, { "epoch": 7.8287047841306885, "grad_norm": 0.0184326171875, "learning_rate": 2.236087326432893e-05, "loss": 0.0002, "step": 16773 }, { "epoch": 7.829171528588098, "grad_norm": 0.02734375, "learning_rate": 2.2351626119798874e-05, "loss": 0.0002, "step": 16774 }, { "epoch": 7.829638273045507, "grad_norm": 0.00823974609375, "learning_rate": 2.234238064712447e-05, "loss": 0.0001, "step": 16775 }, { "epoch": 7.830105017502917, "grad_norm": 0.008544921875, "learning_rate": 2.2333136846504754e-05, "loss": 0.0002, "step": 16776 }, { "epoch": 7.830571761960327, "grad_norm": 0.01190185546875, "learning_rate": 2.2323894718138783e-05, "loss": 0.0002, "step": 16777 }, { "epoch": 7.831038506417737, "grad_norm": 0.025146484375, "learning_rate": 2.2314654262225563e-05, "loss": 0.0002, "step": 16778 }, { "epoch": 7.8315052508751455, "grad_norm": 0.0791015625, "learning_rate": 2.2305415478964e-05, "loss": 0.005, "step": 16779 }, { "epoch": 7.831971995332555, "grad_norm": 0.005615234375, "learning_rate": 2.2296178368553046e-05, "loss": 0.0001, "step": 16780 }, { "epoch": 7.832438739789965, "grad_norm": 0.4140625, "learning_rate": 2.2286942931191613e-05, "loss": 0.0011, "step": 16781 }, { "epoch": 7.832905484247375, "grad_norm": 0.05126953125, "learning_rate": 2.22777091670785e-05, "loss": 0.0002, "step": 16782 }, { "epoch": 7.833372228704784, "grad_norm": 0.03955078125, "learning_rate": 2.2268477076412562e-05, "loss": 0.0019, "step": 16783 }, { "epoch": 7.833838973162194, "grad_norm": 0.07568359375, "learning_rate": 2.225924665939254e-05, "loss": 0.0027, "step": 16784 }, { "epoch": 7.834305717619603, "grad_norm": 0.023193359375, "learning_rate": 2.225001791621718e-05, "loss": 0.0002, "step": 16785 }, { "epoch": 7.834772462077013, "grad_norm": 0.036865234375, "learning_rate": 2.2240790847085234e-05, "loss": 0.0027, "step": 16786 }, { "epoch": 7.835239206534422, "grad_norm": 0.07275390625, "learning_rate": 2.2231565452195312e-05, "loss": 0.0041, "step": 16787 }, { "epoch": 7.835705950991832, "grad_norm": 0.0220947265625, "learning_rate": 2.2222341731746065e-05, "loss": 0.0002, "step": 16788 }, { "epoch": 7.836172695449242, "grad_norm": 0.06591796875, "learning_rate": 2.2213119685936135e-05, "loss": 0.0015, "step": 16789 }, { "epoch": 7.8366394399066515, "grad_norm": 0.00946044921875, "learning_rate": 2.2203899314964004e-05, "loss": 0.0001, "step": 16790 }, { "epoch": 7.83710618436406, "grad_norm": 0.01806640625, "learning_rate": 2.219468061902824e-05, "loss": 0.0002, "step": 16791 }, { "epoch": 7.83757292882147, "grad_norm": 0.005706787109375, "learning_rate": 2.2185463598327362e-05, "loss": 0.0001, "step": 16792 }, { "epoch": 7.83803967327888, "grad_norm": 0.0076904296875, "learning_rate": 2.217624825305975e-05, "loss": 0.0001, "step": 16793 }, { "epoch": 7.83850641773629, "grad_norm": 0.0595703125, "learning_rate": 2.2167034583423873e-05, "loss": 0.003, "step": 16794 }, { "epoch": 7.838973162193699, "grad_norm": 0.03662109375, "learning_rate": 2.2157822589618127e-05, "loss": 0.0037, "step": 16795 }, { "epoch": 7.8394399066511085, "grad_norm": 0.02392578125, "learning_rate": 2.2148612271840784e-05, "loss": 0.0002, "step": 16796 }, { "epoch": 7.839906651108518, "grad_norm": 0.0115966796875, "learning_rate": 2.213940363029021e-05, "loss": 0.0002, "step": 16797 }, { "epoch": 7.840373395565928, "grad_norm": 0.007293701171875, "learning_rate": 2.2130196665164693e-05, "loss": 0.0001, "step": 16798 }, { "epoch": 7.840840140023337, "grad_norm": 0.00823974609375, "learning_rate": 2.2120991376662405e-05, "loss": 0.0001, "step": 16799 }, { "epoch": 7.841306884480747, "grad_norm": 0.0556640625, "learning_rate": 2.2111787764981586e-05, "loss": 0.0002, "step": 16800 }, { "epoch": 7.8417736289381565, "grad_norm": 0.00872802734375, "learning_rate": 2.210258583032042e-05, "loss": 0.0001, "step": 16801 }, { "epoch": 7.8422403733955655, "grad_norm": 0.0108642578125, "learning_rate": 2.2093385572876978e-05, "loss": 0.0002, "step": 16802 }, { "epoch": 7.842707117852975, "grad_norm": 0.007049560546875, "learning_rate": 2.208418699284942e-05, "loss": 0.0001, "step": 16803 }, { "epoch": 7.843173862310385, "grad_norm": 0.0216064453125, "learning_rate": 2.2074990090435733e-05, "loss": 0.0002, "step": 16804 }, { "epoch": 7.843640606767795, "grad_norm": 0.0211181640625, "learning_rate": 2.2065794865833967e-05, "loss": 0.0003, "step": 16805 }, { "epoch": 7.844107351225205, "grad_norm": 0.01287841796875, "learning_rate": 2.205660131924213e-05, "loss": 0.0002, "step": 16806 }, { "epoch": 7.8445740956826135, "grad_norm": 0.01153564453125, "learning_rate": 2.2047409450858124e-05, "loss": 0.0002, "step": 16807 }, { "epoch": 7.845040840140023, "grad_norm": 0.01177978515625, "learning_rate": 2.203821926087989e-05, "loss": 0.0001, "step": 16808 }, { "epoch": 7.845507584597433, "grad_norm": 0.0078125, "learning_rate": 2.2029030749505307e-05, "loss": 0.0002, "step": 16809 }, { "epoch": 7.845974329054842, "grad_norm": 0.017822265625, "learning_rate": 2.2019843916932213e-05, "loss": 0.0002, "step": 16810 }, { "epoch": 7.846441073512252, "grad_norm": 0.0050048828125, "learning_rate": 2.201065876335837e-05, "loss": 0.0001, "step": 16811 }, { "epoch": 7.846907817969662, "grad_norm": 0.01239013671875, "learning_rate": 2.2001475288981578e-05, "loss": 0.0002, "step": 16812 }, { "epoch": 7.847374562427071, "grad_norm": 0.0439453125, "learning_rate": 2.1992293493999583e-05, "loss": 0.0026, "step": 16813 }, { "epoch": 7.847841306884481, "grad_norm": 0.0791015625, "learning_rate": 2.198311337861004e-05, "loss": 0.0045, "step": 16814 }, { "epoch": 7.84830805134189, "grad_norm": 0.05224609375, "learning_rate": 2.1973934943010653e-05, "loss": 0.0003, "step": 16815 }, { "epoch": 7.8487747957993, "grad_norm": 0.036865234375, "learning_rate": 2.1964758187398994e-05, "loss": 0.0015, "step": 16816 }, { "epoch": 7.84924154025671, "grad_norm": 0.01220703125, "learning_rate": 2.1955583111972667e-05, "loss": 0.0002, "step": 16817 }, { "epoch": 7.849708284714119, "grad_norm": 0.0118408203125, "learning_rate": 2.194640971692926e-05, "loss": 0.0001, "step": 16818 }, { "epoch": 7.850175029171528, "grad_norm": 0.015869140625, "learning_rate": 2.193723800246622e-05, "loss": 0.0002, "step": 16819 }, { "epoch": 7.850641773628938, "grad_norm": 0.10009765625, "learning_rate": 2.1928067968781063e-05, "loss": 0.0003, "step": 16820 }, { "epoch": 7.851108518086348, "grad_norm": 0.0101318359375, "learning_rate": 2.1918899616071252e-05, "loss": 0.0001, "step": 16821 }, { "epoch": 7.851575262543757, "grad_norm": 0.01226806640625, "learning_rate": 2.1909732944534133e-05, "loss": 0.0002, "step": 16822 }, { "epoch": 7.852042007001167, "grad_norm": 0.0478515625, "learning_rate": 2.1900567954367114e-05, "loss": 0.0002, "step": 16823 }, { "epoch": 7.8525087514585765, "grad_norm": 0.007659912109375, "learning_rate": 2.1891404645767544e-05, "loss": 0.0002, "step": 16824 }, { "epoch": 7.852975495915986, "grad_norm": 0.0277099609375, "learning_rate": 2.1882243018932656e-05, "loss": 0.0002, "step": 16825 }, { "epoch": 7.853442240373395, "grad_norm": 0.01080322265625, "learning_rate": 2.1873083074059762e-05, "loss": 0.0001, "step": 16826 }, { "epoch": 7.853908984830805, "grad_norm": 0.06494140625, "learning_rate": 2.1863924811346102e-05, "loss": 0.0024, "step": 16827 }, { "epoch": 7.854375729288215, "grad_norm": 0.0126953125, "learning_rate": 2.1854768230988797e-05, "loss": 0.0001, "step": 16828 }, { "epoch": 7.854842473745625, "grad_norm": 0.013427734375, "learning_rate": 2.1845613333185043e-05, "loss": 0.0002, "step": 16829 }, { "epoch": 7.8553092182030335, "grad_norm": 0.0191650390625, "learning_rate": 2.1836460118131972e-05, "loss": 0.0002, "step": 16830 }, { "epoch": 7.855775962660443, "grad_norm": 0.0184326171875, "learning_rate": 2.1827308586026608e-05, "loss": 0.0002, "step": 16831 }, { "epoch": 7.856242707117853, "grad_norm": 0.0272216796875, "learning_rate": 2.1818158737066064e-05, "loss": 0.0002, "step": 16832 }, { "epoch": 7.856709451575263, "grad_norm": 0.033447265625, "learning_rate": 2.1809010571447262e-05, "loss": 0.0003, "step": 16833 }, { "epoch": 7.857176196032672, "grad_norm": 0.0400390625, "learning_rate": 2.1799864089367227e-05, "loss": 0.0026, "step": 16834 }, { "epoch": 7.857642940490082, "grad_norm": 0.0595703125, "learning_rate": 2.179071929102291e-05, "loss": 0.0002, "step": 16835 }, { "epoch": 7.858109684947491, "grad_norm": 0.01373291015625, "learning_rate": 2.1781576176611153e-05, "loss": 0.0002, "step": 16836 }, { "epoch": 7.858576429404901, "grad_norm": 0.022705078125, "learning_rate": 2.1772434746328853e-05, "loss": 0.0002, "step": 16837 }, { "epoch": 7.85904317386231, "grad_norm": 0.025390625, "learning_rate": 2.1763295000372842e-05, "loss": 0.0002, "step": 16838 }, { "epoch": 7.85950991831972, "grad_norm": 0.055908203125, "learning_rate": 2.1754156938939873e-05, "loss": 0.0026, "step": 16839 }, { "epoch": 7.85997666277713, "grad_norm": 0.0087890625, "learning_rate": 2.1745020562226715e-05, "loss": 0.0002, "step": 16840 }, { "epoch": 7.8604434072345395, "grad_norm": 0.0108642578125, "learning_rate": 2.1735885870430127e-05, "loss": 0.0002, "step": 16841 }, { "epoch": 7.860910151691948, "grad_norm": 0.0277099609375, "learning_rate": 2.1726752863746725e-05, "loss": 0.0002, "step": 16842 }, { "epoch": 7.861376896149358, "grad_norm": 0.0264892578125, "learning_rate": 2.1717621542373168e-05, "loss": 0.0002, "step": 16843 }, { "epoch": 7.861843640606768, "grad_norm": 0.03466796875, "learning_rate": 2.170849190650611e-05, "loss": 0.0002, "step": 16844 }, { "epoch": 7.862310385064177, "grad_norm": 0.006988525390625, "learning_rate": 2.1699363956342058e-05, "loss": 0.0001, "step": 16845 }, { "epoch": 7.862777129521587, "grad_norm": 0.0164794921875, "learning_rate": 2.1690237692077576e-05, "loss": 0.0001, "step": 16846 }, { "epoch": 7.8632438739789965, "grad_norm": 0.0174560546875, "learning_rate": 2.168111311390919e-05, "loss": 0.0002, "step": 16847 }, { "epoch": 7.863710618436406, "grad_norm": 0.042724609375, "learning_rate": 2.167199022203331e-05, "loss": 0.0022, "step": 16848 }, { "epoch": 7.864177362893816, "grad_norm": 0.01007080078125, "learning_rate": 2.166286901664638e-05, "loss": 0.0002, "step": 16849 }, { "epoch": 7.864644107351225, "grad_norm": 0.00982666015625, "learning_rate": 2.165374949794483e-05, "loss": 0.0001, "step": 16850 }, { "epoch": 7.865110851808635, "grad_norm": 0.0517578125, "learning_rate": 2.164463166612496e-05, "loss": 0.0027, "step": 16851 }, { "epoch": 7.865577596266045, "grad_norm": 0.0072021484375, "learning_rate": 2.1635515521383122e-05, "loss": 0.0001, "step": 16852 }, { "epoch": 7.8660443407234535, "grad_norm": 0.010009765625, "learning_rate": 2.1626401063915557e-05, "loss": 0.0001, "step": 16853 }, { "epoch": 7.866511085180863, "grad_norm": 0.009521484375, "learning_rate": 2.1617288293918536e-05, "loss": 0.0002, "step": 16854 }, { "epoch": 7.866977829638273, "grad_norm": 0.0069580078125, "learning_rate": 2.16081772115883e-05, "loss": 0.0002, "step": 16855 }, { "epoch": 7.867444574095683, "grad_norm": 0.048095703125, "learning_rate": 2.1599067817120945e-05, "loss": 0.0003, "step": 16856 }, { "epoch": 7.867911318553093, "grad_norm": 0.0262451171875, "learning_rate": 2.158996011071268e-05, "loss": 0.0002, "step": 16857 }, { "epoch": 7.868378063010502, "grad_norm": 0.0186767578125, "learning_rate": 2.1580854092559545e-05, "loss": 0.0002, "step": 16858 }, { "epoch": 7.868844807467911, "grad_norm": 0.014404296875, "learning_rate": 2.1571749762857652e-05, "loss": 0.0001, "step": 16859 }, { "epoch": 7.869311551925321, "grad_norm": 0.00970458984375, "learning_rate": 2.156264712180298e-05, "loss": 0.0001, "step": 16860 }, { "epoch": 7.86977829638273, "grad_norm": 0.01513671875, "learning_rate": 2.1553546169591543e-05, "loss": 0.0002, "step": 16861 }, { "epoch": 7.87024504084014, "grad_norm": 0.042236328125, "learning_rate": 2.1544446906419324e-05, "loss": 0.0016, "step": 16862 }, { "epoch": 7.87071178529755, "grad_norm": 0.023681640625, "learning_rate": 2.153534933248218e-05, "loss": 0.0003, "step": 16863 }, { "epoch": 7.8711785297549595, "grad_norm": 0.017822265625, "learning_rate": 2.1526253447976053e-05, "loss": 0.0002, "step": 16864 }, { "epoch": 7.871645274212368, "grad_norm": 0.01446533203125, "learning_rate": 2.151715925309673e-05, "loss": 0.0001, "step": 16865 }, { "epoch": 7.872112018669778, "grad_norm": 0.01251220703125, "learning_rate": 2.1508066748040046e-05, "loss": 0.0002, "step": 16866 }, { "epoch": 7.872578763127188, "grad_norm": 0.0064697265625, "learning_rate": 2.1498975933001808e-05, "loss": 0.0001, "step": 16867 }, { "epoch": 7.873045507584598, "grad_norm": 0.01611328125, "learning_rate": 2.1489886808177683e-05, "loss": 0.0002, "step": 16868 }, { "epoch": 7.873512252042007, "grad_norm": 0.06103515625, "learning_rate": 2.1480799373763395e-05, "loss": 0.0027, "step": 16869 }, { "epoch": 7.8739789964994165, "grad_norm": 0.00933837890625, "learning_rate": 2.1471713629954648e-05, "loss": 0.0001, "step": 16870 }, { "epoch": 7.874445740956826, "grad_norm": 0.0123291015625, "learning_rate": 2.146262957694701e-05, "loss": 0.0002, "step": 16871 }, { "epoch": 7.874912485414236, "grad_norm": 0.0322265625, "learning_rate": 2.1453547214936097e-05, "loss": 0.0003, "step": 16872 }, { "epoch": 7.875379229871645, "grad_norm": 0.0439453125, "learning_rate": 2.144446654411749e-05, "loss": 0.0026, "step": 16873 }, { "epoch": 7.875845974329055, "grad_norm": 0.01153564453125, "learning_rate": 2.143538756468665e-05, "loss": 0.0002, "step": 16874 }, { "epoch": 7.8763127187864646, "grad_norm": 0.010498046875, "learning_rate": 2.1426310276839077e-05, "loss": 0.0001, "step": 16875 }, { "epoch": 7.876779463243874, "grad_norm": 0.1025390625, "learning_rate": 2.1417234680770258e-05, "loss": 0.0004, "step": 16876 }, { "epoch": 7.877246207701283, "grad_norm": 0.0098876953125, "learning_rate": 2.1408160776675534e-05, "loss": 0.0001, "step": 16877 }, { "epoch": 7.877712952158693, "grad_norm": 0.0140380859375, "learning_rate": 2.1399088564750314e-05, "loss": 0.0002, "step": 16878 }, { "epoch": 7.878179696616103, "grad_norm": 0.017578125, "learning_rate": 2.1390018045189942e-05, "loss": 0.0002, "step": 16879 }, { "epoch": 7.878646441073512, "grad_norm": 0.005706787109375, "learning_rate": 2.1380949218189685e-05, "loss": 0.0001, "step": 16880 }, { "epoch": 7.8791131855309215, "grad_norm": 0.04931640625, "learning_rate": 2.137188208394484e-05, "loss": 0.0026, "step": 16881 }, { "epoch": 7.879579929988331, "grad_norm": 0.021240234375, "learning_rate": 2.1362816642650584e-05, "loss": 0.0002, "step": 16882 }, { "epoch": 7.880046674445741, "grad_norm": 0.015869140625, "learning_rate": 2.1353752894502145e-05, "loss": 0.0002, "step": 16883 }, { "epoch": 7.880513418903151, "grad_norm": 0.017822265625, "learning_rate": 2.1344690839694683e-05, "loss": 0.0002, "step": 16884 }, { "epoch": 7.88098016336056, "grad_norm": 0.046142578125, "learning_rate": 2.1335630478423275e-05, "loss": 0.0017, "step": 16885 }, { "epoch": 7.88144690781797, "grad_norm": 0.02294921875, "learning_rate": 2.1326571810883022e-05, "loss": 0.0002, "step": 16886 }, { "epoch": 7.881913652275379, "grad_norm": 0.0120849609375, "learning_rate": 2.1317514837268994e-05, "loss": 0.0001, "step": 16887 }, { "epoch": 7.882380396732788, "grad_norm": 0.00848388671875, "learning_rate": 2.130845955777615e-05, "loss": 0.0001, "step": 16888 }, { "epoch": 7.882847141190198, "grad_norm": 0.0072021484375, "learning_rate": 2.1299405972599475e-05, "loss": 0.0002, "step": 16889 }, { "epoch": 7.883313885647608, "grad_norm": 0.173828125, "learning_rate": 2.129035408193394e-05, "loss": 0.0004, "step": 16890 }, { "epoch": 7.883780630105018, "grad_norm": 0.010986328125, "learning_rate": 2.128130388597438e-05, "loss": 0.0002, "step": 16891 }, { "epoch": 7.8842473745624275, "grad_norm": 0.00811767578125, "learning_rate": 2.1272255384915685e-05, "loss": 0.0001, "step": 16892 }, { "epoch": 7.884714119019836, "grad_norm": 0.0634765625, "learning_rate": 2.126320857895271e-05, "loss": 0.0021, "step": 16893 }, { "epoch": 7.885180863477246, "grad_norm": 0.01226806640625, "learning_rate": 2.1254163468280196e-05, "loss": 0.0002, "step": 16894 }, { "epoch": 7.885647607934656, "grad_norm": 0.033447265625, "learning_rate": 2.1245120053092905e-05, "loss": 0.0002, "step": 16895 }, { "epoch": 7.886114352392065, "grad_norm": 0.0181884765625, "learning_rate": 2.1236078333585584e-05, "loss": 0.0002, "step": 16896 }, { "epoch": 7.886581096849475, "grad_norm": 0.00958251953125, "learning_rate": 2.122703830995286e-05, "loss": 0.0002, "step": 16897 }, { "epoch": 7.8870478413068845, "grad_norm": 0.0115966796875, "learning_rate": 2.1217999982389415e-05, "loss": 0.0001, "step": 16898 }, { "epoch": 7.887514585764294, "grad_norm": 0.01611328125, "learning_rate": 2.120896335108985e-05, "loss": 0.0002, "step": 16899 }, { "epoch": 7.887981330221704, "grad_norm": 0.01165771484375, "learning_rate": 2.1199928416248704e-05, "loss": 0.0002, "step": 16900 }, { "epoch": 7.888448074679113, "grad_norm": 0.006103515625, "learning_rate": 2.1190895178060556e-05, "loss": 0.0001, "step": 16901 }, { "epoch": 7.888914819136523, "grad_norm": 0.00701904296875, "learning_rate": 2.1181863636719847e-05, "loss": 0.0001, "step": 16902 }, { "epoch": 7.889381563593933, "grad_norm": 0.03515625, "learning_rate": 2.1172833792421066e-05, "loss": 0.002, "step": 16903 }, { "epoch": 7.8898483080513415, "grad_norm": 0.05078125, "learning_rate": 2.1163805645358657e-05, "loss": 0.0048, "step": 16904 }, { "epoch": 7.890315052508751, "grad_norm": 0.0107421875, "learning_rate": 2.1154779195726992e-05, "loss": 0.0002, "step": 16905 }, { "epoch": 7.890781796966161, "grad_norm": 0.020751953125, "learning_rate": 2.114575444372038e-05, "loss": 0.0002, "step": 16906 }, { "epoch": 7.891248541423571, "grad_norm": 0.05224609375, "learning_rate": 2.113673138953316e-05, "loss": 0.0002, "step": 16907 }, { "epoch": 7.89171528588098, "grad_norm": 0.30859375, "learning_rate": 2.112771003335965e-05, "loss": 0.0007, "step": 16908 }, { "epoch": 7.89218203033839, "grad_norm": 0.01177978515625, "learning_rate": 2.1118690375394012e-05, "loss": 0.0001, "step": 16909 }, { "epoch": 7.892648774795799, "grad_norm": 0.00921630859375, "learning_rate": 2.1109672415830506e-05, "loss": 0.0001, "step": 16910 }, { "epoch": 7.893115519253209, "grad_norm": 0.01519775390625, "learning_rate": 2.1100656154863297e-05, "loss": 0.0002, "step": 16911 }, { "epoch": 7.893582263710618, "grad_norm": 0.0281982421875, "learning_rate": 2.1091641592686485e-05, "loss": 0.0002, "step": 16912 }, { "epoch": 7.894049008168028, "grad_norm": 0.00732421875, "learning_rate": 2.1082628729494204e-05, "loss": 0.0001, "step": 16913 }, { "epoch": 7.894515752625438, "grad_norm": 0.038818359375, "learning_rate": 2.1073617565480453e-05, "loss": 0.0002, "step": 16914 }, { "epoch": 7.8949824970828475, "grad_norm": 0.041748046875, "learning_rate": 2.106460810083929e-05, "loss": 0.0003, "step": 16915 }, { "epoch": 7.895449241540256, "grad_norm": 0.03515625, "learning_rate": 2.1055600335764723e-05, "loss": 0.0027, "step": 16916 }, { "epoch": 7.895915985997666, "grad_norm": 0.009033203125, "learning_rate": 2.1046594270450647e-05, "loss": 0.0002, "step": 16917 }, { "epoch": 7.896382730455076, "grad_norm": 0.0274658203125, "learning_rate": 2.1037589905090994e-05, "loss": 0.002, "step": 16918 }, { "epoch": 7.896849474912486, "grad_norm": 0.031982421875, "learning_rate": 2.102858723987967e-05, "loss": 0.0002, "step": 16919 }, { "epoch": 7.897316219369895, "grad_norm": 0.09423828125, "learning_rate": 2.101958627501045e-05, "loss": 0.0036, "step": 16920 }, { "epoch": 7.8977829638273045, "grad_norm": 0.0308837890625, "learning_rate": 2.1010587010677175e-05, "loss": 0.0022, "step": 16921 }, { "epoch": 7.898249708284714, "grad_norm": 0.0101318359375, "learning_rate": 2.100158944707362e-05, "loss": 0.0002, "step": 16922 }, { "epoch": 7.898716452742123, "grad_norm": 0.016357421875, "learning_rate": 2.0992593584393472e-05, "loss": 0.0002, "step": 16923 }, { "epoch": 7.899183197199533, "grad_norm": 0.00634765625, "learning_rate": 2.0983599422830445e-05, "loss": 0.0001, "step": 16924 }, { "epoch": 7.899649941656943, "grad_norm": 0.043701171875, "learning_rate": 2.0974606962578227e-05, "loss": 0.0002, "step": 16925 }, { "epoch": 7.900116686114353, "grad_norm": 0.01513671875, "learning_rate": 2.0965616203830363e-05, "loss": 0.0001, "step": 16926 }, { "epoch": 7.900583430571762, "grad_norm": 0.0242919921875, "learning_rate": 2.095662714678047e-05, "loss": 0.0003, "step": 16927 }, { "epoch": 7.901050175029171, "grad_norm": 0.048828125, "learning_rate": 2.094763979162213e-05, "loss": 0.0025, "step": 16928 }, { "epoch": 7.901516919486581, "grad_norm": 0.011962890625, "learning_rate": 2.0938654138548797e-05, "loss": 0.0002, "step": 16929 }, { "epoch": 7.901983663943991, "grad_norm": 0.0093994140625, "learning_rate": 2.092967018775397e-05, "loss": 0.0001, "step": 16930 }, { "epoch": 7.9024504084014, "grad_norm": 0.018310546875, "learning_rate": 2.092068793943106e-05, "loss": 0.0002, "step": 16931 }, { "epoch": 7.90291715285881, "grad_norm": 0.01202392578125, "learning_rate": 2.091170739377347e-05, "loss": 0.0002, "step": 16932 }, { "epoch": 7.903383897316219, "grad_norm": 0.10302734375, "learning_rate": 2.0902728550974592e-05, "loss": 0.0004, "step": 16933 }, { "epoch": 7.903850641773629, "grad_norm": 0.0235595703125, "learning_rate": 2.0893751411227712e-05, "loss": 0.0002, "step": 16934 }, { "epoch": 7.904317386231039, "grad_norm": 0.1337890625, "learning_rate": 2.0884775974726134e-05, "loss": 0.0003, "step": 16935 }, { "epoch": 7.904784130688448, "grad_norm": 0.052978515625, "learning_rate": 2.0875802241663135e-05, "loss": 0.0029, "step": 16936 }, { "epoch": 7.905250875145858, "grad_norm": 0.00927734375, "learning_rate": 2.086683021223187e-05, "loss": 0.0001, "step": 16937 }, { "epoch": 7.9057176196032675, "grad_norm": 0.006134033203125, "learning_rate": 2.085785988662555e-05, "loss": 0.0001, "step": 16938 }, { "epoch": 7.906184364060676, "grad_norm": 0.011474609375, "learning_rate": 2.0848891265037353e-05, "loss": 0.0002, "step": 16939 }, { "epoch": 7.906651108518086, "grad_norm": 0.01025390625, "learning_rate": 2.0839924347660312e-05, "loss": 0.0001, "step": 16940 }, { "epoch": 7.907117852975496, "grad_norm": 0.007781982421875, "learning_rate": 2.0830959134687534e-05, "loss": 0.0001, "step": 16941 }, { "epoch": 7.907584597432906, "grad_norm": 0.012451171875, "learning_rate": 2.0821995626312073e-05, "loss": 0.0001, "step": 16942 }, { "epoch": 7.908051341890315, "grad_norm": 0.00811767578125, "learning_rate": 2.0813033822726858e-05, "loss": 0.0002, "step": 16943 }, { "epoch": 7.9085180863477245, "grad_norm": 0.00994873046875, "learning_rate": 2.0804073724124895e-05, "loss": 0.0001, "step": 16944 }, { "epoch": 7.908984830805134, "grad_norm": 0.01397705078125, "learning_rate": 2.0795115330699123e-05, "loss": 0.0001, "step": 16945 }, { "epoch": 7.909451575262544, "grad_norm": 0.028076171875, "learning_rate": 2.078615864264236e-05, "loss": 0.002, "step": 16946 }, { "epoch": 7.909918319719953, "grad_norm": 0.015380859375, "learning_rate": 2.0777203660147504e-05, "loss": 0.0002, "step": 16947 }, { "epoch": 7.910385064177363, "grad_norm": 0.00811767578125, "learning_rate": 2.0768250383407362e-05, "loss": 0.0002, "step": 16948 }, { "epoch": 7.9108518086347726, "grad_norm": 0.041259765625, "learning_rate": 2.075929881261468e-05, "loss": 0.0002, "step": 16949 }, { "epoch": 7.911318553092182, "grad_norm": 0.0106201171875, "learning_rate": 2.075034894796225e-05, "loss": 0.0002, "step": 16950 }, { "epoch": 7.911785297549591, "grad_norm": 0.01708984375, "learning_rate": 2.0741400789642695e-05, "loss": 0.0001, "step": 16951 }, { "epoch": 7.912252042007001, "grad_norm": 0.0208740234375, "learning_rate": 2.073245433784876e-05, "loss": 0.0002, "step": 16952 }, { "epoch": 7.912718786464411, "grad_norm": 0.012939453125, "learning_rate": 2.0723509592772993e-05, "loss": 0.0002, "step": 16953 }, { "epoch": 7.913185530921821, "grad_norm": 0.0194091796875, "learning_rate": 2.071456655460806e-05, "loss": 0.0002, "step": 16954 }, { "epoch": 7.9136522753792296, "grad_norm": 0.011962890625, "learning_rate": 2.0705625223546444e-05, "loss": 0.0001, "step": 16955 }, { "epoch": 7.914119019836639, "grad_norm": 0.01708984375, "learning_rate": 2.0696685599780696e-05, "loss": 0.0002, "step": 16956 }, { "epoch": 7.914585764294049, "grad_norm": 0.04443359375, "learning_rate": 2.0687747683503324e-05, "loss": 0.0014, "step": 16957 }, { "epoch": 7.915052508751459, "grad_norm": 0.04248046875, "learning_rate": 2.067881147490671e-05, "loss": 0.0002, "step": 16958 }, { "epoch": 7.915519253208868, "grad_norm": 0.00885009765625, "learning_rate": 2.06698769741833e-05, "loss": 0.0002, "step": 16959 }, { "epoch": 7.915985997666278, "grad_norm": 0.0091552734375, "learning_rate": 2.0660944181525487e-05, "loss": 0.0002, "step": 16960 }, { "epoch": 7.916452742123687, "grad_norm": 0.048095703125, "learning_rate": 2.0652013097125545e-05, "loss": 0.002, "step": 16961 }, { "epoch": 7.916919486581097, "grad_norm": 0.00750732421875, "learning_rate": 2.0643083721175827e-05, "loss": 0.0001, "step": 16962 }, { "epoch": 7.917386231038506, "grad_norm": 0.00799560546875, "learning_rate": 2.0634156053868537e-05, "loss": 0.0001, "step": 16963 }, { "epoch": 7.917852975495916, "grad_norm": 0.03564453125, "learning_rate": 2.062523009539592e-05, "loss": 0.0002, "step": 16964 }, { "epoch": 7.918319719953326, "grad_norm": 0.0101318359375, "learning_rate": 2.061630584595019e-05, "loss": 0.0002, "step": 16965 }, { "epoch": 7.918786464410735, "grad_norm": 0.0096435546875, "learning_rate": 2.060738330572346e-05, "loss": 0.0001, "step": 16966 }, { "epoch": 7.919253208868144, "grad_norm": 0.01153564453125, "learning_rate": 2.0598462474907842e-05, "loss": 0.0002, "step": 16967 }, { "epoch": 7.919719953325554, "grad_norm": 0.0103759765625, "learning_rate": 2.058954335369546e-05, "loss": 0.0002, "step": 16968 }, { "epoch": 7.920186697782964, "grad_norm": 0.0126953125, "learning_rate": 2.0580625942278288e-05, "loss": 0.0002, "step": 16969 }, { "epoch": 7.920653442240374, "grad_norm": 0.1123046875, "learning_rate": 2.0571710240848352e-05, "loss": 0.0003, "step": 16970 }, { "epoch": 7.921120186697783, "grad_norm": 0.0230712890625, "learning_rate": 2.056279624959766e-05, "loss": 0.0002, "step": 16971 }, { "epoch": 7.9215869311551925, "grad_norm": 0.008056640625, "learning_rate": 2.0553883968718056e-05, "loss": 0.0001, "step": 16972 }, { "epoch": 7.922053675612602, "grad_norm": 0.0108642578125, "learning_rate": 2.054497339840149e-05, "loss": 0.0002, "step": 16973 }, { "epoch": 7.922520420070011, "grad_norm": 0.1904296875, "learning_rate": 2.053606453883984e-05, "loss": 0.0007, "step": 16974 }, { "epoch": 7.922987164527421, "grad_norm": 0.015625, "learning_rate": 2.0527157390224848e-05, "loss": 0.0002, "step": 16975 }, { "epoch": 7.923453908984831, "grad_norm": 0.0096435546875, "learning_rate": 2.0518251952748337e-05, "loss": 0.0002, "step": 16976 }, { "epoch": 7.923920653442241, "grad_norm": 0.01129150390625, "learning_rate": 2.0509348226602064e-05, "loss": 0.0002, "step": 16977 }, { "epoch": 7.92438739789965, "grad_norm": 0.031005859375, "learning_rate": 2.050044621197771e-05, "loss": 0.0002, "step": 16978 }, { "epoch": 7.924854142357059, "grad_norm": 0.0191650390625, "learning_rate": 2.0491545909066945e-05, "loss": 0.0002, "step": 16979 }, { "epoch": 7.925320886814469, "grad_norm": 0.021728515625, "learning_rate": 2.0482647318061442e-05, "loss": 0.0002, "step": 16980 }, { "epoch": 7.925787631271879, "grad_norm": 0.052734375, "learning_rate": 2.047375043915274e-05, "loss": 0.002, "step": 16981 }, { "epoch": 7.926254375729288, "grad_norm": 0.0103759765625, "learning_rate": 2.0464855272532456e-05, "loss": 0.0001, "step": 16982 }, { "epoch": 7.926721120186698, "grad_norm": 0.01226806640625, "learning_rate": 2.0455961818392055e-05, "loss": 0.0002, "step": 16983 }, { "epoch": 7.927187864644107, "grad_norm": 0.033935546875, "learning_rate": 2.0447070076923057e-05, "loss": 0.0024, "step": 16984 }, { "epoch": 7.927654609101517, "grad_norm": 0.005279541015625, "learning_rate": 2.0438180048316923e-05, "loss": 0.0001, "step": 16985 }, { "epoch": 7.928121353558926, "grad_norm": 0.04931640625, "learning_rate": 2.042929173276503e-05, "loss": 0.0022, "step": 16986 }, { "epoch": 7.928588098016336, "grad_norm": 0.02783203125, "learning_rate": 2.0420405130458777e-05, "loss": 0.0002, "step": 16987 }, { "epoch": 7.929054842473746, "grad_norm": 0.08642578125, "learning_rate": 2.041152024158951e-05, "loss": 0.0024, "step": 16988 }, { "epoch": 7.9295215869311555, "grad_norm": 0.01080322265625, "learning_rate": 2.0402637066348495e-05, "loss": 0.0002, "step": 16989 }, { "epoch": 7.929988331388564, "grad_norm": 0.01177978515625, "learning_rate": 2.0393755604927022e-05, "loss": 0.0002, "step": 16990 }, { "epoch": 7.930455075845974, "grad_norm": 0.01190185546875, "learning_rate": 2.038487585751634e-05, "loss": 0.0002, "step": 16991 }, { "epoch": 7.930921820303384, "grad_norm": 0.0264892578125, "learning_rate": 2.037599782430758e-05, "loss": 0.0002, "step": 16992 }, { "epoch": 7.931388564760794, "grad_norm": 0.091796875, "learning_rate": 2.0367121505491937e-05, "loss": 0.0033, "step": 16993 }, { "epoch": 7.931855309218203, "grad_norm": 0.0167236328125, "learning_rate": 2.035824690126056e-05, "loss": 0.0002, "step": 16994 }, { "epoch": 7.9323220536756125, "grad_norm": 0.0084228515625, "learning_rate": 2.0349374011804445e-05, "loss": 0.0001, "step": 16995 }, { "epoch": 7.932788798133022, "grad_norm": 0.00872802734375, "learning_rate": 2.0340502837314703e-05, "loss": 0.0001, "step": 16996 }, { "epoch": 7.933255542590432, "grad_norm": 0.00787353515625, "learning_rate": 2.0331633377982328e-05, "loss": 0.0002, "step": 16997 }, { "epoch": 7.933722287047841, "grad_norm": 0.01409912109375, "learning_rate": 2.0322765633998265e-05, "loss": 0.0002, "step": 16998 }, { "epoch": 7.934189031505251, "grad_norm": 0.01275634765625, "learning_rate": 2.0313899605553476e-05, "loss": 0.0001, "step": 16999 }, { "epoch": 7.934655775962661, "grad_norm": 0.04248046875, "learning_rate": 2.0305035292838848e-05, "loss": 0.0025, "step": 17000 }, { "epoch": 7.93512252042007, "grad_norm": 0.021240234375, "learning_rate": 2.0296172696045212e-05, "loss": 0.0002, "step": 17001 }, { "epoch": 7.935589264877479, "grad_norm": 0.041015625, "learning_rate": 2.0287311815363397e-05, "loss": 0.0002, "step": 17002 }, { "epoch": 7.936056009334889, "grad_norm": 0.01239013671875, "learning_rate": 2.0278452650984235e-05, "loss": 0.0002, "step": 17003 }, { "epoch": 7.936522753792299, "grad_norm": 0.01300048828125, "learning_rate": 2.0269595203098422e-05, "loss": 0.0002, "step": 17004 }, { "epoch": 7.936989498249709, "grad_norm": 0.007293701171875, "learning_rate": 2.0260739471896672e-05, "loss": 0.0002, "step": 17005 }, { "epoch": 7.937456242707118, "grad_norm": 0.0263671875, "learning_rate": 2.025188545756971e-05, "loss": 0.0002, "step": 17006 }, { "epoch": 7.937922987164527, "grad_norm": 0.037841796875, "learning_rate": 2.024303316030811e-05, "loss": 0.0006, "step": 17007 }, { "epoch": 7.938389731621937, "grad_norm": 0.007476806640625, "learning_rate": 2.0234182580302497e-05, "loss": 0.0002, "step": 17008 }, { "epoch": 7.938856476079346, "grad_norm": 0.007110595703125, "learning_rate": 2.0225333717743456e-05, "loss": 0.0001, "step": 17009 }, { "epoch": 7.939323220536756, "grad_norm": 0.00927734375, "learning_rate": 2.021648657282147e-05, "loss": 0.0002, "step": 17010 }, { "epoch": 7.939789964994166, "grad_norm": 0.0108642578125, "learning_rate": 2.0207641145727074e-05, "loss": 0.0002, "step": 17011 }, { "epoch": 7.9402567094515755, "grad_norm": 0.009521484375, "learning_rate": 2.0198797436650674e-05, "loss": 0.0002, "step": 17012 }, { "epoch": 7.940723453908985, "grad_norm": 0.007049560546875, "learning_rate": 2.0189955445782695e-05, "loss": 0.0001, "step": 17013 }, { "epoch": 7.941190198366394, "grad_norm": 0.036865234375, "learning_rate": 2.018111517331356e-05, "loss": 0.0013, "step": 17014 }, { "epoch": 7.941656942823804, "grad_norm": 0.00787353515625, "learning_rate": 2.017227661943355e-05, "loss": 0.0001, "step": 17015 }, { "epoch": 7.942123687281214, "grad_norm": 0.01220703125, "learning_rate": 2.0163439784332994e-05, "loss": 0.0002, "step": 17016 }, { "epoch": 7.942590431738623, "grad_norm": 0.01202392578125, "learning_rate": 2.015460466820218e-05, "loss": 0.0001, "step": 17017 }, { "epoch": 7.9430571761960325, "grad_norm": 0.0238037109375, "learning_rate": 2.0145771271231297e-05, "loss": 0.0002, "step": 17018 }, { "epoch": 7.943523920653442, "grad_norm": 0.0076904296875, "learning_rate": 2.013693959361056e-05, "loss": 0.0001, "step": 17019 }, { "epoch": 7.943990665110852, "grad_norm": 0.0123291015625, "learning_rate": 2.0128109635530144e-05, "loss": 0.0001, "step": 17020 }, { "epoch": 7.944457409568262, "grad_norm": 0.0152587890625, "learning_rate": 2.0119281397180122e-05, "loss": 0.0001, "step": 17021 }, { "epoch": 7.944924154025671, "grad_norm": 0.06103515625, "learning_rate": 2.0110454878750605e-05, "loss": 0.0034, "step": 17022 }, { "epoch": 7.9453908984830806, "grad_norm": 0.0208740234375, "learning_rate": 2.0101630080431656e-05, "loss": 0.0002, "step": 17023 }, { "epoch": 7.94585764294049, "grad_norm": 0.010498046875, "learning_rate": 2.009280700241324e-05, "loss": 0.0001, "step": 17024 }, { "epoch": 7.946324387397899, "grad_norm": 0.0108642578125, "learning_rate": 2.0083985644885338e-05, "loss": 0.0002, "step": 17025 }, { "epoch": 7.946791131855309, "grad_norm": 0.005126953125, "learning_rate": 2.0075166008037927e-05, "loss": 0.0001, "step": 17026 }, { "epoch": 7.947257876312719, "grad_norm": 0.011962890625, "learning_rate": 2.006634809206084e-05, "loss": 0.0002, "step": 17027 }, { "epoch": 7.947724620770129, "grad_norm": 0.00970458984375, "learning_rate": 2.0057531897143966e-05, "loss": 0.0002, "step": 17028 }, { "epoch": 7.9481913652275376, "grad_norm": 0.01544189453125, "learning_rate": 2.0048717423477158e-05, "loss": 0.0002, "step": 17029 }, { "epoch": 7.948658109684947, "grad_norm": 0.1767578125, "learning_rate": 2.0039904671250143e-05, "loss": 0.0004, "step": 17030 }, { "epoch": 7.949124854142357, "grad_norm": 0.010009765625, "learning_rate": 2.0031093640652722e-05, "loss": 0.0002, "step": 17031 }, { "epoch": 7.949591598599767, "grad_norm": 0.046875, "learning_rate": 2.0022284331874552e-05, "loss": 0.0002, "step": 17032 }, { "epoch": 7.950058343057176, "grad_norm": 0.05517578125, "learning_rate": 2.001347674510534e-05, "loss": 0.004, "step": 17033 }, { "epoch": 7.950525087514586, "grad_norm": 0.01513671875, "learning_rate": 2.000467088053476e-05, "loss": 0.0001, "step": 17034 }, { "epoch": 7.950991831971995, "grad_norm": 0.004669189453125, "learning_rate": 1.999586673835233e-05, "loss": 0.0001, "step": 17035 }, { "epoch": 7.951458576429405, "grad_norm": 0.00653076171875, "learning_rate": 1.998706431874766e-05, "loss": 0.0001, "step": 17036 }, { "epoch": 7.951925320886814, "grad_norm": 0.0196533203125, "learning_rate": 1.99782636219103e-05, "loss": 0.0002, "step": 17037 }, { "epoch": 7.952392065344224, "grad_norm": 0.009521484375, "learning_rate": 1.996946464802968e-05, "loss": 0.0001, "step": 17038 }, { "epoch": 7.952858809801634, "grad_norm": 0.0294189453125, "learning_rate": 1.9960667397295285e-05, "loss": 0.0018, "step": 17039 }, { "epoch": 7.9533255542590435, "grad_norm": 0.1103515625, "learning_rate": 1.9951871869896544e-05, "loss": 0.0004, "step": 17040 }, { "epoch": 7.953792298716452, "grad_norm": 0.010498046875, "learning_rate": 1.99430780660228e-05, "loss": 0.0002, "step": 17041 }, { "epoch": 7.954259043173862, "grad_norm": 0.0419921875, "learning_rate": 1.993428598586341e-05, "loss": 0.0002, "step": 17042 }, { "epoch": 7.954725787631272, "grad_norm": 0.048095703125, "learning_rate": 1.9925495629607695e-05, "loss": 0.0002, "step": 17043 }, { "epoch": 7.955192532088682, "grad_norm": 0.012451171875, "learning_rate": 1.991670699744489e-05, "loss": 0.0002, "step": 17044 }, { "epoch": 7.955659276546091, "grad_norm": 0.006927490234375, "learning_rate": 1.9907920089564225e-05, "loss": 0.0001, "step": 17045 }, { "epoch": 7.9561260210035005, "grad_norm": 0.004364013671875, "learning_rate": 1.9899134906154936e-05, "loss": 0.0001, "step": 17046 }, { "epoch": 7.95659276546091, "grad_norm": 0.01153564453125, "learning_rate": 1.989035144740612e-05, "loss": 0.0002, "step": 17047 }, { "epoch": 7.95705950991832, "grad_norm": 0.020263671875, "learning_rate": 1.9881569713506943e-05, "loss": 0.0002, "step": 17048 }, { "epoch": 7.957526254375729, "grad_norm": 0.01422119140625, "learning_rate": 1.9872789704646465e-05, "loss": 0.0001, "step": 17049 }, { "epoch": 7.957992998833139, "grad_norm": 0.006072998046875, "learning_rate": 1.9864011421013696e-05, "loss": 0.0001, "step": 17050 }, { "epoch": 7.958459743290549, "grad_norm": 0.04248046875, "learning_rate": 1.985523486279768e-05, "loss": 0.0023, "step": 17051 }, { "epoch": 7.9589264877479575, "grad_norm": 0.0517578125, "learning_rate": 1.9846460030187408e-05, "loss": 0.0016, "step": 17052 }, { "epoch": 7.959393232205367, "grad_norm": 0.0098876953125, "learning_rate": 1.9837686923371758e-05, "loss": 0.0001, "step": 17053 }, { "epoch": 7.959859976662777, "grad_norm": 0.00909423828125, "learning_rate": 1.982891554253964e-05, "loss": 0.0002, "step": 17054 }, { "epoch": 7.960326721120187, "grad_norm": 0.07421875, "learning_rate": 1.982014588787996e-05, "loss": 0.003, "step": 17055 }, { "epoch": 7.960793465577597, "grad_norm": 0.072265625, "learning_rate": 1.9811377959581466e-05, "loss": 0.0041, "step": 17056 }, { "epoch": 7.961260210035006, "grad_norm": 0.005889892578125, "learning_rate": 1.9802611757832977e-05, "loss": 0.0001, "step": 17057 }, { "epoch": 7.961726954492415, "grad_norm": 0.00811767578125, "learning_rate": 1.9793847282823263e-05, "loss": 0.0001, "step": 17058 }, { "epoch": 7.962193698949825, "grad_norm": 0.02099609375, "learning_rate": 1.9785084534740984e-05, "loss": 0.0002, "step": 17059 }, { "epoch": 7.962660443407234, "grad_norm": 0.0113525390625, "learning_rate": 1.9776323513774853e-05, "loss": 0.0002, "step": 17060 }, { "epoch": 7.963127187864644, "grad_norm": 0.0673828125, "learning_rate": 1.9767564220113466e-05, "loss": 0.0003, "step": 17061 }, { "epoch": 7.963593932322054, "grad_norm": 0.00994873046875, "learning_rate": 1.975880665394544e-05, "loss": 0.0002, "step": 17062 }, { "epoch": 7.9640606767794635, "grad_norm": 0.053955078125, "learning_rate": 1.9750050815459363e-05, "loss": 0.0031, "step": 17063 }, { "epoch": 7.964527421236873, "grad_norm": 0.01080322265625, "learning_rate": 1.9741296704843704e-05, "loss": 0.0002, "step": 17064 }, { "epoch": 7.964994165694282, "grad_norm": 0.016845703125, "learning_rate": 1.973254432228698e-05, "loss": 0.0002, "step": 17065 }, { "epoch": 7.965460910151692, "grad_norm": 0.00970458984375, "learning_rate": 1.9723793667977653e-05, "loss": 0.0001, "step": 17066 }, { "epoch": 7.965927654609102, "grad_norm": 0.01458740234375, "learning_rate": 1.9715044742104095e-05, "loss": 0.0002, "step": 17067 }, { "epoch": 7.966394399066511, "grad_norm": 0.003753662109375, "learning_rate": 1.9706297544854702e-05, "loss": 0.0001, "step": 17068 }, { "epoch": 7.9668611435239205, "grad_norm": 0.01556396484375, "learning_rate": 1.9697552076417837e-05, "loss": 0.0002, "step": 17069 }, { "epoch": 7.96732788798133, "grad_norm": 0.006195068359375, "learning_rate": 1.9688808336981747e-05, "loss": 0.0001, "step": 17070 }, { "epoch": 7.96779463243874, "grad_norm": 0.018798828125, "learning_rate": 1.9680066326734715e-05, "loss": 0.0002, "step": 17071 }, { "epoch": 7.968261376896149, "grad_norm": 0.03271484375, "learning_rate": 1.9671326045865e-05, "loss": 0.0017, "step": 17072 }, { "epoch": 7.968728121353559, "grad_norm": 0.00970458984375, "learning_rate": 1.9662587494560737e-05, "loss": 0.0001, "step": 17073 }, { "epoch": 7.969194865810969, "grad_norm": 0.0147705078125, "learning_rate": 1.9653850673010098e-05, "loss": 0.0002, "step": 17074 }, { "epoch": 7.969661610268378, "grad_norm": 0.03662109375, "learning_rate": 1.964511558140123e-05, "loss": 0.0003, "step": 17075 }, { "epoch": 7.970128354725787, "grad_norm": 0.00604248046875, "learning_rate": 1.9636382219922155e-05, "loss": 0.0001, "step": 17076 }, { "epoch": 7.970595099183197, "grad_norm": 0.080078125, "learning_rate": 1.9627650588760928e-05, "loss": 0.0002, "step": 17077 }, { "epoch": 7.971061843640607, "grad_norm": 0.0537109375, "learning_rate": 1.961892068810559e-05, "loss": 0.0028, "step": 17078 }, { "epoch": 7.971528588098017, "grad_norm": 0.0123291015625, "learning_rate": 1.9610192518144042e-05, "loss": 0.0002, "step": 17079 }, { "epoch": 7.971995332555426, "grad_norm": 0.0230712890625, "learning_rate": 1.960146607906428e-05, "loss": 0.0001, "step": 17080 }, { "epoch": 7.972462077012835, "grad_norm": 0.2412109375, "learning_rate": 1.9592741371054124e-05, "loss": 0.0005, "step": 17081 }, { "epoch": 7.972928821470245, "grad_norm": 0.046142578125, "learning_rate": 1.9584018394301464e-05, "loss": 0.0003, "step": 17082 }, { "epoch": 7.973395565927655, "grad_norm": 0.009765625, "learning_rate": 1.957529714899412e-05, "loss": 0.0002, "step": 17083 }, { "epoch": 7.973862310385064, "grad_norm": 0.0125732421875, "learning_rate": 1.956657763531985e-05, "loss": 0.0002, "step": 17084 }, { "epoch": 7.974329054842474, "grad_norm": 0.00982666015625, "learning_rate": 1.955785985346641e-05, "loss": 0.0001, "step": 17085 }, { "epoch": 7.9747957992998835, "grad_norm": 0.00933837890625, "learning_rate": 1.9549143803621516e-05, "loss": 0.0002, "step": 17086 }, { "epoch": 7.975262543757293, "grad_norm": 0.007110595703125, "learning_rate": 1.95404294859728e-05, "loss": 0.0001, "step": 17087 }, { "epoch": 7.975729288214702, "grad_norm": 0.036865234375, "learning_rate": 1.9531716900707896e-05, "loss": 0.0022, "step": 17088 }, { "epoch": 7.976196032672112, "grad_norm": 0.01953125, "learning_rate": 1.9523006048014448e-05, "loss": 0.0002, "step": 17089 }, { "epoch": 7.976662777129522, "grad_norm": 0.0045166015625, "learning_rate": 1.9514296928079932e-05, "loss": 0.0001, "step": 17090 }, { "epoch": 7.977129521586932, "grad_norm": 0.008544921875, "learning_rate": 1.9505589541091916e-05, "loss": 0.0002, "step": 17091 }, { "epoch": 7.9775962660443405, "grad_norm": 0.007659912109375, "learning_rate": 1.949688388723788e-05, "loss": 0.0001, "step": 17092 }, { "epoch": 7.97806301050175, "grad_norm": 0.0048828125, "learning_rate": 1.9488179966705245e-05, "loss": 0.0001, "step": 17093 }, { "epoch": 7.97852975495916, "grad_norm": 0.0703125, "learning_rate": 1.947947777968141e-05, "loss": 0.0022, "step": 17094 }, { "epoch": 7.978996499416569, "grad_norm": 0.010498046875, "learning_rate": 1.9470777326353816e-05, "loss": 0.0002, "step": 17095 }, { "epoch": 7.979463243873979, "grad_norm": 0.06494140625, "learning_rate": 1.9462078606909686e-05, "loss": 0.0014, "step": 17096 }, { "epoch": 7.979929988331389, "grad_norm": 0.060302734375, "learning_rate": 1.9453381621536348e-05, "loss": 0.0024, "step": 17097 }, { "epoch": 7.980396732788798, "grad_norm": 0.004302978515625, "learning_rate": 1.9444686370421107e-05, "loss": 0.0001, "step": 17098 }, { "epoch": 7.980863477246208, "grad_norm": 0.017822265625, "learning_rate": 1.9435992853751128e-05, "loss": 0.0002, "step": 17099 }, { "epoch": 7.981330221703617, "grad_norm": 0.0196533203125, "learning_rate": 1.94273010717136e-05, "loss": 0.0002, "step": 17100 }, { "epoch": 7.981796966161027, "grad_norm": 0.04248046875, "learning_rate": 1.9418611024495702e-05, "loss": 0.0002, "step": 17101 }, { "epoch": 7.982263710618437, "grad_norm": 0.006591796875, "learning_rate": 1.940992271228449e-05, "loss": 0.0001, "step": 17102 }, { "epoch": 7.9827304550758456, "grad_norm": 0.0390625, "learning_rate": 1.9401236135267064e-05, "loss": 0.0019, "step": 17103 }, { "epoch": 7.983197199533255, "grad_norm": 0.015625, "learning_rate": 1.9392551293630467e-05, "loss": 0.0002, "step": 17104 }, { "epoch": 7.983663943990665, "grad_norm": 0.06396484375, "learning_rate": 1.938386818756166e-05, "loss": 0.0045, "step": 17105 }, { "epoch": 7.984130688448075, "grad_norm": 0.00927734375, "learning_rate": 1.9375186817247615e-05, "loss": 0.0002, "step": 17106 }, { "epoch": 7.984597432905485, "grad_norm": 0.0286865234375, "learning_rate": 1.936650718287527e-05, "loss": 0.0002, "step": 17107 }, { "epoch": 7.985064177362894, "grad_norm": 0.0546875, "learning_rate": 1.935782928463147e-05, "loss": 0.0024, "step": 17108 }, { "epoch": 7.985530921820303, "grad_norm": 0.1298828125, "learning_rate": 1.93491531227031e-05, "loss": 0.0003, "step": 17109 }, { "epoch": 7.985997666277713, "grad_norm": 0.01123046875, "learning_rate": 1.934047869727693e-05, "loss": 0.0002, "step": 17110 }, { "epoch": 7.986464410735122, "grad_norm": 0.01397705078125, "learning_rate": 1.9331806008539742e-05, "loss": 0.0002, "step": 17111 }, { "epoch": 7.986931155192532, "grad_norm": 0.05029296875, "learning_rate": 1.93231350566783e-05, "loss": 0.0017, "step": 17112 }, { "epoch": 7.987397899649942, "grad_norm": 0.0067138671875, "learning_rate": 1.9314465841879247e-05, "loss": 0.0001, "step": 17113 }, { "epoch": 7.9878646441073515, "grad_norm": 0.007476806640625, "learning_rate": 1.9305798364329262e-05, "loss": 0.0001, "step": 17114 }, { "epoch": 7.98833138856476, "grad_norm": 0.0059814453125, "learning_rate": 1.9297132624215007e-05, "loss": 0.0001, "step": 17115 }, { "epoch": 7.98879813302217, "grad_norm": 0.01007080078125, "learning_rate": 1.928846862172299e-05, "loss": 0.0002, "step": 17116 }, { "epoch": 7.98926487747958, "grad_norm": 0.048095703125, "learning_rate": 1.9279806357039798e-05, "loss": 0.0006, "step": 17117 }, { "epoch": 7.98973162193699, "grad_norm": 0.0272216796875, "learning_rate": 1.9271145830351967e-05, "loss": 0.0002, "step": 17118 }, { "epoch": 7.990198366394399, "grad_norm": 0.099609375, "learning_rate": 1.9262487041845913e-05, "loss": 0.007, "step": 17119 }, { "epoch": 7.9906651108518085, "grad_norm": 0.029052734375, "learning_rate": 1.9253829991708083e-05, "loss": 0.0019, "step": 17120 }, { "epoch": 7.991131855309218, "grad_norm": 0.037109375, "learning_rate": 1.924517468012491e-05, "loss": 0.0027, "step": 17121 }, { "epoch": 7.991598599766628, "grad_norm": 0.058837890625, "learning_rate": 1.92365211072827e-05, "loss": 0.0024, "step": 17122 }, { "epoch": 7.992065344224037, "grad_norm": 0.0201416015625, "learning_rate": 1.9227869273367803e-05, "loss": 0.0002, "step": 17123 }, { "epoch": 7.992532088681447, "grad_norm": 0.0086669921875, "learning_rate": 1.9219219178566518e-05, "loss": 0.0002, "step": 17124 }, { "epoch": 7.992998833138857, "grad_norm": 0.007110595703125, "learning_rate": 1.9210570823065056e-05, "loss": 0.0002, "step": 17125 }, { "epoch": 7.993465577596266, "grad_norm": 0.050537109375, "learning_rate": 1.9201924207049637e-05, "loss": 0.0034, "step": 17126 }, { "epoch": 7.993932322053675, "grad_norm": 0.427734375, "learning_rate": 1.919327933070646e-05, "loss": 0.0017, "step": 17127 }, { "epoch": 7.994399066511085, "grad_norm": 0.279296875, "learning_rate": 1.9184636194221607e-05, "loss": 0.0012, "step": 17128 }, { "epoch": 7.994865810968495, "grad_norm": 0.010009765625, "learning_rate": 1.917599479778124e-05, "loss": 0.0001, "step": 17129 }, { "epoch": 7.995332555425904, "grad_norm": 0.01171875, "learning_rate": 1.916735514157134e-05, "loss": 0.0002, "step": 17130 }, { "epoch": 7.995799299883314, "grad_norm": 0.0240478515625, "learning_rate": 1.9158717225777977e-05, "loss": 0.0002, "step": 17131 }, { "epoch": 7.996266044340723, "grad_norm": 0.00970458984375, "learning_rate": 1.9150081050587155e-05, "loss": 0.0001, "step": 17132 }, { "epoch": 7.996732788798133, "grad_norm": 0.00738525390625, "learning_rate": 1.9141446616184765e-05, "loss": 0.0001, "step": 17133 }, { "epoch": 7.997199533255543, "grad_norm": 0.0093994140625, "learning_rate": 1.9132813922756744e-05, "loss": 0.0001, "step": 17134 }, { "epoch": 7.997666277712952, "grad_norm": 0.006439208984375, "learning_rate": 1.9124182970488992e-05, "loss": 0.0001, "step": 17135 }, { "epoch": 7.998133022170362, "grad_norm": 0.05078125, "learning_rate": 1.9115553759567283e-05, "loss": 0.0015, "step": 17136 }, { "epoch": 7.9985997666277715, "grad_norm": 0.2265625, "learning_rate": 1.910692629017745e-05, "loss": 0.0008, "step": 17137 }, { "epoch": 7.99906651108518, "grad_norm": 0.01953125, "learning_rate": 1.9098300562505266e-05, "loss": 0.0002, "step": 17138 }, { "epoch": 7.99953325554259, "grad_norm": 0.011962890625, "learning_rate": 1.9089676576736405e-05, "loss": 0.0002, "step": 17139 }, { "epoch": 8.0, "grad_norm": 0.03662109375, "learning_rate": 1.908105433305658e-05, "loss": 0.0029, "step": 17140 }, { "epoch": 8.000466744457409, "grad_norm": 0.2734375, "learning_rate": 1.907243383165147e-05, "loss": 0.0005, "step": 17141 }, { "epoch": 8.00093348891482, "grad_norm": 0.004730224609375, "learning_rate": 1.9063815072706614e-05, "loss": 0.0001, "step": 17142 }, { "epoch": 8.001400233372228, "grad_norm": 0.038818359375, "learning_rate": 1.9055198056407652e-05, "loss": 0.0002, "step": 17143 }, { "epoch": 8.001866977829637, "grad_norm": 0.006011962890625, "learning_rate": 1.904658278294009e-05, "loss": 0.0001, "step": 17144 }, { "epoch": 8.002333722287048, "grad_norm": 0.049072265625, "learning_rate": 1.9037969252489386e-05, "loss": 0.0024, "step": 17145 }, { "epoch": 8.002800466744457, "grad_norm": 0.005523681640625, "learning_rate": 1.902935746524104e-05, "loss": 0.0001, "step": 17146 }, { "epoch": 8.003267211201868, "grad_norm": 0.0458984375, "learning_rate": 1.902074742138049e-05, "loss": 0.0023, "step": 17147 }, { "epoch": 8.003733955659277, "grad_norm": 0.007080078125, "learning_rate": 1.9012139121093064e-05, "loss": 0.0001, "step": 17148 }, { "epoch": 8.004200700116685, "grad_norm": 0.00579833984375, "learning_rate": 1.9003532564564142e-05, "loss": 0.0001, "step": 17149 }, { "epoch": 8.004667444574096, "grad_norm": 0.005279541015625, "learning_rate": 1.8994927751979063e-05, "loss": 0.0001, "step": 17150 }, { "epoch": 8.005134189031505, "grad_norm": 0.008056640625, "learning_rate": 1.898632468352304e-05, "loss": 0.0001, "step": 17151 }, { "epoch": 8.005600933488914, "grad_norm": 0.004119873046875, "learning_rate": 1.8977723359381327e-05, "loss": 0.0001, "step": 17152 }, { "epoch": 8.005600933488914, "eval_loss": 2.4350552558898926, "eval_runtime": 84.5635, "eval_samples_per_second": 21.333, "eval_steps_per_second": 2.673, "step": 17152 }, { "epoch": 8.006067677946325, "grad_norm": 0.0390625, "learning_rate": 1.896912377973915e-05, "loss": 0.002, "step": 17153 }, { "epoch": 8.006534422403734, "grad_norm": 0.01068115234375, "learning_rate": 1.8960525944781628e-05, "loss": 0.0002, "step": 17154 }, { "epoch": 8.007001166861144, "grad_norm": 0.01092529296875, "learning_rate": 1.8951929854693882e-05, "loss": 0.0002, "step": 17155 }, { "epoch": 8.007467911318553, "grad_norm": 0.025146484375, "learning_rate": 1.8943335509661054e-05, "loss": 0.0002, "step": 17156 }, { "epoch": 8.007934655775962, "grad_norm": 0.037353515625, "learning_rate": 1.893474290986811e-05, "loss": 0.0028, "step": 17157 }, { "epoch": 8.008401400233373, "grad_norm": 0.00848388671875, "learning_rate": 1.892615205550009e-05, "loss": 0.0001, "step": 17158 }, { "epoch": 8.008868144690782, "grad_norm": 0.006317138671875, "learning_rate": 1.8917562946742006e-05, "loss": 0.0001, "step": 17159 }, { "epoch": 8.00933488914819, "grad_norm": 0.008544921875, "learning_rate": 1.8908975583778722e-05, "loss": 0.0002, "step": 17160 }, { "epoch": 8.009801633605601, "grad_norm": 0.0272216796875, "learning_rate": 1.8900389966795196e-05, "loss": 0.0002, "step": 17161 }, { "epoch": 8.01026837806301, "grad_norm": 0.01190185546875, "learning_rate": 1.8891806095976227e-05, "loss": 0.0002, "step": 17162 }, { "epoch": 8.01073512252042, "grad_norm": 0.038818359375, "learning_rate": 1.8883223971506657e-05, "loss": 0.0013, "step": 17163 }, { "epoch": 8.01120186697783, "grad_norm": 0.01275634765625, "learning_rate": 1.8874643593571316e-05, "loss": 0.0002, "step": 17164 }, { "epoch": 8.011668611435239, "grad_norm": 0.02099609375, "learning_rate": 1.8866064962354877e-05, "loss": 0.0001, "step": 17165 }, { "epoch": 8.01213535589265, "grad_norm": 0.01446533203125, "learning_rate": 1.8857488078042073e-05, "loss": 0.0002, "step": 17166 }, { "epoch": 8.012602100350058, "grad_norm": 0.0093994140625, "learning_rate": 1.884891294081761e-05, "loss": 0.0002, "step": 17167 }, { "epoch": 8.013068844807467, "grad_norm": 0.0380859375, "learning_rate": 1.884033955086607e-05, "loss": 0.0017, "step": 17168 }, { "epoch": 8.013535589264878, "grad_norm": 0.0115966796875, "learning_rate": 1.8831767908372056e-05, "loss": 0.0002, "step": 17169 }, { "epoch": 8.014002333722287, "grad_norm": 0.00970458984375, "learning_rate": 1.882319801352017e-05, "loss": 0.0002, "step": 17170 }, { "epoch": 8.014469078179697, "grad_norm": 0.0089111328125, "learning_rate": 1.881462986649487e-05, "loss": 0.0002, "step": 17171 }, { "epoch": 8.014935822637106, "grad_norm": 0.043212890625, "learning_rate": 1.880606346748066e-05, "loss": 0.0022, "step": 17172 }, { "epoch": 8.015402567094515, "grad_norm": 0.09619140625, "learning_rate": 1.879749881666203e-05, "loss": 0.0046, "step": 17173 }, { "epoch": 8.015869311551926, "grad_norm": 0.00555419921875, "learning_rate": 1.8788935914223305e-05, "loss": 0.0001, "step": 17174 }, { "epoch": 8.016336056009335, "grad_norm": 0.005401611328125, "learning_rate": 1.8780374760348906e-05, "loss": 0.0002, "step": 17175 }, { "epoch": 8.016802800466744, "grad_norm": 0.0107421875, "learning_rate": 1.8771815355223178e-05, "loss": 0.0002, "step": 17176 }, { "epoch": 8.017269544924154, "grad_norm": 0.005401611328125, "learning_rate": 1.8763257699030358e-05, "loss": 0.0001, "step": 17177 }, { "epoch": 8.017736289381563, "grad_norm": 0.007293701171875, "learning_rate": 1.8754701791954767e-05, "loss": 0.0001, "step": 17178 }, { "epoch": 8.018203033838974, "grad_norm": 0.0125732421875, "learning_rate": 1.8746147634180556e-05, "loss": 0.0002, "step": 17179 }, { "epoch": 8.018669778296383, "grad_norm": 0.04052734375, "learning_rate": 1.8737595225891945e-05, "loss": 0.0018, "step": 17180 }, { "epoch": 8.019136522753792, "grad_norm": 0.009521484375, "learning_rate": 1.872904456727309e-05, "loss": 0.0002, "step": 17181 }, { "epoch": 8.019603267211203, "grad_norm": 0.058349609375, "learning_rate": 1.8720495658508053e-05, "loss": 0.005, "step": 17182 }, { "epoch": 8.020070011668611, "grad_norm": 0.00848388671875, "learning_rate": 1.871194849978093e-05, "loss": 0.0001, "step": 17183 }, { "epoch": 8.02053675612602, "grad_norm": 0.7109375, "learning_rate": 1.8703403091275772e-05, "loss": 0.006, "step": 17184 }, { "epoch": 8.021003500583431, "grad_norm": 0.01348876953125, "learning_rate": 1.8694859433176516e-05, "loss": 0.0002, "step": 17185 }, { "epoch": 8.02147024504084, "grad_norm": 0.0595703125, "learning_rate": 1.868631752566715e-05, "loss": 0.0002, "step": 17186 }, { "epoch": 8.021936989498249, "grad_norm": 0.014404296875, "learning_rate": 1.8677777368931603e-05, "loss": 0.0002, "step": 17187 }, { "epoch": 8.02240373395566, "grad_norm": 0.0079345703125, "learning_rate": 1.8669238963153722e-05, "loss": 0.0001, "step": 17188 }, { "epoch": 8.022870478413068, "grad_norm": 0.01953125, "learning_rate": 1.8660702308517363e-05, "loss": 0.0005, "step": 17189 }, { "epoch": 8.02333722287048, "grad_norm": 0.01263427734375, "learning_rate": 1.8652167405206356e-05, "loss": 0.0002, "step": 17190 }, { "epoch": 8.023803967327888, "grad_norm": 0.0091552734375, "learning_rate": 1.864363425340444e-05, "loss": 0.0002, "step": 17191 }, { "epoch": 8.024270711785297, "grad_norm": 0.01153564453125, "learning_rate": 1.863510285329533e-05, "loss": 0.0001, "step": 17192 }, { "epoch": 8.024737456242708, "grad_norm": 0.054443359375, "learning_rate": 1.8626573205062738e-05, "loss": 0.0029, "step": 17193 }, { "epoch": 8.025204200700117, "grad_norm": 0.0250244140625, "learning_rate": 1.8618045308890297e-05, "loss": 0.0002, "step": 17194 }, { "epoch": 8.025670945157525, "grad_norm": 0.05029296875, "learning_rate": 1.8609519164961632e-05, "loss": 0.002, "step": 17195 }, { "epoch": 8.026137689614936, "grad_norm": 0.006072998046875, "learning_rate": 1.8600994773460344e-05, "loss": 0.0001, "step": 17196 }, { "epoch": 8.026604434072345, "grad_norm": 0.0322265625, "learning_rate": 1.859247213456994e-05, "loss": 0.0002, "step": 17197 }, { "epoch": 8.027071178529756, "grad_norm": 0.00836181640625, "learning_rate": 1.858395124847393e-05, "loss": 0.0002, "step": 17198 }, { "epoch": 8.027537922987165, "grad_norm": 0.515625, "learning_rate": 1.8575432115355797e-05, "loss": 0.0022, "step": 17199 }, { "epoch": 8.028004667444574, "grad_norm": 0.064453125, "learning_rate": 1.8566914735398933e-05, "loss": 0.0002, "step": 17200 }, { "epoch": 8.028471411901984, "grad_norm": 0.034423828125, "learning_rate": 1.855839910878675e-05, "loss": 0.0002, "step": 17201 }, { "epoch": 8.028938156359393, "grad_norm": 0.0159912109375, "learning_rate": 1.8549885235702624e-05, "loss": 0.0002, "step": 17202 }, { "epoch": 8.029404900816802, "grad_norm": 0.00677490234375, "learning_rate": 1.854137311632981e-05, "loss": 0.0001, "step": 17203 }, { "epoch": 8.029871645274213, "grad_norm": 0.01190185546875, "learning_rate": 1.853286275085161e-05, "loss": 0.0001, "step": 17204 }, { "epoch": 8.030338389731622, "grad_norm": 0.01043701171875, "learning_rate": 1.8524354139451282e-05, "loss": 0.0001, "step": 17205 }, { "epoch": 8.030805134189032, "grad_norm": 0.006195068359375, "learning_rate": 1.8515847282311994e-05, "loss": 0.0001, "step": 17206 }, { "epoch": 8.031271878646441, "grad_norm": 0.0235595703125, "learning_rate": 1.8507342179616928e-05, "loss": 0.0002, "step": 17207 }, { "epoch": 8.03173862310385, "grad_norm": 0.00799560546875, "learning_rate": 1.8498838831549216e-05, "loss": 0.0001, "step": 17208 }, { "epoch": 8.03220536756126, "grad_norm": 0.0198974609375, "learning_rate": 1.849033723829191e-05, "loss": 0.0002, "step": 17209 }, { "epoch": 8.03267211201867, "grad_norm": 0.00823974609375, "learning_rate": 1.8481837400028102e-05, "loss": 0.0002, "step": 17210 }, { "epoch": 8.033138856476079, "grad_norm": 0.01239013671875, "learning_rate": 1.8473339316940753e-05, "loss": 0.0002, "step": 17211 }, { "epoch": 8.03360560093349, "grad_norm": 0.03173828125, "learning_rate": 1.846484298921287e-05, "loss": 0.0002, "step": 17212 }, { "epoch": 8.034072345390898, "grad_norm": 0.0096435546875, "learning_rate": 1.8456348417027415e-05, "loss": 0.0002, "step": 17213 }, { "epoch": 8.034539089848309, "grad_norm": 0.006591796875, "learning_rate": 1.844785560056721e-05, "loss": 0.0001, "step": 17214 }, { "epoch": 8.035005834305718, "grad_norm": 0.06689453125, "learning_rate": 1.843936454001517e-05, "loss": 0.0022, "step": 17215 }, { "epoch": 8.035472578763127, "grad_norm": 0.0162353515625, "learning_rate": 1.8430875235554125e-05, "loss": 0.0002, "step": 17216 }, { "epoch": 8.035939323220537, "grad_norm": 0.010009765625, "learning_rate": 1.8422387687366816e-05, "loss": 0.0001, "step": 17217 }, { "epoch": 8.036406067677946, "grad_norm": 0.00579833984375, "learning_rate": 1.841390189563601e-05, "loss": 0.0001, "step": 17218 }, { "epoch": 8.036872812135355, "grad_norm": 0.01397705078125, "learning_rate": 1.840541786054445e-05, "loss": 0.0001, "step": 17219 }, { "epoch": 8.037339556592766, "grad_norm": 0.0189208984375, "learning_rate": 1.8396935582274733e-05, "loss": 0.0002, "step": 17220 }, { "epoch": 8.037806301050175, "grad_norm": 0.01123046875, "learning_rate": 1.8388455061009546e-05, "loss": 0.0002, "step": 17221 }, { "epoch": 8.038273045507585, "grad_norm": 0.01202392578125, "learning_rate": 1.837997629693149e-05, "loss": 0.0002, "step": 17222 }, { "epoch": 8.038739789964994, "grad_norm": 0.03857421875, "learning_rate": 1.8371499290223082e-05, "loss": 0.0026, "step": 17223 }, { "epoch": 8.039206534422403, "grad_norm": 0.005279541015625, "learning_rate": 1.8363024041066867e-05, "loss": 0.0001, "step": 17224 }, { "epoch": 8.039673278879814, "grad_norm": 0.020263671875, "learning_rate": 1.8354550549645343e-05, "loss": 0.0002, "step": 17225 }, { "epoch": 8.040140023337223, "grad_norm": 0.1103515625, "learning_rate": 1.83460788161409e-05, "loss": 0.0004, "step": 17226 }, { "epoch": 8.040606767794632, "grad_norm": 0.0087890625, "learning_rate": 1.833760884073601e-05, "loss": 0.0001, "step": 17227 }, { "epoch": 8.041073512252042, "grad_norm": 0.00653076171875, "learning_rate": 1.8329140623612984e-05, "loss": 0.0002, "step": 17228 }, { "epoch": 8.041540256709451, "grad_norm": 0.00848388671875, "learning_rate": 1.8320674164954175e-05, "loss": 0.0001, "step": 17229 }, { "epoch": 8.04200700116686, "grad_norm": 0.007049560546875, "learning_rate": 1.83122094649419e-05, "loss": 0.0001, "step": 17230 }, { "epoch": 8.042473745624271, "grad_norm": 0.0174560546875, "learning_rate": 1.830374652375836e-05, "loss": 0.0002, "step": 17231 }, { "epoch": 8.04294049008168, "grad_norm": 0.046142578125, "learning_rate": 1.82952853415858e-05, "loss": 0.0018, "step": 17232 }, { "epoch": 8.04340723453909, "grad_norm": 0.016845703125, "learning_rate": 1.8286825918606433e-05, "loss": 0.0001, "step": 17233 }, { "epoch": 8.0438739789965, "grad_norm": 0.040283203125, "learning_rate": 1.8278368255002332e-05, "loss": 0.0018, "step": 17234 }, { "epoch": 8.044340723453908, "grad_norm": 0.01458740234375, "learning_rate": 1.8269912350955642e-05, "loss": 0.0001, "step": 17235 }, { "epoch": 8.044807467911319, "grad_norm": 0.0267333984375, "learning_rate": 1.826145820664844e-05, "loss": 0.0002, "step": 17236 }, { "epoch": 8.045274212368728, "grad_norm": 0.01312255859375, "learning_rate": 1.8253005822262715e-05, "loss": 0.0001, "step": 17237 }, { "epoch": 8.045740956826137, "grad_norm": 0.0137939453125, "learning_rate": 1.8244555197980472e-05, "loss": 0.0001, "step": 17238 }, { "epoch": 8.046207701283548, "grad_norm": 0.047607421875, "learning_rate": 1.8236106333983715e-05, "loss": 0.0017, "step": 17239 }, { "epoch": 8.046674445740956, "grad_norm": 0.007354736328125, "learning_rate": 1.8227659230454252e-05, "loss": 0.0001, "step": 17240 }, { "epoch": 8.047141190198367, "grad_norm": 0.0087890625, "learning_rate": 1.8219213887574017e-05, "loss": 0.0001, "step": 17241 }, { "epoch": 8.047607934655776, "grad_norm": 0.010986328125, "learning_rate": 1.8210770305524872e-05, "loss": 0.0002, "step": 17242 }, { "epoch": 8.048074679113185, "grad_norm": 0.048828125, "learning_rate": 1.820232848448856e-05, "loss": 0.002, "step": 17243 }, { "epoch": 8.048541423570596, "grad_norm": 0.01165771484375, "learning_rate": 1.8193888424646876e-05, "loss": 0.0001, "step": 17244 }, { "epoch": 8.049008168028005, "grad_norm": 0.0135498046875, "learning_rate": 1.8185450126181557e-05, "loss": 0.0001, "step": 17245 }, { "epoch": 8.049474912485413, "grad_norm": 0.006988525390625, "learning_rate": 1.817701358927425e-05, "loss": 0.0001, "step": 17246 }, { "epoch": 8.049941656942824, "grad_norm": 0.01068115234375, "learning_rate": 1.816857881410663e-05, "loss": 0.0001, "step": 17247 }, { "epoch": 8.050408401400233, "grad_norm": 0.01116943359375, "learning_rate": 1.8160145800860316e-05, "loss": 0.0002, "step": 17248 }, { "epoch": 8.050875145857644, "grad_norm": 0.01806640625, "learning_rate": 1.8151714549716857e-05, "loss": 0.0002, "step": 17249 }, { "epoch": 8.051341890315053, "grad_norm": 0.0101318359375, "learning_rate": 1.8143285060857784e-05, "loss": 0.0002, "step": 17250 }, { "epoch": 8.051808634772462, "grad_norm": 0.01055908203125, "learning_rate": 1.8134857334464627e-05, "loss": 0.0001, "step": 17251 }, { "epoch": 8.052275379229872, "grad_norm": 0.007598876953125, "learning_rate": 1.812643137071881e-05, "loss": 0.0002, "step": 17252 }, { "epoch": 8.052742123687281, "grad_norm": 0.005950927734375, "learning_rate": 1.8118007169801755e-05, "loss": 0.0001, "step": 17253 }, { "epoch": 8.05320886814469, "grad_norm": 0.014892578125, "learning_rate": 1.810958473189489e-05, "loss": 0.0002, "step": 17254 }, { "epoch": 8.0536756126021, "grad_norm": 0.03857421875, "learning_rate": 1.8101164057179488e-05, "loss": 0.002, "step": 17255 }, { "epoch": 8.05414235705951, "grad_norm": 0.01141357421875, "learning_rate": 1.80927451458369e-05, "loss": 0.0001, "step": 17256 }, { "epoch": 8.05460910151692, "grad_norm": 0.00958251953125, "learning_rate": 1.808432799804841e-05, "loss": 0.0002, "step": 17257 }, { "epoch": 8.05507584597433, "grad_norm": 0.00921630859375, "learning_rate": 1.8075912613995203e-05, "loss": 0.0001, "step": 17258 }, { "epoch": 8.055542590431738, "grad_norm": 0.006866455078125, "learning_rate": 1.8067498993858513e-05, "loss": 0.0001, "step": 17259 }, { "epoch": 8.056009334889149, "grad_norm": 0.023681640625, "learning_rate": 1.8059087137819453e-05, "loss": 0.0022, "step": 17260 }, { "epoch": 8.056476079346558, "grad_norm": 0.0155029296875, "learning_rate": 1.805067704605916e-05, "loss": 0.0002, "step": 17261 }, { "epoch": 8.056942823803967, "grad_norm": 0.04345703125, "learning_rate": 1.804226871875875e-05, "loss": 0.0032, "step": 17262 }, { "epoch": 8.057409568261377, "grad_norm": 0.02001953125, "learning_rate": 1.803386215609919e-05, "loss": 0.0002, "step": 17263 }, { "epoch": 8.057876312718786, "grad_norm": 0.0034332275390625, "learning_rate": 1.802545735826152e-05, "loss": 0.0001, "step": 17264 }, { "epoch": 8.058343057176195, "grad_norm": 0.00689697265625, "learning_rate": 1.801705432542674e-05, "loss": 0.0002, "step": 17265 }, { "epoch": 8.058809801633606, "grad_norm": 0.09423828125, "learning_rate": 1.8008653057775715e-05, "loss": 0.0021, "step": 17266 }, { "epoch": 8.059276546091015, "grad_norm": 0.007080078125, "learning_rate": 1.8000253555489356e-05, "loss": 0.0001, "step": 17267 }, { "epoch": 8.059743290548425, "grad_norm": 0.0419921875, "learning_rate": 1.799185581874855e-05, "loss": 0.0022, "step": 17268 }, { "epoch": 8.060210035005834, "grad_norm": 0.035400390625, "learning_rate": 1.7983459847734053e-05, "loss": 0.0002, "step": 17269 }, { "epoch": 8.060676779463243, "grad_norm": 0.0263671875, "learning_rate": 1.7975065642626652e-05, "loss": 0.0002, "step": 17270 }, { "epoch": 8.061143523920654, "grad_norm": 0.0654296875, "learning_rate": 1.7966673203607132e-05, "loss": 0.0003, "step": 17271 }, { "epoch": 8.061610268378063, "grad_norm": 0.00579833984375, "learning_rate": 1.795828253085613e-05, "loss": 0.0001, "step": 17272 }, { "epoch": 8.062077012835472, "grad_norm": 0.005859375, "learning_rate": 1.7949893624554326e-05, "loss": 0.0001, "step": 17273 }, { "epoch": 8.062543757292882, "grad_norm": 0.0260009765625, "learning_rate": 1.7941506484882375e-05, "loss": 0.0002, "step": 17274 }, { "epoch": 8.063010501750291, "grad_norm": 0.00848388671875, "learning_rate": 1.7933121112020812e-05, "loss": 0.0002, "step": 17275 }, { "epoch": 8.063477246207702, "grad_norm": 0.0230712890625, "learning_rate": 1.7924737506150236e-05, "loss": 0.0002, "step": 17276 }, { "epoch": 8.063943990665111, "grad_norm": 0.00677490234375, "learning_rate": 1.79163556674511e-05, "loss": 0.0001, "step": 17277 }, { "epoch": 8.06441073512252, "grad_norm": 0.00885009765625, "learning_rate": 1.790797559610389e-05, "loss": 0.0002, "step": 17278 }, { "epoch": 8.06487747957993, "grad_norm": 0.005767822265625, "learning_rate": 1.7899597292289083e-05, "loss": 0.0001, "step": 17279 }, { "epoch": 8.06534422403734, "grad_norm": 0.01123046875, "learning_rate": 1.789122075618701e-05, "loss": 0.0001, "step": 17280 }, { "epoch": 8.065810968494748, "grad_norm": 0.0230712890625, "learning_rate": 1.7882845987978048e-05, "loss": 0.0001, "step": 17281 }, { "epoch": 8.066277712952159, "grad_norm": 0.007171630859375, "learning_rate": 1.7874472987842562e-05, "loss": 0.0001, "step": 17282 }, { "epoch": 8.066744457409568, "grad_norm": 0.00970458984375, "learning_rate": 1.7866101755960752e-05, "loss": 0.0002, "step": 17283 }, { "epoch": 8.067211201866979, "grad_norm": 0.009521484375, "learning_rate": 1.785773229251291e-05, "loss": 0.0001, "step": 17284 }, { "epoch": 8.067677946324388, "grad_norm": 0.0284423828125, "learning_rate": 1.7849364597679252e-05, "loss": 0.0001, "step": 17285 }, { "epoch": 8.068144690781796, "grad_norm": 0.028076171875, "learning_rate": 1.7840998671639897e-05, "loss": 0.0002, "step": 17286 }, { "epoch": 8.068611435239207, "grad_norm": 0.009521484375, "learning_rate": 1.7832634514575018e-05, "loss": 0.0002, "step": 17287 }, { "epoch": 8.069078179696616, "grad_norm": 0.02880859375, "learning_rate": 1.78242721266647e-05, "loss": 0.0002, "step": 17288 }, { "epoch": 8.069544924154025, "grad_norm": 0.017578125, "learning_rate": 1.7815911508088932e-05, "loss": 0.0002, "step": 17289 }, { "epoch": 8.070011668611436, "grad_norm": 0.012451171875, "learning_rate": 1.7807552659027792e-05, "loss": 0.0001, "step": 17290 }, { "epoch": 8.070478413068845, "grad_norm": 0.014892578125, "learning_rate": 1.7799195579661255e-05, "loss": 0.0002, "step": 17291 }, { "epoch": 8.070945157526255, "grad_norm": 0.00701904296875, "learning_rate": 1.7790840270169217e-05, "loss": 0.0001, "step": 17292 }, { "epoch": 8.071411901983664, "grad_norm": 0.021484375, "learning_rate": 1.7782486730731596e-05, "loss": 0.0001, "step": 17293 }, { "epoch": 8.071878646441073, "grad_norm": 0.034423828125, "learning_rate": 1.7774134961528288e-05, "loss": 0.0019, "step": 17294 }, { "epoch": 8.072345390898484, "grad_norm": 0.04736328125, "learning_rate": 1.7765784962739062e-05, "loss": 0.0021, "step": 17295 }, { "epoch": 8.072812135355893, "grad_norm": 0.0810546875, "learning_rate": 1.7757436734543718e-05, "loss": 0.0036, "step": 17296 }, { "epoch": 8.073278879813302, "grad_norm": 0.490234375, "learning_rate": 1.7749090277122037e-05, "loss": 0.0057, "step": 17297 }, { "epoch": 8.073745624270712, "grad_norm": 0.18359375, "learning_rate": 1.7740745590653683e-05, "loss": 0.0005, "step": 17298 }, { "epoch": 8.074212368728121, "grad_norm": 0.00665283203125, "learning_rate": 1.773240267531835e-05, "loss": 0.0001, "step": 17299 }, { "epoch": 8.074679113185532, "grad_norm": 0.04638671875, "learning_rate": 1.772406153129568e-05, "loss": 0.0002, "step": 17300 }, { "epoch": 8.07514585764294, "grad_norm": 0.0242919921875, "learning_rate": 1.7715722158765224e-05, "loss": 0.0002, "step": 17301 }, { "epoch": 8.07561260210035, "grad_norm": 0.0106201171875, "learning_rate": 1.770738455790657e-05, "loss": 0.0002, "step": 17302 }, { "epoch": 8.07607934655776, "grad_norm": 0.056396484375, "learning_rate": 1.769904872889926e-05, "loss": 0.0034, "step": 17303 }, { "epoch": 8.07654609101517, "grad_norm": 0.0191650390625, "learning_rate": 1.7690714671922714e-05, "loss": 0.0002, "step": 17304 }, { "epoch": 8.077012835472578, "grad_norm": 0.01055908203125, "learning_rate": 1.7682382387156405e-05, "loss": 0.0001, "step": 17305 }, { "epoch": 8.077479579929989, "grad_norm": 0.0084228515625, "learning_rate": 1.7674051874779775e-05, "loss": 0.0001, "step": 17306 }, { "epoch": 8.077946324387398, "grad_norm": 0.01806640625, "learning_rate": 1.766572313497211e-05, "loss": 0.0002, "step": 17307 }, { "epoch": 8.078413068844807, "grad_norm": 0.0145263671875, "learning_rate": 1.765739616791281e-05, "loss": 0.0002, "step": 17308 }, { "epoch": 8.078879813302217, "grad_norm": 0.051513671875, "learning_rate": 1.7649070973781102e-05, "loss": 0.0026, "step": 17309 }, { "epoch": 8.079346557759626, "grad_norm": 0.0081787109375, "learning_rate": 1.7640747552756263e-05, "loss": 0.0001, "step": 17310 }, { "epoch": 8.079813302217037, "grad_norm": 0.034912109375, "learning_rate": 1.7632425905017535e-05, "loss": 0.0026, "step": 17311 }, { "epoch": 8.080280046674446, "grad_norm": 0.01214599609375, "learning_rate": 1.7624106030744046e-05, "loss": 0.0001, "step": 17312 }, { "epoch": 8.080746791131855, "grad_norm": 0.015380859375, "learning_rate": 1.7615787930114946e-05, "loss": 0.0002, "step": 17313 }, { "epoch": 8.081213535589265, "grad_norm": 0.0115966796875, "learning_rate": 1.7607471603309365e-05, "loss": 0.0001, "step": 17314 }, { "epoch": 8.081680280046674, "grad_norm": 0.007476806640625, "learning_rate": 1.759915705050632e-05, "loss": 0.0001, "step": 17315 }, { "epoch": 8.082147024504083, "grad_norm": 0.0107421875, "learning_rate": 1.759084427188484e-05, "loss": 0.0002, "step": 17316 }, { "epoch": 8.082613768961494, "grad_norm": 0.0081787109375, "learning_rate": 1.7582533267623946e-05, "loss": 0.0002, "step": 17317 }, { "epoch": 8.083080513418903, "grad_norm": 0.01080322265625, "learning_rate": 1.757422403790253e-05, "loss": 0.0002, "step": 17318 }, { "epoch": 8.083547257876313, "grad_norm": 0.00946044921875, "learning_rate": 1.7565916582899524e-05, "loss": 0.0002, "step": 17319 }, { "epoch": 8.084014002333722, "grad_norm": 0.0108642578125, "learning_rate": 1.755761090279382e-05, "loss": 0.0002, "step": 17320 }, { "epoch": 8.084480746791131, "grad_norm": 0.00872802734375, "learning_rate": 1.7549306997764193e-05, "loss": 0.0002, "step": 17321 }, { "epoch": 8.084947491248542, "grad_norm": 0.01904296875, "learning_rate": 1.7541004867989475e-05, "loss": 0.0002, "step": 17322 }, { "epoch": 8.08541423570595, "grad_norm": 0.01507568359375, "learning_rate": 1.7532704513648436e-05, "loss": 0.0002, "step": 17323 }, { "epoch": 8.08588098016336, "grad_norm": 0.0498046875, "learning_rate": 1.7524405934919742e-05, "loss": 0.002, "step": 17324 }, { "epoch": 8.08634772462077, "grad_norm": 0.01708984375, "learning_rate": 1.7516109131982093e-05, "loss": 0.0002, "step": 17325 }, { "epoch": 8.08681446907818, "grad_norm": 0.00823974609375, "learning_rate": 1.7507814105014152e-05, "loss": 0.0002, "step": 17326 }, { "epoch": 8.08728121353559, "grad_norm": 0.01129150390625, "learning_rate": 1.7499520854194484e-05, "loss": 0.0002, "step": 17327 }, { "epoch": 8.087747957992999, "grad_norm": 0.0120849609375, "learning_rate": 1.749122937970169e-05, "loss": 0.0002, "step": 17328 }, { "epoch": 8.088214702450408, "grad_norm": 0.0123291015625, "learning_rate": 1.748293968171425e-05, "loss": 0.0002, "step": 17329 }, { "epoch": 8.088681446907819, "grad_norm": 0.005859375, "learning_rate": 1.7474651760410676e-05, "loss": 0.0001, "step": 17330 }, { "epoch": 8.089148191365227, "grad_norm": 0.046630859375, "learning_rate": 1.7466365615969437e-05, "loss": 0.0002, "step": 17331 }, { "epoch": 8.089614935822636, "grad_norm": 0.005950927734375, "learning_rate": 1.7458081248568892e-05, "loss": 0.0001, "step": 17332 }, { "epoch": 8.090081680280047, "grad_norm": 0.01068115234375, "learning_rate": 1.744979865838743e-05, "loss": 0.0002, "step": 17333 }, { "epoch": 8.090548424737456, "grad_norm": 0.05859375, "learning_rate": 1.744151784560343e-05, "loss": 0.0019, "step": 17334 }, { "epoch": 8.091015169194867, "grad_norm": 0.048583984375, "learning_rate": 1.743323881039515e-05, "loss": 0.0017, "step": 17335 }, { "epoch": 8.091481913652276, "grad_norm": 0.0673828125, "learning_rate": 1.742496155294082e-05, "loss": 0.0042, "step": 17336 }, { "epoch": 8.091948658109684, "grad_norm": 0.00897216796875, "learning_rate": 1.7416686073418685e-05, "loss": 0.0002, "step": 17337 }, { "epoch": 8.092415402567095, "grad_norm": 0.007080078125, "learning_rate": 1.7408412372006954e-05, "loss": 0.0002, "step": 17338 }, { "epoch": 8.092882147024504, "grad_norm": 0.0186767578125, "learning_rate": 1.7400140448883717e-05, "loss": 0.0001, "step": 17339 }, { "epoch": 8.093348891481913, "grad_norm": 0.0169677734375, "learning_rate": 1.7391870304227133e-05, "loss": 0.0002, "step": 17340 }, { "epoch": 8.093815635939324, "grad_norm": 0.035400390625, "learning_rate": 1.73836019382152e-05, "loss": 0.0002, "step": 17341 }, { "epoch": 8.094282380396733, "grad_norm": 0.0185546875, "learning_rate": 1.737533535102599e-05, "loss": 0.0002, "step": 17342 }, { "epoch": 8.094749124854143, "grad_norm": 0.006591796875, "learning_rate": 1.7367070542837503e-05, "loss": 0.0001, "step": 17343 }, { "epoch": 8.095215869311552, "grad_norm": 0.0517578125, "learning_rate": 1.7358807513827648e-05, "loss": 0.0002, "step": 17344 }, { "epoch": 8.095682613768961, "grad_norm": 0.01171875, "learning_rate": 1.7350546264174362e-05, "loss": 0.0001, "step": 17345 }, { "epoch": 8.096149358226372, "grad_norm": 0.010498046875, "learning_rate": 1.734228679405554e-05, "loss": 0.0001, "step": 17346 }, { "epoch": 8.09661610268378, "grad_norm": 0.015380859375, "learning_rate": 1.733402910364896e-05, "loss": 0.0002, "step": 17347 }, { "epoch": 8.09708284714119, "grad_norm": 0.0118408203125, "learning_rate": 1.7325773193132466e-05, "loss": 0.0001, "step": 17348 }, { "epoch": 8.0975495915986, "grad_norm": 0.02734375, "learning_rate": 1.731751906268382e-05, "loss": 0.0002, "step": 17349 }, { "epoch": 8.09801633605601, "grad_norm": 0.040283203125, "learning_rate": 1.730926671248071e-05, "loss": 0.0023, "step": 17350 }, { "epoch": 8.098483080513418, "grad_norm": 0.0142822265625, "learning_rate": 1.7301016142700823e-05, "loss": 0.0001, "step": 17351 }, { "epoch": 8.098949824970829, "grad_norm": 0.0093994140625, "learning_rate": 1.7292767353521843e-05, "loss": 0.0001, "step": 17352 }, { "epoch": 8.099416569428238, "grad_norm": 0.00518798828125, "learning_rate": 1.7284520345121323e-05, "loss": 0.0001, "step": 17353 }, { "epoch": 8.099883313885648, "grad_norm": 0.005157470703125, "learning_rate": 1.7276275117676855e-05, "loss": 0.0001, "step": 17354 }, { "epoch": 8.100350058343057, "grad_norm": 0.009765625, "learning_rate": 1.726803167136599e-05, "loss": 0.0001, "step": 17355 }, { "epoch": 8.100816802800466, "grad_norm": 0.130859375, "learning_rate": 1.7259790006366184e-05, "loss": 0.0004, "step": 17356 }, { "epoch": 8.101283547257877, "grad_norm": 0.006683349609375, "learning_rate": 1.7251550122854908e-05, "loss": 0.0001, "step": 17357 }, { "epoch": 8.101750291715286, "grad_norm": 0.0137939453125, "learning_rate": 1.7243312021009562e-05, "loss": 0.0002, "step": 17358 }, { "epoch": 8.102217036172695, "grad_norm": 0.0179443359375, "learning_rate": 1.723507570100751e-05, "loss": 0.0002, "step": 17359 }, { "epoch": 8.102683780630105, "grad_norm": 0.037353515625, "learning_rate": 1.7226841163026142e-05, "loss": 0.0002, "step": 17360 }, { "epoch": 8.103150525087514, "grad_norm": 0.00897216796875, "learning_rate": 1.7218608407242697e-05, "loss": 0.0002, "step": 17361 }, { "epoch": 8.103617269544925, "grad_norm": 0.007415771484375, "learning_rate": 1.7210377433834457e-05, "loss": 0.0001, "step": 17362 }, { "epoch": 8.104084014002334, "grad_norm": 0.00958251953125, "learning_rate": 1.7202148242978676e-05, "loss": 0.0001, "step": 17363 }, { "epoch": 8.104550758459743, "grad_norm": 0.048583984375, "learning_rate": 1.7193920834852484e-05, "loss": 0.0002, "step": 17364 }, { "epoch": 8.105017502917153, "grad_norm": 0.01177978515625, "learning_rate": 1.7185695209633045e-05, "loss": 0.0002, "step": 17365 }, { "epoch": 8.105484247374562, "grad_norm": 0.005950927734375, "learning_rate": 1.71774713674975e-05, "loss": 0.0001, "step": 17366 }, { "epoch": 8.105950991831971, "grad_norm": 0.01031494140625, "learning_rate": 1.716924930862287e-05, "loss": 0.0002, "step": 17367 }, { "epoch": 8.106417736289382, "grad_norm": 0.04150390625, "learning_rate": 1.7161029033186205e-05, "loss": 0.0022, "step": 17368 }, { "epoch": 8.10688448074679, "grad_norm": 0.006988525390625, "learning_rate": 1.7152810541364517e-05, "loss": 0.0001, "step": 17369 }, { "epoch": 8.107351225204201, "grad_norm": 0.00653076171875, "learning_rate": 1.714459383333471e-05, "loss": 0.0001, "step": 17370 }, { "epoch": 8.10781796966161, "grad_norm": 0.037353515625, "learning_rate": 1.7136378909273732e-05, "loss": 0.0002, "step": 17371 }, { "epoch": 8.10828471411902, "grad_norm": 0.0086669921875, "learning_rate": 1.7128165769358485e-05, "loss": 0.0002, "step": 17372 }, { "epoch": 8.10875145857643, "grad_norm": 0.004608154296875, "learning_rate": 1.711995441376575e-05, "loss": 0.0001, "step": 17373 }, { "epoch": 8.109218203033839, "grad_norm": 0.01031494140625, "learning_rate": 1.7111744842672338e-05, "loss": 0.0002, "step": 17374 }, { "epoch": 8.109684947491248, "grad_norm": 0.037841796875, "learning_rate": 1.710353705625507e-05, "loss": 0.0002, "step": 17375 }, { "epoch": 8.110151691948658, "grad_norm": 0.0111083984375, "learning_rate": 1.709533105469059e-05, "loss": 0.0002, "step": 17376 }, { "epoch": 8.110618436406067, "grad_norm": 0.0296630859375, "learning_rate": 1.708712683815564e-05, "loss": 0.0018, "step": 17377 }, { "epoch": 8.111085180863478, "grad_norm": 0.0093994140625, "learning_rate": 1.7078924406826823e-05, "loss": 0.0002, "step": 17378 }, { "epoch": 8.111551925320887, "grad_norm": 0.006927490234375, "learning_rate": 1.7070723760880767e-05, "loss": 0.0002, "step": 17379 }, { "epoch": 8.112018669778296, "grad_norm": 0.0146484375, "learning_rate": 1.706252490049406e-05, "loss": 0.0002, "step": 17380 }, { "epoch": 8.112485414235707, "grad_norm": 0.0279541015625, "learning_rate": 1.7054327825843196e-05, "loss": 0.0002, "step": 17381 }, { "epoch": 8.112952158693115, "grad_norm": 0.01092529296875, "learning_rate": 1.7046132537104697e-05, "loss": 0.0002, "step": 17382 }, { "epoch": 8.113418903150524, "grad_norm": 0.01531982421875, "learning_rate": 1.7037939034454984e-05, "loss": 0.0002, "step": 17383 }, { "epoch": 8.113885647607935, "grad_norm": 0.01171875, "learning_rate": 1.702974731807052e-05, "loss": 0.0002, "step": 17384 }, { "epoch": 8.114352392065344, "grad_norm": 0.006195068359375, "learning_rate": 1.702155738812763e-05, "loss": 0.0001, "step": 17385 }, { "epoch": 8.114819136522755, "grad_norm": 0.02783203125, "learning_rate": 1.701336924480267e-05, "loss": 0.0023, "step": 17386 }, { "epoch": 8.115285880980164, "grad_norm": 0.0216064453125, "learning_rate": 1.7005182888271965e-05, "loss": 0.0002, "step": 17387 }, { "epoch": 8.115752625437572, "grad_norm": 0.006439208984375, "learning_rate": 1.6996998318711743e-05, "loss": 0.0001, "step": 17388 }, { "epoch": 8.116219369894983, "grad_norm": 0.0634765625, "learning_rate": 1.6988815536298253e-05, "loss": 0.0049, "step": 17389 }, { "epoch": 8.116686114352392, "grad_norm": 0.0089111328125, "learning_rate": 1.698063454120765e-05, "loss": 0.0002, "step": 17390 }, { "epoch": 8.117152858809801, "grad_norm": 0.007110595703125, "learning_rate": 1.69724553336161e-05, "loss": 0.0001, "step": 17391 }, { "epoch": 8.117619603267212, "grad_norm": 0.021240234375, "learning_rate": 1.6964277913699732e-05, "loss": 0.0002, "step": 17392 }, { "epoch": 8.11808634772462, "grad_norm": 0.03564453125, "learning_rate": 1.6956102281634556e-05, "loss": 0.0002, "step": 17393 }, { "epoch": 8.11855309218203, "grad_norm": 0.009765625, "learning_rate": 1.694792843759665e-05, "loss": 0.0001, "step": 17394 }, { "epoch": 8.11901983663944, "grad_norm": 0.01611328125, "learning_rate": 1.6939756381762008e-05, "loss": 0.0002, "step": 17395 }, { "epoch": 8.119486581096849, "grad_norm": 0.006988525390625, "learning_rate": 1.693158611430654e-05, "loss": 0.0002, "step": 17396 }, { "epoch": 8.11995332555426, "grad_norm": 0.00628662109375, "learning_rate": 1.69234176354062e-05, "loss": 0.0001, "step": 17397 }, { "epoch": 8.120420070011669, "grad_norm": 0.011962890625, "learning_rate": 1.691525094523687e-05, "loss": 0.0001, "step": 17398 }, { "epoch": 8.120886814469078, "grad_norm": 0.00537109375, "learning_rate": 1.690708604397435e-05, "loss": 0.0001, "step": 17399 }, { "epoch": 8.121353558926488, "grad_norm": 0.0203857421875, "learning_rate": 1.689892293179446e-05, "loss": 0.0002, "step": 17400 }, { "epoch": 8.121820303383897, "grad_norm": 0.00927734375, "learning_rate": 1.6890761608872986e-05, "loss": 0.0002, "step": 17401 }, { "epoch": 8.122287047841306, "grad_norm": 0.021484375, "learning_rate": 1.68826020753856e-05, "loss": 0.0002, "step": 17402 }, { "epoch": 8.122753792298717, "grad_norm": 0.00921630859375, "learning_rate": 1.687444433150801e-05, "loss": 0.0001, "step": 17403 }, { "epoch": 8.123220536756126, "grad_norm": 0.006622314453125, "learning_rate": 1.6866288377415894e-05, "loss": 0.0001, "step": 17404 }, { "epoch": 8.123687281213536, "grad_norm": 0.01116943359375, "learning_rate": 1.6858134213284804e-05, "loss": 0.0002, "step": 17405 }, { "epoch": 8.124154025670945, "grad_norm": 0.0478515625, "learning_rate": 1.6849981839290352e-05, "loss": 0.0014, "step": 17406 }, { "epoch": 8.124620770128354, "grad_norm": 0.12255859375, "learning_rate": 1.684183125560802e-05, "loss": 0.0003, "step": 17407 }, { "epoch": 8.125087514585765, "grad_norm": 0.10400390625, "learning_rate": 1.6833682462413326e-05, "loss": 0.0005, "step": 17408 }, { "epoch": 8.125554259043174, "grad_norm": 0.0142822265625, "learning_rate": 1.682553545988175e-05, "loss": 0.0002, "step": 17409 }, { "epoch": 8.126021003500583, "grad_norm": 0.0093994140625, "learning_rate": 1.6817390248188658e-05, "loss": 0.0001, "step": 17410 }, { "epoch": 8.126487747957993, "grad_norm": 0.01708984375, "learning_rate": 1.6809246827509438e-05, "loss": 0.0002, "step": 17411 }, { "epoch": 8.126954492415402, "grad_norm": 0.032958984375, "learning_rate": 1.6801105198019472e-05, "loss": 0.002, "step": 17412 }, { "epoch": 8.127421236872813, "grad_norm": 0.0181884765625, "learning_rate": 1.6792965359893997e-05, "loss": 0.0002, "step": 17413 }, { "epoch": 8.127887981330222, "grad_norm": 0.0126953125, "learning_rate": 1.6784827313308295e-05, "loss": 0.0001, "step": 17414 }, { "epoch": 8.12835472578763, "grad_norm": 0.01312255859375, "learning_rate": 1.677669105843761e-05, "loss": 0.0002, "step": 17415 }, { "epoch": 8.128821470245041, "grad_norm": 0.0087890625, "learning_rate": 1.6768556595457074e-05, "loss": 0.0001, "step": 17416 }, { "epoch": 8.12928821470245, "grad_norm": 0.01214599609375, "learning_rate": 1.676042392454187e-05, "loss": 0.0002, "step": 17417 }, { "epoch": 8.12975495915986, "grad_norm": 0.01025390625, "learning_rate": 1.675229304586712e-05, "loss": 0.0002, "step": 17418 }, { "epoch": 8.13022170361727, "grad_norm": 0.019287109375, "learning_rate": 1.6744163959607838e-05, "loss": 0.0002, "step": 17419 }, { "epoch": 8.130688448074679, "grad_norm": 0.007659912109375, "learning_rate": 1.673603666593908e-05, "loss": 0.0001, "step": 17420 }, { "epoch": 8.13115519253209, "grad_norm": 0.0172119140625, "learning_rate": 1.672791116503587e-05, "loss": 0.0002, "step": 17421 }, { "epoch": 8.131621936989498, "grad_norm": 0.0537109375, "learning_rate": 1.671978745707309e-05, "loss": 0.0003, "step": 17422 }, { "epoch": 8.132088681446907, "grad_norm": 0.0279541015625, "learning_rate": 1.6711665542225684e-05, "loss": 0.0002, "step": 17423 }, { "epoch": 8.132555425904318, "grad_norm": 0.0106201171875, "learning_rate": 1.6703545420668566e-05, "loss": 0.0001, "step": 17424 }, { "epoch": 8.133022170361727, "grad_norm": 0.01318359375, "learning_rate": 1.66954270925765e-05, "loss": 0.0002, "step": 17425 }, { "epoch": 8.133488914819136, "grad_norm": 0.006378173828125, "learning_rate": 1.6687310558124337e-05, "loss": 0.0001, "step": 17426 }, { "epoch": 8.133955659276547, "grad_norm": 0.09716796875, "learning_rate": 1.6679195817486792e-05, "loss": 0.0003, "step": 17427 }, { "epoch": 8.134422403733955, "grad_norm": 0.06298828125, "learning_rate": 1.6671082870838616e-05, "loss": 0.0021, "step": 17428 }, { "epoch": 8.134889148191366, "grad_norm": 0.007293701171875, "learning_rate": 1.6662971718354504e-05, "loss": 0.0001, "step": 17429 }, { "epoch": 8.135355892648775, "grad_norm": 0.00836181640625, "learning_rate": 1.6654862360209067e-05, "loss": 0.0002, "step": 17430 }, { "epoch": 8.135822637106184, "grad_norm": 0.0137939453125, "learning_rate": 1.66467547965769e-05, "loss": 0.0002, "step": 17431 }, { "epoch": 8.136289381563595, "grad_norm": 0.04443359375, "learning_rate": 1.663864902763258e-05, "loss": 0.0003, "step": 17432 }, { "epoch": 8.136756126021004, "grad_norm": 0.009033203125, "learning_rate": 1.6630545053550662e-05, "loss": 0.0002, "step": 17433 }, { "epoch": 8.137222870478412, "grad_norm": 0.006500244140625, "learning_rate": 1.6622442874505584e-05, "loss": 0.0001, "step": 17434 }, { "epoch": 8.137689614935823, "grad_norm": 0.00714111328125, "learning_rate": 1.661434249067182e-05, "loss": 0.0002, "step": 17435 }, { "epoch": 8.138156359393232, "grad_norm": 0.0177001953125, "learning_rate": 1.6606243902223806e-05, "loss": 0.0002, "step": 17436 }, { "epoch": 8.138623103850641, "grad_norm": 0.0272216796875, "learning_rate": 1.659814710933586e-05, "loss": 0.002, "step": 17437 }, { "epoch": 8.139089848308052, "grad_norm": 0.01226806640625, "learning_rate": 1.6590052112182376e-05, "loss": 0.0002, "step": 17438 }, { "epoch": 8.13955659276546, "grad_norm": 0.0093994140625, "learning_rate": 1.6581958910937578e-05, "loss": 0.0001, "step": 17439 }, { "epoch": 8.140023337222871, "grad_norm": 0.006011962890625, "learning_rate": 1.6573867505775764e-05, "loss": 0.0001, "step": 17440 }, { "epoch": 8.14049008168028, "grad_norm": 0.049560546875, "learning_rate": 1.656577789687117e-05, "loss": 0.0002, "step": 17441 }, { "epoch": 8.140956826137689, "grad_norm": 0.00482177734375, "learning_rate": 1.6557690084397924e-05, "loss": 0.0001, "step": 17442 }, { "epoch": 8.1414235705951, "grad_norm": 0.01416015625, "learning_rate": 1.65496040685302e-05, "loss": 0.0002, "step": 17443 }, { "epoch": 8.141890315052509, "grad_norm": 0.04931640625, "learning_rate": 1.654151984944211e-05, "loss": 0.0002, "step": 17444 }, { "epoch": 8.142357059509918, "grad_norm": 0.0123291015625, "learning_rate": 1.6533437427307662e-05, "loss": 0.0002, "step": 17445 }, { "epoch": 8.142823803967328, "grad_norm": 0.0185546875, "learning_rate": 1.652535680230093e-05, "loss": 0.0001, "step": 17446 }, { "epoch": 8.143290548424737, "grad_norm": 0.05908203125, "learning_rate": 1.65172779745959e-05, "loss": 0.0029, "step": 17447 }, { "epoch": 8.143757292882148, "grad_norm": 0.009521484375, "learning_rate": 1.6509200944366477e-05, "loss": 0.0002, "step": 17448 }, { "epoch": 8.144224037339557, "grad_norm": 0.11181640625, "learning_rate": 1.6501125711786592e-05, "loss": 0.0029, "step": 17449 }, { "epoch": 8.144690781796966, "grad_norm": 0.01312255859375, "learning_rate": 1.649305227703013e-05, "loss": 0.0002, "step": 17450 }, { "epoch": 8.145157526254376, "grad_norm": 0.0191650390625, "learning_rate": 1.6484980640270898e-05, "loss": 0.0001, "step": 17451 }, { "epoch": 8.145624270711785, "grad_norm": 0.006011962890625, "learning_rate": 1.6476910801682677e-05, "loss": 0.0001, "step": 17452 }, { "epoch": 8.146091015169194, "grad_norm": 0.0245361328125, "learning_rate": 1.6468842761439274e-05, "loss": 0.0002, "step": 17453 }, { "epoch": 8.146557759626605, "grad_norm": 0.011962890625, "learning_rate": 1.646077651971433e-05, "loss": 0.0002, "step": 17454 }, { "epoch": 8.147024504084014, "grad_norm": 0.0206298828125, "learning_rate": 1.6452712076681566e-05, "loss": 0.0002, "step": 17455 }, { "epoch": 8.147491248541424, "grad_norm": 0.01129150390625, "learning_rate": 1.6444649432514626e-05, "loss": 0.0002, "step": 17456 }, { "epoch": 8.147957992998833, "grad_norm": 0.006805419921875, "learning_rate": 1.643658858738707e-05, "loss": 0.0001, "step": 17457 }, { "epoch": 8.148424737456242, "grad_norm": 0.01348876953125, "learning_rate": 1.6428529541472503e-05, "loss": 0.0002, "step": 17458 }, { "epoch": 8.148891481913653, "grad_norm": 0.01165771484375, "learning_rate": 1.642047229494439e-05, "loss": 0.0001, "step": 17459 }, { "epoch": 8.149358226371062, "grad_norm": 0.01239013671875, "learning_rate": 1.6412416847976243e-05, "loss": 0.0002, "step": 17460 }, { "epoch": 8.14982497082847, "grad_norm": 0.0086669921875, "learning_rate": 1.6404363200741523e-05, "loss": 0.0001, "step": 17461 }, { "epoch": 8.150291715285881, "grad_norm": 0.006622314453125, "learning_rate": 1.6396311353413595e-05, "loss": 0.0001, "step": 17462 }, { "epoch": 8.15075845974329, "grad_norm": 0.052978515625, "learning_rate": 1.6388261306165843e-05, "loss": 0.0002, "step": 17463 }, { "epoch": 8.151225204200701, "grad_norm": 0.06396484375, "learning_rate": 1.6380213059171623e-05, "loss": 0.003, "step": 17464 }, { "epoch": 8.15169194865811, "grad_norm": 0.02001953125, "learning_rate": 1.6372166612604168e-05, "loss": 0.0002, "step": 17465 }, { "epoch": 8.152158693115519, "grad_norm": 0.044677734375, "learning_rate": 1.6364121966636746e-05, "loss": 0.0017, "step": 17466 }, { "epoch": 8.15262543757293, "grad_norm": 0.0400390625, "learning_rate": 1.6356079121442603e-05, "loss": 0.0015, "step": 17467 }, { "epoch": 8.153092182030338, "grad_norm": 0.045166015625, "learning_rate": 1.634803807719485e-05, "loss": 0.0003, "step": 17468 }, { "epoch": 8.153558926487747, "grad_norm": 0.01806640625, "learning_rate": 1.633999883406665e-05, "loss": 0.0002, "step": 17469 }, { "epoch": 8.154025670945158, "grad_norm": 0.0107421875, "learning_rate": 1.633196139223113e-05, "loss": 0.0001, "step": 17470 }, { "epoch": 8.154492415402567, "grad_norm": 0.01171875, "learning_rate": 1.632392575186129e-05, "loss": 0.0002, "step": 17471 }, { "epoch": 8.154959159859978, "grad_norm": 0.03955078125, "learning_rate": 1.6315891913130167e-05, "loss": 0.0014, "step": 17472 }, { "epoch": 8.155425904317386, "grad_norm": 0.00909423828125, "learning_rate": 1.630785987621075e-05, "loss": 0.0002, "step": 17473 }, { "epoch": 8.155892648774795, "grad_norm": 0.0108642578125, "learning_rate": 1.6299829641275965e-05, "loss": 0.0001, "step": 17474 }, { "epoch": 8.156359393232206, "grad_norm": 0.00469970703125, "learning_rate": 1.6291801208498723e-05, "loss": 0.0001, "step": 17475 }, { "epoch": 8.156826137689615, "grad_norm": 0.04345703125, "learning_rate": 1.628377457805186e-05, "loss": 0.0019, "step": 17476 }, { "epoch": 8.157292882147024, "grad_norm": 0.032470703125, "learning_rate": 1.6275749750108215e-05, "loss": 0.0002, "step": 17477 }, { "epoch": 8.157759626604435, "grad_norm": 0.052978515625, "learning_rate": 1.6267726724840592e-05, "loss": 0.0002, "step": 17478 }, { "epoch": 8.158226371061843, "grad_norm": 0.10888671875, "learning_rate": 1.625970550242172e-05, "loss": 0.0003, "step": 17479 }, { "epoch": 8.158693115519252, "grad_norm": 0.00946044921875, "learning_rate": 1.6251686083024276e-05, "loss": 0.0001, "step": 17480 }, { "epoch": 8.159159859976663, "grad_norm": 0.0096435546875, "learning_rate": 1.624366846682095e-05, "loss": 0.0002, "step": 17481 }, { "epoch": 8.159626604434072, "grad_norm": 0.050537109375, "learning_rate": 1.623565265398439e-05, "loss": 0.0038, "step": 17482 }, { "epoch": 8.160093348891483, "grad_norm": 0.00543212890625, "learning_rate": 1.6227638644687148e-05, "loss": 0.0001, "step": 17483 }, { "epoch": 8.160560093348892, "grad_norm": 0.020751953125, "learning_rate": 1.6219626439101786e-05, "loss": 0.0006, "step": 17484 }, { "epoch": 8.1610268378063, "grad_norm": 0.00738525390625, "learning_rate": 1.6211616037400858e-05, "loss": 0.0001, "step": 17485 }, { "epoch": 8.161493582263711, "grad_norm": 0.0185546875, "learning_rate": 1.6203607439756773e-05, "loss": 0.0001, "step": 17486 }, { "epoch": 8.16196032672112, "grad_norm": 0.0107421875, "learning_rate": 1.6195600646342014e-05, "loss": 0.0002, "step": 17487 }, { "epoch": 8.162427071178529, "grad_norm": 0.0284423828125, "learning_rate": 1.6187595657328935e-05, "loss": 0.0019, "step": 17488 }, { "epoch": 8.16289381563594, "grad_norm": 0.0115966796875, "learning_rate": 1.6179592472889914e-05, "loss": 0.0001, "step": 17489 }, { "epoch": 8.163360560093349, "grad_norm": 0.006317138671875, "learning_rate": 1.6171591093197292e-05, "loss": 0.0001, "step": 17490 }, { "epoch": 8.16382730455076, "grad_norm": 0.01129150390625, "learning_rate": 1.6163591518423294e-05, "loss": 0.0001, "step": 17491 }, { "epoch": 8.164294049008168, "grad_norm": 0.005950927734375, "learning_rate": 1.6155593748740194e-05, "loss": 0.0001, "step": 17492 }, { "epoch": 8.164760793465577, "grad_norm": 0.0086669921875, "learning_rate": 1.61475977843202e-05, "loss": 0.0001, "step": 17493 }, { "epoch": 8.165227537922988, "grad_norm": 0.006500244140625, "learning_rate": 1.613960362533545e-05, "loss": 0.0001, "step": 17494 }, { "epoch": 8.165694282380397, "grad_norm": 0.00836181640625, "learning_rate": 1.6131611271958068e-05, "loss": 0.0002, "step": 17495 }, { "epoch": 8.166161026837806, "grad_norm": 0.00701904296875, "learning_rate": 1.612362072436019e-05, "loss": 0.0002, "step": 17496 }, { "epoch": 8.166627771295216, "grad_norm": 0.0120849609375, "learning_rate": 1.6115631982713785e-05, "loss": 0.0002, "step": 17497 }, { "epoch": 8.167094515752625, "grad_norm": 0.008544921875, "learning_rate": 1.610764504719089e-05, "loss": 0.0001, "step": 17498 }, { "epoch": 8.167561260210036, "grad_norm": 0.06884765625, "learning_rate": 1.6099659917963505e-05, "loss": 0.0049, "step": 17499 }, { "epoch": 8.168028004667445, "grad_norm": 0.0263671875, "learning_rate": 1.6091676595203508e-05, "loss": 0.0022, "step": 17500 }, { "epoch": 8.168494749124854, "grad_norm": 0.00872802734375, "learning_rate": 1.6083695079082817e-05, "loss": 0.0001, "step": 17501 }, { "epoch": 8.168961493582264, "grad_norm": 0.00701904296875, "learning_rate": 1.6075715369773304e-05, "loss": 0.0001, "step": 17502 }, { "epoch": 8.169428238039673, "grad_norm": 0.00927734375, "learning_rate": 1.606773746744672e-05, "loss": 0.0001, "step": 17503 }, { "epoch": 8.169894982497082, "grad_norm": 0.022705078125, "learning_rate": 1.605976137227487e-05, "loss": 0.0001, "step": 17504 }, { "epoch": 8.170361726954493, "grad_norm": 0.00787353515625, "learning_rate": 1.6051787084429526e-05, "loss": 0.0002, "step": 17505 }, { "epoch": 8.170828471411902, "grad_norm": 0.00518798828125, "learning_rate": 1.6043814604082318e-05, "loss": 0.0001, "step": 17506 }, { "epoch": 8.171295215869312, "grad_norm": 0.0174560546875, "learning_rate": 1.6035843931404958e-05, "loss": 0.0002, "step": 17507 }, { "epoch": 8.171761960326721, "grad_norm": 0.00653076171875, "learning_rate": 1.6027875066569022e-05, "loss": 0.0001, "step": 17508 }, { "epoch": 8.17222870478413, "grad_norm": 0.00933837890625, "learning_rate": 1.6019908009746097e-05, "loss": 0.0002, "step": 17509 }, { "epoch": 8.172695449241541, "grad_norm": 0.005218505859375, "learning_rate": 1.6011942761107757e-05, "loss": 0.0001, "step": 17510 }, { "epoch": 8.17316219369895, "grad_norm": 0.004913330078125, "learning_rate": 1.6003979320825445e-05, "loss": 0.0001, "step": 17511 }, { "epoch": 8.173628938156359, "grad_norm": 0.0098876953125, "learning_rate": 1.599601768907065e-05, "loss": 0.0001, "step": 17512 }, { "epoch": 8.17409568261377, "grad_norm": 0.014404296875, "learning_rate": 1.598805786601484e-05, "loss": 0.0001, "step": 17513 }, { "epoch": 8.174562427071178, "grad_norm": 0.005126953125, "learning_rate": 1.5980099851829322e-05, "loss": 0.0001, "step": 17514 }, { "epoch": 8.175029171528589, "grad_norm": 0.0123291015625, "learning_rate": 1.5972143646685478e-05, "loss": 0.0002, "step": 17515 }, { "epoch": 8.175495915985998, "grad_norm": 0.0076904296875, "learning_rate": 1.5964189250754623e-05, "loss": 0.0002, "step": 17516 }, { "epoch": 8.175962660443407, "grad_norm": 0.01312255859375, "learning_rate": 1.5956236664208e-05, "loss": 0.0002, "step": 17517 }, { "epoch": 8.176429404900817, "grad_norm": 0.10205078125, "learning_rate": 1.594828588721685e-05, "loss": 0.0002, "step": 17518 }, { "epoch": 8.176896149358226, "grad_norm": 0.01055908203125, "learning_rate": 1.5940336919952393e-05, "loss": 0.0001, "step": 17519 }, { "epoch": 8.177362893815635, "grad_norm": 0.007171630859375, "learning_rate": 1.593238976258572e-05, "loss": 0.0001, "step": 17520 }, { "epoch": 8.177829638273046, "grad_norm": 0.008544921875, "learning_rate": 1.5924444415287965e-05, "loss": 0.0001, "step": 17521 }, { "epoch": 8.178296382730455, "grad_norm": 0.00543212890625, "learning_rate": 1.5916500878230224e-05, "loss": 0.0001, "step": 17522 }, { "epoch": 8.178763127187864, "grad_norm": 0.0152587890625, "learning_rate": 1.5908559151583502e-05, "loss": 0.0002, "step": 17523 }, { "epoch": 8.179229871645274, "grad_norm": 0.02294921875, "learning_rate": 1.590061923551881e-05, "loss": 0.0001, "step": 17524 }, { "epoch": 8.179696616102683, "grad_norm": 0.01202392578125, "learning_rate": 1.5892681130207086e-05, "loss": 0.0001, "step": 17525 }, { "epoch": 8.180163360560094, "grad_norm": 0.005706787109375, "learning_rate": 1.5884744835819276e-05, "loss": 0.0001, "step": 17526 }, { "epoch": 8.180630105017503, "grad_norm": 0.00604248046875, "learning_rate": 1.5876810352526206e-05, "loss": 0.0001, "step": 17527 }, { "epoch": 8.181096849474912, "grad_norm": 0.00958251953125, "learning_rate": 1.5868877680498772e-05, "loss": 0.0002, "step": 17528 }, { "epoch": 8.181563593932323, "grad_norm": 0.0089111328125, "learning_rate": 1.586094681990773e-05, "loss": 0.0001, "step": 17529 }, { "epoch": 8.182030338389731, "grad_norm": 0.015625, "learning_rate": 1.585301777092385e-05, "loss": 0.0002, "step": 17530 }, { "epoch": 8.18249708284714, "grad_norm": 0.0162353515625, "learning_rate": 1.5845090533717887e-05, "loss": 0.0002, "step": 17531 }, { "epoch": 8.182963827304551, "grad_norm": 0.01031494140625, "learning_rate": 1.583716510846047e-05, "loss": 0.0002, "step": 17532 }, { "epoch": 8.18343057176196, "grad_norm": 0.0322265625, "learning_rate": 1.582924149532228e-05, "loss": 0.0006, "step": 17533 }, { "epoch": 8.18389731621937, "grad_norm": 0.00897216796875, "learning_rate": 1.5821319694473924e-05, "loss": 0.0002, "step": 17534 }, { "epoch": 8.18436406067678, "grad_norm": 0.0059814453125, "learning_rate": 1.581339970608593e-05, "loss": 0.0001, "step": 17535 }, { "epoch": 8.184830805134188, "grad_norm": 0.0076904296875, "learning_rate": 1.5805481530328857e-05, "loss": 0.0001, "step": 17536 }, { "epoch": 8.1852975495916, "grad_norm": 0.07275390625, "learning_rate": 1.5797565167373175e-05, "loss": 0.0032, "step": 17537 }, { "epoch": 8.185764294049008, "grad_norm": 0.0751953125, "learning_rate": 1.578965061738934e-05, "loss": 0.0003, "step": 17538 }, { "epoch": 8.186231038506417, "grad_norm": 0.037353515625, "learning_rate": 1.5781737880547777e-05, "loss": 0.0012, "step": 17539 }, { "epoch": 8.186697782963828, "grad_norm": 0.033447265625, "learning_rate": 1.5773826957018823e-05, "loss": 0.0002, "step": 17540 }, { "epoch": 8.187164527421237, "grad_norm": 0.006378173828125, "learning_rate": 1.5765917846972812e-05, "loss": 0.0001, "step": 17541 }, { "epoch": 8.187631271878647, "grad_norm": 0.0673828125, "learning_rate": 1.5758010550580084e-05, "loss": 0.0021, "step": 17542 }, { "epoch": 8.188098016336056, "grad_norm": 0.01300048828125, "learning_rate": 1.5750105068010824e-05, "loss": 0.0001, "step": 17543 }, { "epoch": 8.188564760793465, "grad_norm": 0.0223388671875, "learning_rate": 1.5742201399435285e-05, "loss": 0.0019, "step": 17544 }, { "epoch": 8.189031505250876, "grad_norm": 0.01483154296875, "learning_rate": 1.573429954502367e-05, "loss": 0.0002, "step": 17545 }, { "epoch": 8.189498249708285, "grad_norm": 0.01104736328125, "learning_rate": 1.5726399504946033e-05, "loss": 0.0001, "step": 17546 }, { "epoch": 8.189964994165694, "grad_norm": 0.0167236328125, "learning_rate": 1.571850127937253e-05, "loss": 0.0002, "step": 17547 }, { "epoch": 8.190431738623104, "grad_norm": 0.006622314453125, "learning_rate": 1.5710604868473234e-05, "loss": 0.0001, "step": 17548 }, { "epoch": 8.190898483080513, "grad_norm": 0.007080078125, "learning_rate": 1.5702710272418107e-05, "loss": 0.0001, "step": 17549 }, { "epoch": 8.191365227537924, "grad_norm": 0.1240234375, "learning_rate": 1.5694817491377156e-05, "loss": 0.0025, "step": 17550 }, { "epoch": 8.191831971995333, "grad_norm": 0.0106201171875, "learning_rate": 1.568692652552035e-05, "loss": 0.0002, "step": 17551 }, { "epoch": 8.192298716452742, "grad_norm": 0.01007080078125, "learning_rate": 1.5679037375017547e-05, "loss": 0.0002, "step": 17552 }, { "epoch": 8.192765460910152, "grad_norm": 0.00750732421875, "learning_rate": 1.5671150040038607e-05, "loss": 0.0001, "step": 17553 }, { "epoch": 8.193232205367561, "grad_norm": 0.0517578125, "learning_rate": 1.5663264520753408e-05, "loss": 0.002, "step": 17554 }, { "epoch": 8.19369894982497, "grad_norm": 0.0174560546875, "learning_rate": 1.5655380817331667e-05, "loss": 0.0002, "step": 17555 }, { "epoch": 8.19416569428238, "grad_norm": 0.0223388671875, "learning_rate": 1.5647498929943182e-05, "loss": 0.0002, "step": 17556 }, { "epoch": 8.19463243873979, "grad_norm": 0.0147705078125, "learning_rate": 1.5639618858757622e-05, "loss": 0.0002, "step": 17557 }, { "epoch": 8.1950991831972, "grad_norm": 0.0086669921875, "learning_rate": 1.5631740603944657e-05, "loss": 0.0001, "step": 17558 }, { "epoch": 8.19556592765461, "grad_norm": 0.00933837890625, "learning_rate": 1.5623864165673952e-05, "loss": 0.0001, "step": 17559 }, { "epoch": 8.196032672112018, "grad_norm": 0.00738525390625, "learning_rate": 1.561598954411505e-05, "loss": 0.0002, "step": 17560 }, { "epoch": 8.196499416569429, "grad_norm": 0.0150146484375, "learning_rate": 1.5608116739437507e-05, "loss": 0.0002, "step": 17561 }, { "epoch": 8.196966161026838, "grad_norm": 0.0054931640625, "learning_rate": 1.5600245751810882e-05, "loss": 0.0001, "step": 17562 }, { "epoch": 8.197432905484247, "grad_norm": 0.0087890625, "learning_rate": 1.5592376581404578e-05, "loss": 0.0001, "step": 17563 }, { "epoch": 8.197899649941657, "grad_norm": 0.00958251953125, "learning_rate": 1.5584509228388056e-05, "loss": 0.0002, "step": 17564 }, { "epoch": 8.198366394399066, "grad_norm": 0.00616455078125, "learning_rate": 1.5576643692930736e-05, "loss": 0.0002, "step": 17565 }, { "epoch": 8.198833138856475, "grad_norm": 0.025390625, "learning_rate": 1.556877997520193e-05, "loss": 0.0002, "step": 17566 }, { "epoch": 8.199299883313886, "grad_norm": 0.0361328125, "learning_rate": 1.5560918075370955e-05, "loss": 0.002, "step": 17567 }, { "epoch": 8.199766627771295, "grad_norm": 0.028076171875, "learning_rate": 1.5553057993607134e-05, "loss": 0.0021, "step": 17568 }, { "epoch": 8.200233372228706, "grad_norm": 0.0126953125, "learning_rate": 1.554519973007965e-05, "loss": 0.0001, "step": 17569 }, { "epoch": 8.200700116686114, "grad_norm": 0.01324462890625, "learning_rate": 1.553734328495772e-05, "loss": 0.0001, "step": 17570 }, { "epoch": 8.201166861143523, "grad_norm": 0.0084228515625, "learning_rate": 1.552948865841052e-05, "loss": 0.0001, "step": 17571 }, { "epoch": 8.201633605600934, "grad_norm": 0.01123046875, "learning_rate": 1.5521635850607118e-05, "loss": 0.0001, "step": 17572 }, { "epoch": 8.202100350058343, "grad_norm": 0.00872802734375, "learning_rate": 1.5513784861716663e-05, "loss": 0.0002, "step": 17573 }, { "epoch": 8.202567094515752, "grad_norm": 0.05224609375, "learning_rate": 1.550593569190816e-05, "loss": 0.0022, "step": 17574 }, { "epoch": 8.203033838973163, "grad_norm": 0.05517578125, "learning_rate": 1.5498088341350582e-05, "loss": 0.0029, "step": 17575 }, { "epoch": 8.203500583430571, "grad_norm": 0.007080078125, "learning_rate": 1.5490242810212917e-05, "loss": 0.0002, "step": 17576 }, { "epoch": 8.203967327887982, "grad_norm": 0.03173828125, "learning_rate": 1.5482399098664112e-05, "loss": 0.0007, "step": 17577 }, { "epoch": 8.204434072345391, "grad_norm": 0.00885009765625, "learning_rate": 1.547455720687301e-05, "loss": 0.0001, "step": 17578 }, { "epoch": 8.2049008168028, "grad_norm": 0.01025390625, "learning_rate": 1.5466717135008468e-05, "loss": 0.0001, "step": 17579 }, { "epoch": 8.20536756126021, "grad_norm": 0.006561279296875, "learning_rate": 1.5458878883239325e-05, "loss": 0.0001, "step": 17580 }, { "epoch": 8.20583430571762, "grad_norm": 0.0086669921875, "learning_rate": 1.54510424517343e-05, "loss": 0.0001, "step": 17581 }, { "epoch": 8.206301050175028, "grad_norm": 0.0625, "learning_rate": 1.5443207840662132e-05, "loss": 0.003, "step": 17582 }, { "epoch": 8.206767794632439, "grad_norm": 0.05029296875, "learning_rate": 1.5435375050191547e-05, "loss": 0.0018, "step": 17583 }, { "epoch": 8.207234539089848, "grad_norm": 0.01263427734375, "learning_rate": 1.542754408049113e-05, "loss": 0.0002, "step": 17584 }, { "epoch": 8.207701283547259, "grad_norm": 0.01422119140625, "learning_rate": 1.5419714931729556e-05, "loss": 0.0002, "step": 17585 }, { "epoch": 8.208168028004668, "grad_norm": 0.021484375, "learning_rate": 1.5411887604075338e-05, "loss": 0.0002, "step": 17586 }, { "epoch": 8.208634772462077, "grad_norm": 0.054443359375, "learning_rate": 1.5404062097697024e-05, "loss": 0.0011, "step": 17587 }, { "epoch": 8.209101516919487, "grad_norm": 0.042236328125, "learning_rate": 1.5396238412763154e-05, "loss": 0.0026, "step": 17588 }, { "epoch": 8.209568261376896, "grad_norm": 0.00830078125, "learning_rate": 1.5388416549442098e-05, "loss": 0.0001, "step": 17589 }, { "epoch": 8.210035005834305, "grad_norm": 0.01458740234375, "learning_rate": 1.5380596507902323e-05, "loss": 0.0001, "step": 17590 }, { "epoch": 8.210501750291716, "grad_norm": 0.031982421875, "learning_rate": 1.5372778288312217e-05, "loss": 0.0016, "step": 17591 }, { "epoch": 8.210968494749125, "grad_norm": 0.0096435546875, "learning_rate": 1.5364961890840058e-05, "loss": 0.0002, "step": 17592 }, { "epoch": 8.211435239206535, "grad_norm": 0.052490234375, "learning_rate": 1.5357147315654184e-05, "loss": 0.0028, "step": 17593 }, { "epoch": 8.211901983663944, "grad_norm": 0.00872802734375, "learning_rate": 1.5349334562922855e-05, "loss": 0.0001, "step": 17594 }, { "epoch": 8.212368728121353, "grad_norm": 0.008056640625, "learning_rate": 1.5341523632814258e-05, "loss": 0.0001, "step": 17595 }, { "epoch": 8.212835472578764, "grad_norm": 0.03515625, "learning_rate": 1.5333714525496578e-05, "loss": 0.0017, "step": 17596 }, { "epoch": 8.213302217036173, "grad_norm": 0.01416015625, "learning_rate": 1.5325907241137992e-05, "loss": 0.0002, "step": 17597 }, { "epoch": 8.213768961493582, "grad_norm": 0.0576171875, "learning_rate": 1.5318101779906556e-05, "loss": 0.0016, "step": 17598 }, { "epoch": 8.214235705950992, "grad_norm": 0.05908203125, "learning_rate": 1.5310298141970338e-05, "loss": 0.0037, "step": 17599 }, { "epoch": 8.214702450408401, "grad_norm": 0.005889892578125, "learning_rate": 1.5302496327497385e-05, "loss": 0.0001, "step": 17600 }, { "epoch": 8.21516919486581, "grad_norm": 0.0164794921875, "learning_rate": 1.5294696336655634e-05, "loss": 0.0001, "step": 17601 }, { "epoch": 8.21563593932322, "grad_norm": 0.04248046875, "learning_rate": 1.528689816961306e-05, "loss": 0.0021, "step": 17602 }, { "epoch": 8.21610268378063, "grad_norm": 0.008544921875, "learning_rate": 1.527910182653758e-05, "loss": 0.0001, "step": 17603 }, { "epoch": 8.21656942823804, "grad_norm": 0.011962890625, "learning_rate": 1.5271307307597016e-05, "loss": 0.0001, "step": 17604 }, { "epoch": 8.21703617269545, "grad_norm": 0.0113525390625, "learning_rate": 1.526351461295924e-05, "loss": 0.0002, "step": 17605 }, { "epoch": 8.217502917152858, "grad_norm": 0.01177978515625, "learning_rate": 1.5255723742791984e-05, "loss": 0.0002, "step": 17606 }, { "epoch": 8.217969661610269, "grad_norm": 0.007568359375, "learning_rate": 1.5247934697263022e-05, "loss": 0.0001, "step": 17607 }, { "epoch": 8.218436406067678, "grad_norm": 0.0091552734375, "learning_rate": 1.5240147476540079e-05, "loss": 0.0001, "step": 17608 }, { "epoch": 8.218903150525087, "grad_norm": 0.05126953125, "learning_rate": 1.5232362080790785e-05, "loss": 0.0018, "step": 17609 }, { "epoch": 8.219369894982497, "grad_norm": 0.0220947265625, "learning_rate": 1.5224578510182786e-05, "loss": 0.0002, "step": 17610 }, { "epoch": 8.219836639439906, "grad_norm": 0.03466796875, "learning_rate": 1.521679676488369e-05, "loss": 0.0026, "step": 17611 }, { "epoch": 8.220303383897317, "grad_norm": 0.00567626953125, "learning_rate": 1.5209016845061009e-05, "loss": 0.0001, "step": 17612 }, { "epoch": 8.220770128354726, "grad_norm": 0.028564453125, "learning_rate": 1.5201238750882273e-05, "loss": 0.0002, "step": 17613 }, { "epoch": 8.221236872812135, "grad_norm": 0.007659912109375, "learning_rate": 1.5193462482514975e-05, "loss": 0.0001, "step": 17614 }, { "epoch": 8.221703617269545, "grad_norm": 0.00604248046875, "learning_rate": 1.51856880401265e-05, "loss": 0.0001, "step": 17615 }, { "epoch": 8.222170361726954, "grad_norm": 0.02294921875, "learning_rate": 1.517791542388427e-05, "loss": 0.0002, "step": 17616 }, { "epoch": 8.222637106184363, "grad_norm": 0.00982666015625, "learning_rate": 1.5170144633955652e-05, "loss": 0.0002, "step": 17617 }, { "epoch": 8.223103850641774, "grad_norm": 0.048095703125, "learning_rate": 1.516237567050791e-05, "loss": 0.0027, "step": 17618 }, { "epoch": 8.223570595099183, "grad_norm": 0.06689453125, "learning_rate": 1.515460853370836e-05, "loss": 0.0031, "step": 17619 }, { "epoch": 8.224037339556594, "grad_norm": 0.00701904296875, "learning_rate": 1.5146843223724249e-05, "loss": 0.0002, "step": 17620 }, { "epoch": 8.224504084014002, "grad_norm": 0.048828125, "learning_rate": 1.513907974072274e-05, "loss": 0.002, "step": 17621 }, { "epoch": 8.224970828471411, "grad_norm": 0.0079345703125, "learning_rate": 1.5131318084870983e-05, "loss": 0.0002, "step": 17622 }, { "epoch": 8.225437572928822, "grad_norm": 0.011962890625, "learning_rate": 1.5123558256336135e-05, "loss": 0.0002, "step": 17623 }, { "epoch": 8.225904317386231, "grad_norm": 0.041259765625, "learning_rate": 1.5115800255285217e-05, "loss": 0.0028, "step": 17624 }, { "epoch": 8.22637106184364, "grad_norm": 0.00970458984375, "learning_rate": 1.5108044081885308e-05, "loss": 0.0002, "step": 17625 }, { "epoch": 8.22683780630105, "grad_norm": 0.007720947265625, "learning_rate": 1.510028973630343e-05, "loss": 0.0001, "step": 17626 }, { "epoch": 8.22730455075846, "grad_norm": 0.00811767578125, "learning_rate": 1.5092537218706471e-05, "loss": 0.0001, "step": 17627 }, { "epoch": 8.22777129521587, "grad_norm": 0.044677734375, "learning_rate": 1.5084786529261408e-05, "loss": 0.0027, "step": 17628 }, { "epoch": 8.228238039673279, "grad_norm": 0.0281982421875, "learning_rate": 1.507703766813512e-05, "loss": 0.002, "step": 17629 }, { "epoch": 8.228704784130688, "grad_norm": 0.00836181640625, "learning_rate": 1.5069290635494415e-05, "loss": 0.0001, "step": 17630 }, { "epoch": 8.229171528588099, "grad_norm": 0.0908203125, "learning_rate": 1.5061545431506118e-05, "loss": 0.0041, "step": 17631 }, { "epoch": 8.229638273045508, "grad_norm": 0.005340576171875, "learning_rate": 1.5053802056337007e-05, "loss": 0.0001, "step": 17632 }, { "epoch": 8.230105017502916, "grad_norm": 0.023681640625, "learning_rate": 1.5046060510153758e-05, "loss": 0.0001, "step": 17633 }, { "epoch": 8.230571761960327, "grad_norm": 0.0302734375, "learning_rate": 1.5038320793123096e-05, "loss": 0.0002, "step": 17634 }, { "epoch": 8.231038506417736, "grad_norm": 0.007720947265625, "learning_rate": 1.5030582905411671e-05, "loss": 0.0001, "step": 17635 }, { "epoch": 8.231505250875147, "grad_norm": 0.01275634765625, "learning_rate": 1.5022846847186045e-05, "loss": 0.0002, "step": 17636 }, { "epoch": 8.231971995332556, "grad_norm": 0.01373291015625, "learning_rate": 1.5015112618612836e-05, "loss": 0.0001, "step": 17637 }, { "epoch": 8.232438739789965, "grad_norm": 0.00823974609375, "learning_rate": 1.5007380219858525e-05, "loss": 0.0001, "step": 17638 }, { "epoch": 8.232905484247375, "grad_norm": 0.0167236328125, "learning_rate": 1.499964965108962e-05, "loss": 0.0001, "step": 17639 }, { "epoch": 8.233372228704784, "grad_norm": 0.046630859375, "learning_rate": 1.499192091247259e-05, "loss": 0.0025, "step": 17640 }, { "epoch": 8.233838973162193, "grad_norm": 0.006683349609375, "learning_rate": 1.498419400417379e-05, "loss": 0.0001, "step": 17641 }, { "epoch": 8.234305717619604, "grad_norm": 0.0064697265625, "learning_rate": 1.4976468926359633e-05, "loss": 0.0001, "step": 17642 }, { "epoch": 8.234772462077013, "grad_norm": 0.007293701171875, "learning_rate": 1.496874567919645e-05, "loss": 0.0001, "step": 17643 }, { "epoch": 8.235239206534422, "grad_norm": 0.0283203125, "learning_rate": 1.4961024262850487e-05, "loss": 0.0002, "step": 17644 }, { "epoch": 8.235705950991832, "grad_norm": 0.006866455078125, "learning_rate": 1.4953304677488033e-05, "loss": 0.0001, "step": 17645 }, { "epoch": 8.236172695449241, "grad_norm": 0.013427734375, "learning_rate": 1.4945586923275313e-05, "loss": 0.0001, "step": 17646 }, { "epoch": 8.236639439906652, "grad_norm": 0.07666015625, "learning_rate": 1.4937871000378445e-05, "loss": 0.0065, "step": 17647 }, { "epoch": 8.23710618436406, "grad_norm": 0.006500244140625, "learning_rate": 1.49301569089636e-05, "loss": 0.0001, "step": 17648 }, { "epoch": 8.23757292882147, "grad_norm": 0.0096435546875, "learning_rate": 1.4922444649196888e-05, "loss": 0.0001, "step": 17649 }, { "epoch": 8.23803967327888, "grad_norm": 0.09033203125, "learning_rate": 1.4914734221244309e-05, "loss": 0.0028, "step": 17650 }, { "epoch": 8.23850641773629, "grad_norm": 0.005340576171875, "learning_rate": 1.4907025625271908e-05, "loss": 0.0001, "step": 17651 }, { "epoch": 8.238973162193698, "grad_norm": 0.08447265625, "learning_rate": 1.4899318861445688e-05, "loss": 0.003, "step": 17652 }, { "epoch": 8.239439906651109, "grad_norm": 0.0086669921875, "learning_rate": 1.4891613929931525e-05, "loss": 0.0001, "step": 17653 }, { "epoch": 8.239906651108518, "grad_norm": 0.006591796875, "learning_rate": 1.4883910830895364e-05, "loss": 0.0001, "step": 17654 }, { "epoch": 8.240373395565928, "grad_norm": 0.01055908203125, "learning_rate": 1.4876209564503019e-05, "loss": 0.0002, "step": 17655 }, { "epoch": 8.240840140023337, "grad_norm": 0.00885009765625, "learning_rate": 1.4868510130920322e-05, "loss": 0.0001, "step": 17656 }, { "epoch": 8.241306884480746, "grad_norm": 0.0654296875, "learning_rate": 1.4860812530313096e-05, "loss": 0.002, "step": 17657 }, { "epoch": 8.241773628938157, "grad_norm": 0.0230712890625, "learning_rate": 1.4853116762846997e-05, "loss": 0.0002, "step": 17658 }, { "epoch": 8.242240373395566, "grad_norm": 0.3828125, "learning_rate": 1.4845422828687783e-05, "loss": 0.001, "step": 17659 }, { "epoch": 8.242707117852975, "grad_norm": 0.0081787109375, "learning_rate": 1.4837730728001109e-05, "loss": 0.0001, "step": 17660 }, { "epoch": 8.243173862310385, "grad_norm": 0.01153564453125, "learning_rate": 1.4830040460952565e-05, "loss": 0.0002, "step": 17661 }, { "epoch": 8.243640606767794, "grad_norm": 0.006927490234375, "learning_rate": 1.4822352027707743e-05, "loss": 0.0001, "step": 17662 }, { "epoch": 8.244107351225205, "grad_norm": 0.00421142578125, "learning_rate": 1.4814665428432217e-05, "loss": 0.0001, "step": 17663 }, { "epoch": 8.244574095682614, "grad_norm": 0.0137939453125, "learning_rate": 1.4806980663291426e-05, "loss": 0.0002, "step": 17664 }, { "epoch": 8.245040840140023, "grad_norm": 0.047607421875, "learning_rate": 1.4799297732450878e-05, "loss": 0.0013, "step": 17665 }, { "epoch": 8.245507584597433, "grad_norm": 0.01019287109375, "learning_rate": 1.479161663607599e-05, "loss": 0.0002, "step": 17666 }, { "epoch": 8.245974329054842, "grad_norm": 0.0091552734375, "learning_rate": 1.4783937374332124e-05, "loss": 0.0001, "step": 17667 }, { "epoch": 8.246441073512251, "grad_norm": 0.00762939453125, "learning_rate": 1.4776259947384629e-05, "loss": 0.0001, "step": 17668 }, { "epoch": 8.246907817969662, "grad_norm": 0.00799560546875, "learning_rate": 1.4768584355398862e-05, "loss": 0.0002, "step": 17669 }, { "epoch": 8.247374562427071, "grad_norm": 0.01287841796875, "learning_rate": 1.4760910598540001e-05, "loss": 0.0002, "step": 17670 }, { "epoch": 8.247841306884482, "grad_norm": 0.039306640625, "learning_rate": 1.4753238676973302e-05, "loss": 0.0002, "step": 17671 }, { "epoch": 8.24830805134189, "grad_norm": 0.06787109375, "learning_rate": 1.474556859086398e-05, "loss": 0.0048, "step": 17672 }, { "epoch": 8.2487747957993, "grad_norm": 0.01092529296875, "learning_rate": 1.473790034037713e-05, "loss": 0.0002, "step": 17673 }, { "epoch": 8.24924154025671, "grad_norm": 0.0150146484375, "learning_rate": 1.4730233925677895e-05, "loss": 0.0001, "step": 17674 }, { "epoch": 8.249708284714119, "grad_norm": 0.034423828125, "learning_rate": 1.4722569346931358e-05, "loss": 0.0015, "step": 17675 }, { "epoch": 8.250175029171528, "grad_norm": 0.0517578125, "learning_rate": 1.4714906604302492e-05, "loss": 0.0003, "step": 17676 }, { "epoch": 8.250641773628939, "grad_norm": 0.006866455078125, "learning_rate": 1.4707245697956307e-05, "loss": 0.0001, "step": 17677 }, { "epoch": 8.251108518086347, "grad_norm": 0.046142578125, "learning_rate": 1.4699586628057782e-05, "loss": 0.0023, "step": 17678 }, { "epoch": 8.251575262543756, "grad_norm": 0.0069580078125, "learning_rate": 1.4691929394771775e-05, "loss": 0.0001, "step": 17679 }, { "epoch": 8.252042007001167, "grad_norm": 0.0079345703125, "learning_rate": 1.4684273998263188e-05, "loss": 0.0002, "step": 17680 }, { "epoch": 8.252508751458576, "grad_norm": 0.07177734375, "learning_rate": 1.4676620438696853e-05, "loss": 0.0022, "step": 17681 }, { "epoch": 8.252975495915987, "grad_norm": 0.013916015625, "learning_rate": 1.466896871623753e-05, "loss": 0.0002, "step": 17682 }, { "epoch": 8.253442240373396, "grad_norm": 0.041748046875, "learning_rate": 1.4661318831049987e-05, "loss": 0.0031, "step": 17683 }, { "epoch": 8.253908984830804, "grad_norm": 0.00616455078125, "learning_rate": 1.4653670783298956e-05, "loss": 0.0001, "step": 17684 }, { "epoch": 8.254375729288215, "grad_norm": 0.0218505859375, "learning_rate": 1.4646024573149064e-05, "loss": 0.0002, "step": 17685 }, { "epoch": 8.254842473745624, "grad_norm": 0.01031494140625, "learning_rate": 1.463838020076499e-05, "loss": 0.0001, "step": 17686 }, { "epoch": 8.255309218203033, "grad_norm": 0.009521484375, "learning_rate": 1.4630737666311278e-05, "loss": 0.0001, "step": 17687 }, { "epoch": 8.255775962660444, "grad_norm": 0.005279541015625, "learning_rate": 1.46230969699525e-05, "loss": 0.0001, "step": 17688 }, { "epoch": 8.255775962660444, "eval_loss": 2.4444942474365234, "eval_runtime": 84.4859, "eval_samples_per_second": 21.353, "eval_steps_per_second": 2.675, "step": 17688 }, { "epoch": 8.256242707117853, "grad_norm": 0.005767822265625, "learning_rate": 1.4615458111853197e-05, "loss": 0.0001, "step": 17689 }, { "epoch": 8.256709451575263, "grad_norm": 0.00830078125, "learning_rate": 1.4607821092177786e-05, "loss": 0.0001, "step": 17690 }, { "epoch": 8.257176196032672, "grad_norm": 0.05126953125, "learning_rate": 1.4600185911090736e-05, "loss": 0.0021, "step": 17691 }, { "epoch": 8.257642940490081, "grad_norm": 0.005950927734375, "learning_rate": 1.4592552568756456e-05, "loss": 0.0001, "step": 17692 }, { "epoch": 8.258109684947492, "grad_norm": 0.006072998046875, "learning_rate": 1.4584921065339263e-05, "loss": 0.0001, "step": 17693 }, { "epoch": 8.2585764294049, "grad_norm": 0.0245361328125, "learning_rate": 1.457729140100349e-05, "loss": 0.0019, "step": 17694 }, { "epoch": 8.25904317386231, "grad_norm": 0.0118408203125, "learning_rate": 1.456966357591344e-05, "loss": 0.0002, "step": 17695 }, { "epoch": 8.25950991831972, "grad_norm": 0.00921630859375, "learning_rate": 1.456203759023328e-05, "loss": 0.0001, "step": 17696 }, { "epoch": 8.25997666277713, "grad_norm": 0.00830078125, "learning_rate": 1.4554413444127257e-05, "loss": 0.0001, "step": 17697 }, { "epoch": 8.26044340723454, "grad_norm": 0.01019287109375, "learning_rate": 1.4546791137759541e-05, "loss": 0.0002, "step": 17698 }, { "epoch": 8.260910151691949, "grad_norm": 0.109375, "learning_rate": 1.453917067129419e-05, "loss": 0.0034, "step": 17699 }, { "epoch": 8.261376896149358, "grad_norm": 0.0057373046875, "learning_rate": 1.4531552044895325e-05, "loss": 0.0001, "step": 17700 }, { "epoch": 8.261843640606768, "grad_norm": 0.0115966796875, "learning_rate": 1.4523935258726995e-05, "loss": 0.0002, "step": 17701 }, { "epoch": 8.262310385064177, "grad_norm": 0.00848388671875, "learning_rate": 1.4516320312953146e-05, "loss": 0.0002, "step": 17702 }, { "epoch": 8.262777129521586, "grad_norm": 0.013427734375, "learning_rate": 1.4508707207737793e-05, "loss": 0.0001, "step": 17703 }, { "epoch": 8.263243873978997, "grad_norm": 0.007110595703125, "learning_rate": 1.4501095943244803e-05, "loss": 0.0001, "step": 17704 }, { "epoch": 8.263710618436406, "grad_norm": 0.00823974609375, "learning_rate": 1.4493486519638089e-05, "loss": 0.0001, "step": 17705 }, { "epoch": 8.264177362893816, "grad_norm": 0.00738525390625, "learning_rate": 1.4485878937081499e-05, "loss": 0.0002, "step": 17706 }, { "epoch": 8.264644107351225, "grad_norm": 0.0120849609375, "learning_rate": 1.447827319573879e-05, "loss": 0.0002, "step": 17707 }, { "epoch": 8.265110851808634, "grad_norm": 0.032470703125, "learning_rate": 1.4470669295773753e-05, "loss": 0.0001, "step": 17708 }, { "epoch": 8.265577596266045, "grad_norm": 0.014404296875, "learning_rate": 1.4463067237350114e-05, "loss": 0.0002, "step": 17709 }, { "epoch": 8.266044340723454, "grad_norm": 0.035400390625, "learning_rate": 1.4455467020631531e-05, "loss": 0.0001, "step": 17710 }, { "epoch": 8.266511085180863, "grad_norm": 0.005645751953125, "learning_rate": 1.4447868645781648e-05, "loss": 0.0001, "step": 17711 }, { "epoch": 8.266977829638273, "grad_norm": 0.032958984375, "learning_rate": 1.4440272112964103e-05, "loss": 0.0016, "step": 17712 }, { "epoch": 8.267444574095682, "grad_norm": 0.01104736328125, "learning_rate": 1.44326774223424e-05, "loss": 0.0001, "step": 17713 }, { "epoch": 8.267911318553093, "grad_norm": 0.006134033203125, "learning_rate": 1.4425084574080105e-05, "loss": 0.0001, "step": 17714 }, { "epoch": 8.268378063010502, "grad_norm": 0.004547119140625, "learning_rate": 1.4417493568340701e-05, "loss": 0.0001, "step": 17715 }, { "epoch": 8.26884480746791, "grad_norm": 0.0033416748046875, "learning_rate": 1.440990440528759e-05, "loss": 0.0001, "step": 17716 }, { "epoch": 8.269311551925322, "grad_norm": 0.006072998046875, "learning_rate": 1.4402317085084227e-05, "loss": 0.0001, "step": 17717 }, { "epoch": 8.26977829638273, "grad_norm": 0.055908203125, "learning_rate": 1.4394731607893941e-05, "loss": 0.0002, "step": 17718 }, { "epoch": 8.27024504084014, "grad_norm": 0.00531005859375, "learning_rate": 1.4387147973880055e-05, "loss": 0.0001, "step": 17719 }, { "epoch": 8.27071178529755, "grad_norm": 0.023681640625, "learning_rate": 1.437956618320585e-05, "loss": 0.0002, "step": 17720 }, { "epoch": 8.271178529754959, "grad_norm": 0.01611328125, "learning_rate": 1.4371986236034618e-05, "loss": 0.0002, "step": 17721 }, { "epoch": 8.271645274212368, "grad_norm": 0.0186767578125, "learning_rate": 1.4364408132529495e-05, "loss": 0.0002, "step": 17722 }, { "epoch": 8.272112018669779, "grad_norm": 0.01336669921875, "learning_rate": 1.4356831872853682e-05, "loss": 0.0001, "step": 17723 }, { "epoch": 8.272578763127187, "grad_norm": 0.053466796875, "learning_rate": 1.4349257457170328e-05, "loss": 0.0033, "step": 17724 }, { "epoch": 8.273045507584598, "grad_norm": 0.01080322265625, "learning_rate": 1.4341684885642459e-05, "loss": 0.0001, "step": 17725 }, { "epoch": 8.273512252042007, "grad_norm": 0.014404296875, "learning_rate": 1.4334114158433165e-05, "loss": 0.0001, "step": 17726 }, { "epoch": 8.273978996499416, "grad_norm": 0.01300048828125, "learning_rate": 1.432654527570546e-05, "loss": 0.0002, "step": 17727 }, { "epoch": 8.274445740956827, "grad_norm": 0.0140380859375, "learning_rate": 1.4318978237622272e-05, "loss": 0.0002, "step": 17728 }, { "epoch": 8.274912485414236, "grad_norm": 0.00799560546875, "learning_rate": 1.4311413044346544e-05, "loss": 0.0001, "step": 17729 }, { "epoch": 8.275379229871644, "grad_norm": 0.05615234375, "learning_rate": 1.4303849696041182e-05, "loss": 0.0032, "step": 17730 }, { "epoch": 8.275845974329055, "grad_norm": 0.0059814453125, "learning_rate": 1.4296288192869012e-05, "loss": 0.0001, "step": 17731 }, { "epoch": 8.276312718786464, "grad_norm": 0.0125732421875, "learning_rate": 1.4288728534992834e-05, "loss": 0.0001, "step": 17732 }, { "epoch": 8.276779463243875, "grad_norm": 0.00970458984375, "learning_rate": 1.4281170722575455e-05, "loss": 0.0001, "step": 17733 }, { "epoch": 8.277246207701284, "grad_norm": 0.0174560546875, "learning_rate": 1.4273614755779552e-05, "loss": 0.0001, "step": 17734 }, { "epoch": 8.277712952158693, "grad_norm": 0.0234375, "learning_rate": 1.4266060634767864e-05, "loss": 0.0002, "step": 17735 }, { "epoch": 8.278179696616103, "grad_norm": 0.013427734375, "learning_rate": 1.4258508359702993e-05, "loss": 0.0002, "step": 17736 }, { "epoch": 8.278646441073512, "grad_norm": 0.04736328125, "learning_rate": 1.4250957930747567e-05, "loss": 0.0013, "step": 17737 }, { "epoch": 8.279113185530921, "grad_norm": 0.08349609375, "learning_rate": 1.4243409348064174e-05, "loss": 0.0041, "step": 17738 }, { "epoch": 8.279579929988332, "grad_norm": 0.0079345703125, "learning_rate": 1.423586261181531e-05, "loss": 0.0001, "step": 17739 }, { "epoch": 8.28004667444574, "grad_norm": 0.031005859375, "learning_rate": 1.4228317722163476e-05, "loss": 0.0002, "step": 17740 }, { "epoch": 8.280513418903151, "grad_norm": 0.00537109375, "learning_rate": 1.4220774679271153e-05, "loss": 0.0001, "step": 17741 }, { "epoch": 8.28098016336056, "grad_norm": 0.00390625, "learning_rate": 1.4213233483300703e-05, "loss": 0.0001, "step": 17742 }, { "epoch": 8.281446907817969, "grad_norm": 0.014404296875, "learning_rate": 1.4205694134414527e-05, "loss": 0.0002, "step": 17743 }, { "epoch": 8.28191365227538, "grad_norm": 0.01068115234375, "learning_rate": 1.419815663277495e-05, "loss": 0.0001, "step": 17744 }, { "epoch": 8.282380396732789, "grad_norm": 0.01116943359375, "learning_rate": 1.4190620978544256e-05, "loss": 0.0001, "step": 17745 }, { "epoch": 8.282847141190198, "grad_norm": 0.0380859375, "learning_rate": 1.418308717188469e-05, "loss": 0.002, "step": 17746 }, { "epoch": 8.283313885647608, "grad_norm": 0.0546875, "learning_rate": 1.4175555212958491e-05, "loss": 0.0041, "step": 17747 }, { "epoch": 8.283780630105017, "grad_norm": 0.0224609375, "learning_rate": 1.4168025101927795e-05, "loss": 0.0001, "step": 17748 }, { "epoch": 8.284247374562428, "grad_norm": 0.004486083984375, "learning_rate": 1.4160496838954752e-05, "loss": 0.0001, "step": 17749 }, { "epoch": 8.284714119019837, "grad_norm": 0.055419921875, "learning_rate": 1.4152970424201472e-05, "loss": 0.0033, "step": 17750 }, { "epoch": 8.285180863477246, "grad_norm": 0.0072021484375, "learning_rate": 1.4145445857829964e-05, "loss": 0.0002, "step": 17751 }, { "epoch": 8.285647607934656, "grad_norm": 0.01165771484375, "learning_rate": 1.4137923140002285e-05, "loss": 0.0001, "step": 17752 }, { "epoch": 8.286114352392065, "grad_norm": 0.008056640625, "learning_rate": 1.4130402270880372e-05, "loss": 0.0002, "step": 17753 }, { "epoch": 8.286581096849474, "grad_norm": 0.010986328125, "learning_rate": 1.4122883250626161e-05, "loss": 0.0001, "step": 17754 }, { "epoch": 8.287047841306885, "grad_norm": 0.00604248046875, "learning_rate": 1.4115366079401593e-05, "loss": 0.0001, "step": 17755 }, { "epoch": 8.287514585764294, "grad_norm": 0.03271484375, "learning_rate": 1.4107850757368446e-05, "loss": 0.0024, "step": 17756 }, { "epoch": 8.287981330221704, "grad_norm": 0.046875, "learning_rate": 1.410033728468858e-05, "loss": 0.0002, "step": 17757 }, { "epoch": 8.288448074679113, "grad_norm": 0.0303955078125, "learning_rate": 1.4092825661523779e-05, "loss": 0.0003, "step": 17758 }, { "epoch": 8.288914819136522, "grad_norm": 0.0103759765625, "learning_rate": 1.4085315888035744e-05, "loss": 0.0001, "step": 17759 }, { "epoch": 8.289381563593933, "grad_norm": 0.00628662109375, "learning_rate": 1.4077807964386169e-05, "loss": 0.0001, "step": 17760 }, { "epoch": 8.289848308051342, "grad_norm": 0.01336669921875, "learning_rate": 1.4070301890736748e-05, "loss": 0.0002, "step": 17761 }, { "epoch": 8.29031505250875, "grad_norm": 0.060791015625, "learning_rate": 1.4062797667249051e-05, "loss": 0.0018, "step": 17762 }, { "epoch": 8.290781796966161, "grad_norm": 0.00640869140625, "learning_rate": 1.4055295294084658e-05, "loss": 0.0001, "step": 17763 }, { "epoch": 8.29124854142357, "grad_norm": 0.0078125, "learning_rate": 1.4047794771405143e-05, "loss": 0.0002, "step": 17764 }, { "epoch": 8.29171528588098, "grad_norm": 0.0068359375, "learning_rate": 1.4040296099371975e-05, "loss": 0.0001, "step": 17765 }, { "epoch": 8.29218203033839, "grad_norm": 0.00909423828125, "learning_rate": 1.403279927814658e-05, "loss": 0.0001, "step": 17766 }, { "epoch": 8.292648774795799, "grad_norm": 0.005157470703125, "learning_rate": 1.402530430789043e-05, "loss": 0.0001, "step": 17767 }, { "epoch": 8.29311551925321, "grad_norm": 0.01275634765625, "learning_rate": 1.401781118876484e-05, "loss": 0.0001, "step": 17768 }, { "epoch": 8.293582263710618, "grad_norm": 0.0089111328125, "learning_rate": 1.401031992093117e-05, "loss": 0.0002, "step": 17769 }, { "epoch": 8.294049008168027, "grad_norm": 0.0245361328125, "learning_rate": 1.4002830504550746e-05, "loss": 0.0002, "step": 17770 }, { "epoch": 8.294515752625438, "grad_norm": 0.0087890625, "learning_rate": 1.3995342939784772e-05, "loss": 0.0002, "step": 17771 }, { "epoch": 8.294982497082847, "grad_norm": 0.0103759765625, "learning_rate": 1.3987857226794487e-05, "loss": 0.0002, "step": 17772 }, { "epoch": 8.295449241540256, "grad_norm": 0.005035400390625, "learning_rate": 1.3980373365741095e-05, "loss": 0.0001, "step": 17773 }, { "epoch": 8.295915985997667, "grad_norm": 0.035888671875, "learning_rate": 1.3972891356785678e-05, "loss": 0.0002, "step": 17774 }, { "epoch": 8.296382730455075, "grad_norm": 0.00836181640625, "learning_rate": 1.3965411200089362e-05, "loss": 0.0001, "step": 17775 }, { "epoch": 8.296849474912486, "grad_norm": 0.010009765625, "learning_rate": 1.3957932895813219e-05, "loss": 0.0001, "step": 17776 }, { "epoch": 8.297316219369895, "grad_norm": 0.0101318359375, "learning_rate": 1.395045644411822e-05, "loss": 0.0001, "step": 17777 }, { "epoch": 8.297782963827304, "grad_norm": 0.0118408203125, "learning_rate": 1.3942981845165382e-05, "loss": 0.0001, "step": 17778 }, { "epoch": 8.298249708284715, "grad_norm": 0.005889892578125, "learning_rate": 1.393550909911564e-05, "loss": 0.0001, "step": 17779 }, { "epoch": 8.298716452742124, "grad_norm": 0.00830078125, "learning_rate": 1.3928038206129845e-05, "loss": 0.0001, "step": 17780 }, { "epoch": 8.299183197199532, "grad_norm": 0.11376953125, "learning_rate": 1.3920569166368903e-05, "loss": 0.0002, "step": 17781 }, { "epoch": 8.299649941656943, "grad_norm": 0.0101318359375, "learning_rate": 1.3913101979993625e-05, "loss": 0.0002, "step": 17782 }, { "epoch": 8.300116686114352, "grad_norm": 0.0400390625, "learning_rate": 1.3905636647164766e-05, "loss": 0.0022, "step": 17783 }, { "epoch": 8.300583430571763, "grad_norm": 0.007110595703125, "learning_rate": 1.3898173168043083e-05, "loss": 0.0001, "step": 17784 }, { "epoch": 8.301050175029172, "grad_norm": 0.006317138671875, "learning_rate": 1.3890711542789247e-05, "loss": 0.0001, "step": 17785 }, { "epoch": 8.30151691948658, "grad_norm": 0.02197265625, "learning_rate": 1.3883251771563932e-05, "loss": 0.0002, "step": 17786 }, { "epoch": 8.301983663943991, "grad_norm": 0.01025390625, "learning_rate": 1.3875793854527774e-05, "loss": 0.0001, "step": 17787 }, { "epoch": 8.3024504084014, "grad_norm": 0.01312255859375, "learning_rate": 1.3868337791841302e-05, "loss": 0.0002, "step": 17788 }, { "epoch": 8.302917152858809, "grad_norm": 0.005767822265625, "learning_rate": 1.3860883583665096e-05, "loss": 0.0001, "step": 17789 }, { "epoch": 8.30338389731622, "grad_norm": 0.008056640625, "learning_rate": 1.3853431230159653e-05, "loss": 0.0002, "step": 17790 }, { "epoch": 8.303850641773629, "grad_norm": 0.01123046875, "learning_rate": 1.3845980731485398e-05, "loss": 0.0001, "step": 17791 }, { "epoch": 8.30431738623104, "grad_norm": 0.017822265625, "learning_rate": 1.3838532087802768e-05, "loss": 0.0002, "step": 17792 }, { "epoch": 8.304784130688448, "grad_norm": 0.0093994140625, "learning_rate": 1.3831085299272162e-05, "loss": 0.0002, "step": 17793 }, { "epoch": 8.305250875145857, "grad_norm": 0.0033416748046875, "learning_rate": 1.3823640366053869e-05, "loss": 0.0001, "step": 17794 }, { "epoch": 8.305717619603268, "grad_norm": 0.019287109375, "learning_rate": 1.381619728830822e-05, "loss": 0.0001, "step": 17795 }, { "epoch": 8.306184364060677, "grad_norm": 0.0111083984375, "learning_rate": 1.3808756066195483e-05, "loss": 0.0002, "step": 17796 }, { "epoch": 8.306651108518086, "grad_norm": 0.01214599609375, "learning_rate": 1.3801316699875833e-05, "loss": 0.0002, "step": 17797 }, { "epoch": 8.307117852975496, "grad_norm": 0.039306640625, "learning_rate": 1.3793879189509484e-05, "loss": 0.0032, "step": 17798 }, { "epoch": 8.307584597432905, "grad_norm": 0.008544921875, "learning_rate": 1.3786443535256577e-05, "loss": 0.0001, "step": 17799 }, { "epoch": 8.308051341890316, "grad_norm": 0.032470703125, "learning_rate": 1.377900973727717e-05, "loss": 0.0002, "step": 17800 }, { "epoch": 8.308518086347725, "grad_norm": 0.0206298828125, "learning_rate": 1.3771577795731361e-05, "loss": 0.0002, "step": 17801 }, { "epoch": 8.308984830805134, "grad_norm": 0.083984375, "learning_rate": 1.3764147710779162e-05, "loss": 0.0002, "step": 17802 }, { "epoch": 8.309451575262544, "grad_norm": 0.0068359375, "learning_rate": 1.3756719482580539e-05, "loss": 0.0001, "step": 17803 }, { "epoch": 8.309918319719953, "grad_norm": 0.004364013671875, "learning_rate": 1.3749293111295437e-05, "loss": 0.0001, "step": 17804 }, { "epoch": 8.310385064177362, "grad_norm": 0.012451171875, "learning_rate": 1.374186859708374e-05, "loss": 0.0002, "step": 17805 }, { "epoch": 8.310851808634773, "grad_norm": 0.0081787109375, "learning_rate": 1.3734445940105312e-05, "loss": 0.0001, "step": 17806 }, { "epoch": 8.311318553092182, "grad_norm": 0.03076171875, "learning_rate": 1.372702514052001e-05, "loss": 0.0022, "step": 17807 }, { "epoch": 8.31178529754959, "grad_norm": 0.0439453125, "learning_rate": 1.3719606198487544e-05, "loss": 0.003, "step": 17808 }, { "epoch": 8.312252042007001, "grad_norm": 0.06201171875, "learning_rate": 1.3712189114167684e-05, "loss": 0.0025, "step": 17809 }, { "epoch": 8.31271878646441, "grad_norm": 0.024658203125, "learning_rate": 1.3704773887720169e-05, "loss": 0.0002, "step": 17810 }, { "epoch": 8.313185530921821, "grad_norm": 0.0233154296875, "learning_rate": 1.3697360519304581e-05, "loss": 0.0002, "step": 17811 }, { "epoch": 8.31365227537923, "grad_norm": 0.02001953125, "learning_rate": 1.3689949009080605e-05, "loss": 0.0002, "step": 17812 }, { "epoch": 8.314119019836639, "grad_norm": 0.005615234375, "learning_rate": 1.3682539357207758e-05, "loss": 0.0001, "step": 17813 }, { "epoch": 8.31458576429405, "grad_norm": 0.008544921875, "learning_rate": 1.3675131563845644e-05, "loss": 0.0001, "step": 17814 }, { "epoch": 8.315052508751458, "grad_norm": 0.01171875, "learning_rate": 1.3667725629153694e-05, "loss": 0.0002, "step": 17815 }, { "epoch": 8.315519253208867, "grad_norm": 0.0084228515625, "learning_rate": 1.3660321553291423e-05, "loss": 0.0002, "step": 17816 }, { "epoch": 8.315985997666278, "grad_norm": 0.01129150390625, "learning_rate": 1.3652919336418201e-05, "loss": 0.0001, "step": 17817 }, { "epoch": 8.316452742123687, "grad_norm": 0.0167236328125, "learning_rate": 1.3645518978693439e-05, "loss": 0.0002, "step": 17818 }, { "epoch": 8.316919486581098, "grad_norm": 0.0390625, "learning_rate": 1.3638120480276484e-05, "loss": 0.0028, "step": 17819 }, { "epoch": 8.317386231038506, "grad_norm": 0.0087890625, "learning_rate": 1.3630723841326587e-05, "loss": 0.0001, "step": 17820 }, { "epoch": 8.317852975495915, "grad_norm": 0.01123046875, "learning_rate": 1.3623329062003032e-05, "loss": 0.0001, "step": 17821 }, { "epoch": 8.318319719953326, "grad_norm": 0.007659912109375, "learning_rate": 1.3615936142465068e-05, "loss": 0.0001, "step": 17822 }, { "epoch": 8.318786464410735, "grad_norm": 0.032470703125, "learning_rate": 1.3608545082871827e-05, "loss": 0.0003, "step": 17823 }, { "epoch": 8.319253208868144, "grad_norm": 0.01007080078125, "learning_rate": 1.3601155883382454e-05, "loss": 0.0002, "step": 17824 }, { "epoch": 8.319719953325555, "grad_norm": 0.009033203125, "learning_rate": 1.3593768544156082e-05, "loss": 0.0001, "step": 17825 }, { "epoch": 8.320186697782963, "grad_norm": 0.012451171875, "learning_rate": 1.3586383065351715e-05, "loss": 0.0002, "step": 17826 }, { "epoch": 8.320653442240374, "grad_norm": 0.0137939453125, "learning_rate": 1.3578999447128404e-05, "loss": 0.0003, "step": 17827 }, { "epoch": 8.321120186697783, "grad_norm": 0.03564453125, "learning_rate": 1.3571617689645144e-05, "loss": 0.0015, "step": 17828 }, { "epoch": 8.321586931155192, "grad_norm": 0.01214599609375, "learning_rate": 1.3564237793060819e-05, "loss": 0.0002, "step": 17829 }, { "epoch": 8.322053675612603, "grad_norm": 0.02001953125, "learning_rate": 1.3556859757534357e-05, "loss": 0.0001, "step": 17830 }, { "epoch": 8.322520420070012, "grad_norm": 0.007598876953125, "learning_rate": 1.3549483583224642e-05, "loss": 0.0001, "step": 17831 }, { "epoch": 8.32298716452742, "grad_norm": 0.0167236328125, "learning_rate": 1.3542109270290448e-05, "loss": 0.0001, "step": 17832 }, { "epoch": 8.323453908984831, "grad_norm": 0.00738525390625, "learning_rate": 1.353473681889058e-05, "loss": 0.0001, "step": 17833 }, { "epoch": 8.32392065344224, "grad_norm": 0.016357421875, "learning_rate": 1.3527366229183747e-05, "loss": 0.0002, "step": 17834 }, { "epoch": 8.32438739789965, "grad_norm": 0.0142822265625, "learning_rate": 1.3519997501328652e-05, "loss": 0.0001, "step": 17835 }, { "epoch": 8.32485414235706, "grad_norm": 0.00762939453125, "learning_rate": 1.3512630635483992e-05, "loss": 0.0001, "step": 17836 }, { "epoch": 8.325320886814469, "grad_norm": 0.039306640625, "learning_rate": 1.3505265631808338e-05, "loss": 0.0015, "step": 17837 }, { "epoch": 8.32578763127188, "grad_norm": 0.01080322265625, "learning_rate": 1.3497902490460268e-05, "loss": 0.0001, "step": 17838 }, { "epoch": 8.326254375729288, "grad_norm": 0.0255126953125, "learning_rate": 1.3490541211598362e-05, "loss": 0.0002, "step": 17839 }, { "epoch": 8.326721120186697, "grad_norm": 0.0084228515625, "learning_rate": 1.3483181795381073e-05, "loss": 0.0002, "step": 17840 }, { "epoch": 8.327187864644108, "grad_norm": 0.30859375, "learning_rate": 1.3475824241966862e-05, "loss": 0.0005, "step": 17841 }, { "epoch": 8.327654609101517, "grad_norm": 0.00732421875, "learning_rate": 1.346846855151418e-05, "loss": 0.0002, "step": 17842 }, { "epoch": 8.328121353558927, "grad_norm": 0.1201171875, "learning_rate": 1.3461114724181356e-05, "loss": 0.0003, "step": 17843 }, { "epoch": 8.328588098016336, "grad_norm": 0.00518798828125, "learning_rate": 1.3453762760126753e-05, "loss": 0.0001, "step": 17844 }, { "epoch": 8.329054842473745, "grad_norm": 0.00860595703125, "learning_rate": 1.3446412659508679e-05, "loss": 0.0001, "step": 17845 }, { "epoch": 8.329521586931156, "grad_norm": 0.034912109375, "learning_rate": 1.343906442248536e-05, "loss": 0.0016, "step": 17846 }, { "epoch": 8.329988331388565, "grad_norm": 0.0064697265625, "learning_rate": 1.3431718049215014e-05, "loss": 0.0001, "step": 17847 }, { "epoch": 8.330455075845974, "grad_norm": 0.0034332275390625, "learning_rate": 1.342437353985585e-05, "loss": 0.0001, "step": 17848 }, { "epoch": 8.330921820303384, "grad_norm": 0.0164794921875, "learning_rate": 1.3417030894565962e-05, "loss": 0.0002, "step": 17849 }, { "epoch": 8.331388564760793, "grad_norm": 0.01007080078125, "learning_rate": 1.3409690113503471e-05, "loss": 0.0001, "step": 17850 }, { "epoch": 8.331855309218202, "grad_norm": 0.0281982421875, "learning_rate": 1.3402351196826445e-05, "loss": 0.0019, "step": 17851 }, { "epoch": 8.332322053675613, "grad_norm": 0.09228515625, "learning_rate": 1.3395014144692853e-05, "loss": 0.0051, "step": 17852 }, { "epoch": 8.332788798133022, "grad_norm": 0.046142578125, "learning_rate": 1.3387678957260718e-05, "loss": 0.0016, "step": 17853 }, { "epoch": 8.333255542590432, "grad_norm": 0.017578125, "learning_rate": 1.338034563468793e-05, "loss": 0.0016, "step": 17854 }, { "epoch": 8.333722287047841, "grad_norm": 0.0042724609375, "learning_rate": 1.3373014177132415e-05, "loss": 0.0001, "step": 17855 }, { "epoch": 8.33418903150525, "grad_norm": 0.00885009765625, "learning_rate": 1.3365684584752035e-05, "loss": 0.0001, "step": 17856 }, { "epoch": 8.334655775962661, "grad_norm": 0.0057373046875, "learning_rate": 1.3358356857704568e-05, "loss": 0.0001, "step": 17857 }, { "epoch": 8.33512252042007, "grad_norm": 0.007537841796875, "learning_rate": 1.3351030996147807e-05, "loss": 0.0002, "step": 17858 }, { "epoch": 8.335589264877479, "grad_norm": 0.01123046875, "learning_rate": 1.3343707000239502e-05, "loss": 0.0002, "step": 17859 }, { "epoch": 8.33605600933489, "grad_norm": 0.015869140625, "learning_rate": 1.3336384870137342e-05, "loss": 0.0002, "step": 17860 }, { "epoch": 8.336522753792298, "grad_norm": 0.01361083984375, "learning_rate": 1.3329064605998954e-05, "loss": 0.0001, "step": 17861 }, { "epoch": 8.336989498249709, "grad_norm": 0.0052490234375, "learning_rate": 1.3321746207981956e-05, "loss": 0.0001, "step": 17862 }, { "epoch": 8.337456242707118, "grad_norm": 0.005218505859375, "learning_rate": 1.3314429676243956e-05, "loss": 0.0001, "step": 17863 }, { "epoch": 8.337922987164527, "grad_norm": 0.038818359375, "learning_rate": 1.330711501094245e-05, "loss": 0.0002, "step": 17864 }, { "epoch": 8.338389731621938, "grad_norm": 0.038818359375, "learning_rate": 1.3299802212234957e-05, "loss": 0.002, "step": 17865 }, { "epoch": 8.338856476079346, "grad_norm": 0.0079345703125, "learning_rate": 1.3292491280278907e-05, "loss": 0.0001, "step": 17866 }, { "epoch": 8.339323220536755, "grad_norm": 0.0159912109375, "learning_rate": 1.3285182215231718e-05, "loss": 0.0002, "step": 17867 }, { "epoch": 8.339789964994166, "grad_norm": 0.00732421875, "learning_rate": 1.3277875017250795e-05, "loss": 0.0001, "step": 17868 }, { "epoch": 8.340256709451575, "grad_norm": 0.07470703125, "learning_rate": 1.3270569686493417e-05, "loss": 0.0022, "step": 17869 }, { "epoch": 8.340723453908986, "grad_norm": 0.00933837890625, "learning_rate": 1.3263266223116899e-05, "loss": 0.0002, "step": 17870 }, { "epoch": 8.341190198366395, "grad_norm": 0.011474609375, "learning_rate": 1.3255964627278516e-05, "loss": 0.0001, "step": 17871 }, { "epoch": 8.341656942823803, "grad_norm": 0.0142822265625, "learning_rate": 1.3248664899135444e-05, "loss": 0.0002, "step": 17872 }, { "epoch": 8.342123687281214, "grad_norm": 0.006500244140625, "learning_rate": 1.3241367038844865e-05, "loss": 0.0001, "step": 17873 }, { "epoch": 8.342590431738623, "grad_norm": 0.043212890625, "learning_rate": 1.3234071046563934e-05, "loss": 0.002, "step": 17874 }, { "epoch": 8.343057176196032, "grad_norm": 0.00592041015625, "learning_rate": 1.3226776922449703e-05, "loss": 0.0001, "step": 17875 }, { "epoch": 8.343523920653443, "grad_norm": 0.0079345703125, "learning_rate": 1.3219484666659232e-05, "loss": 0.0001, "step": 17876 }, { "epoch": 8.343990665110852, "grad_norm": 0.0177001953125, "learning_rate": 1.3212194279349576e-05, "loss": 0.0001, "step": 17877 }, { "epoch": 8.344457409568262, "grad_norm": 0.005645751953125, "learning_rate": 1.3204905760677643e-05, "loss": 0.0001, "step": 17878 }, { "epoch": 8.344924154025671, "grad_norm": 0.00732421875, "learning_rate": 1.3197619110800386e-05, "loss": 0.0001, "step": 17879 }, { "epoch": 8.34539089848308, "grad_norm": 0.004913330078125, "learning_rate": 1.319033432987472e-05, "loss": 0.0001, "step": 17880 }, { "epoch": 8.34585764294049, "grad_norm": 0.05126953125, "learning_rate": 1.3183051418057457e-05, "loss": 0.0014, "step": 17881 }, { "epoch": 8.3463243873979, "grad_norm": 0.033935546875, "learning_rate": 1.3175770375505448e-05, "loss": 0.0018, "step": 17882 }, { "epoch": 8.346791131855309, "grad_norm": 0.02197265625, "learning_rate": 1.316849120237541e-05, "loss": 0.0002, "step": 17883 }, { "epoch": 8.34725787631272, "grad_norm": 0.01361083984375, "learning_rate": 1.3161213898824088e-05, "loss": 0.0001, "step": 17884 }, { "epoch": 8.347724620770128, "grad_norm": 0.0087890625, "learning_rate": 1.3153938465008208e-05, "loss": 0.0002, "step": 17885 }, { "epoch": 8.348191365227539, "grad_norm": 0.0140380859375, "learning_rate": 1.3146664901084372e-05, "loss": 0.0001, "step": 17886 }, { "epoch": 8.348658109684948, "grad_norm": 0.0107421875, "learning_rate": 1.3139393207209205e-05, "loss": 0.0002, "step": 17887 }, { "epoch": 8.349124854142357, "grad_norm": 0.0093994140625, "learning_rate": 1.3132123383539297e-05, "loss": 0.0002, "step": 17888 }, { "epoch": 8.349591598599767, "grad_norm": 0.046630859375, "learning_rate": 1.3124855430231131e-05, "loss": 0.0023, "step": 17889 }, { "epoch": 8.350058343057176, "grad_norm": 0.00946044921875, "learning_rate": 1.3117589347441217e-05, "loss": 0.0001, "step": 17890 }, { "epoch": 8.350525087514585, "grad_norm": 0.01214599609375, "learning_rate": 1.311032513532603e-05, "loss": 0.0001, "step": 17891 }, { "epoch": 8.350991831971996, "grad_norm": 0.00958251953125, "learning_rate": 1.310306279404192e-05, "loss": 0.0001, "step": 17892 }, { "epoch": 8.351458576429405, "grad_norm": 0.040283203125, "learning_rate": 1.3095802323745277e-05, "loss": 0.0011, "step": 17893 }, { "epoch": 8.351925320886814, "grad_norm": 0.0439453125, "learning_rate": 1.3088543724592462e-05, "loss": 0.002, "step": 17894 }, { "epoch": 8.352392065344224, "grad_norm": 0.05029296875, "learning_rate": 1.3081286996739705e-05, "loss": 0.0031, "step": 17895 }, { "epoch": 8.352858809801633, "grad_norm": 0.033203125, "learning_rate": 1.3074032140343284e-05, "loss": 0.0002, "step": 17896 }, { "epoch": 8.353325554259044, "grad_norm": 0.003753662109375, "learning_rate": 1.3066779155559416e-05, "loss": 0.0001, "step": 17897 }, { "epoch": 8.353792298716453, "grad_norm": 0.006011962890625, "learning_rate": 1.3059528042544223e-05, "loss": 0.0001, "step": 17898 }, { "epoch": 8.354259043173862, "grad_norm": 0.006072998046875, "learning_rate": 1.3052278801453854e-05, "loss": 0.0001, "step": 17899 }, { "epoch": 8.354725787631272, "grad_norm": 0.02099609375, "learning_rate": 1.304503143244441e-05, "loss": 0.0001, "step": 17900 }, { "epoch": 8.355192532088681, "grad_norm": 0.010498046875, "learning_rate": 1.3037785935671897e-05, "loss": 0.0001, "step": 17901 }, { "epoch": 8.35565927654609, "grad_norm": 0.00836181640625, "learning_rate": 1.303054231129236e-05, "loss": 0.0001, "step": 17902 }, { "epoch": 8.3561260210035, "grad_norm": 0.00958251953125, "learning_rate": 1.3023300559461726e-05, "loss": 0.0001, "step": 17903 }, { "epoch": 8.35659276546091, "grad_norm": 0.0140380859375, "learning_rate": 1.301606068033593e-05, "loss": 0.0001, "step": 17904 }, { "epoch": 8.35705950991832, "grad_norm": 0.011474609375, "learning_rate": 1.3008822674070874e-05, "loss": 0.0002, "step": 17905 }, { "epoch": 8.35752625437573, "grad_norm": 0.00634765625, "learning_rate": 1.3001586540822374e-05, "loss": 0.0002, "step": 17906 }, { "epoch": 8.357992998833138, "grad_norm": 0.055908203125, "learning_rate": 1.2994352280746235e-05, "loss": 0.0015, "step": 17907 }, { "epoch": 8.358459743290549, "grad_norm": 0.005645751953125, "learning_rate": 1.2987119893998246e-05, "loss": 0.0001, "step": 17908 }, { "epoch": 8.358926487747958, "grad_norm": 0.0103759765625, "learning_rate": 1.297988938073411e-05, "loss": 0.0001, "step": 17909 }, { "epoch": 8.359393232205367, "grad_norm": 0.006866455078125, "learning_rate": 1.2972660741109488e-05, "loss": 0.0001, "step": 17910 }, { "epoch": 8.359859976662777, "grad_norm": 0.00537109375, "learning_rate": 1.2965433975280039e-05, "loss": 0.0001, "step": 17911 }, { "epoch": 8.360326721120186, "grad_norm": 0.00701904296875, "learning_rate": 1.2958209083401385e-05, "loss": 0.0001, "step": 17912 }, { "epoch": 8.360793465577597, "grad_norm": 0.076171875, "learning_rate": 1.2950986065629035e-05, "loss": 0.0044, "step": 17913 }, { "epoch": 8.361260210035006, "grad_norm": 0.009765625, "learning_rate": 1.2943764922118562e-05, "loss": 0.0002, "step": 17914 }, { "epoch": 8.361726954492415, "grad_norm": 0.09228515625, "learning_rate": 1.2936545653025401e-05, "loss": 0.0026, "step": 17915 }, { "epoch": 8.362193698949826, "grad_norm": 0.01116943359375, "learning_rate": 1.2929328258505013e-05, "loss": 0.0002, "step": 17916 }, { "epoch": 8.362660443407234, "grad_norm": 0.01611328125, "learning_rate": 1.2922112738712811e-05, "loss": 0.0002, "step": 17917 }, { "epoch": 8.363127187864643, "grad_norm": 0.0140380859375, "learning_rate": 1.2914899093804111e-05, "loss": 0.0002, "step": 17918 }, { "epoch": 8.363593932322054, "grad_norm": 0.07861328125, "learning_rate": 1.2907687323934259e-05, "loss": 0.0029, "step": 17919 }, { "epoch": 8.364060676779463, "grad_norm": 0.006195068359375, "learning_rate": 1.2900477429258562e-05, "loss": 0.0001, "step": 17920 }, { "epoch": 8.364527421236874, "grad_norm": 0.006103515625, "learning_rate": 1.2893269409932196e-05, "loss": 0.0001, "step": 17921 }, { "epoch": 8.364994165694283, "grad_norm": 0.0615234375, "learning_rate": 1.2886063266110382e-05, "loss": 0.0002, "step": 17922 }, { "epoch": 8.365460910151691, "grad_norm": 0.010009765625, "learning_rate": 1.2878858997948306e-05, "loss": 0.0002, "step": 17923 }, { "epoch": 8.365927654609102, "grad_norm": 0.0093994140625, "learning_rate": 1.2871656605601034e-05, "loss": 0.0002, "step": 17924 }, { "epoch": 8.366394399066511, "grad_norm": 0.007171630859375, "learning_rate": 1.2864456089223664e-05, "loss": 0.0001, "step": 17925 }, { "epoch": 8.36686114352392, "grad_norm": 0.007080078125, "learning_rate": 1.285725744897126e-05, "loss": 0.0001, "step": 17926 }, { "epoch": 8.36732788798133, "grad_norm": 0.046630859375, "learning_rate": 1.2850060684998766e-05, "loss": 0.0026, "step": 17927 }, { "epoch": 8.36779463243874, "grad_norm": 0.00921630859375, "learning_rate": 1.2842865797461157e-05, "loss": 0.0001, "step": 17928 }, { "epoch": 8.36826137689615, "grad_norm": 0.0072021484375, "learning_rate": 1.2835672786513375e-05, "loss": 0.0001, "step": 17929 }, { "epoch": 8.36872812135356, "grad_norm": 0.005462646484375, "learning_rate": 1.2828481652310253e-05, "loss": 0.0001, "step": 17930 }, { "epoch": 8.369194865810968, "grad_norm": 0.00885009765625, "learning_rate": 1.2821292395006646e-05, "loss": 0.0002, "step": 17931 }, { "epoch": 8.369661610268379, "grad_norm": 0.012939453125, "learning_rate": 1.2814105014757327e-05, "loss": 0.0002, "step": 17932 }, { "epoch": 8.370128354725788, "grad_norm": 0.009033203125, "learning_rate": 1.2806919511717064e-05, "loss": 0.0002, "step": 17933 }, { "epoch": 8.370595099183197, "grad_norm": 0.0048828125, "learning_rate": 1.2799735886040588e-05, "loss": 0.0001, "step": 17934 }, { "epoch": 8.371061843640607, "grad_norm": 0.00921630859375, "learning_rate": 1.2792554137882518e-05, "loss": 0.0002, "step": 17935 }, { "epoch": 8.371528588098016, "grad_norm": 0.0125732421875, "learning_rate": 1.2785374267397532e-05, "loss": 0.0002, "step": 17936 }, { "epoch": 8.371995332555425, "grad_norm": 0.004852294921875, "learning_rate": 1.2778196274740207e-05, "loss": 0.0001, "step": 17937 }, { "epoch": 8.372462077012836, "grad_norm": 0.00799560546875, "learning_rate": 1.2771020160065083e-05, "loss": 0.0001, "step": 17938 }, { "epoch": 8.372928821470245, "grad_norm": 0.01171875, "learning_rate": 1.2763845923526674e-05, "loss": 0.0002, "step": 17939 }, { "epoch": 8.373395565927655, "grad_norm": 0.0130615234375, "learning_rate": 1.2756673565279464e-05, "loss": 0.0002, "step": 17940 }, { "epoch": 8.373862310385064, "grad_norm": 0.006744384765625, "learning_rate": 1.2749503085477864e-05, "loss": 0.0001, "step": 17941 }, { "epoch": 8.374329054842473, "grad_norm": 0.006195068359375, "learning_rate": 1.2742334484276264e-05, "loss": 0.0001, "step": 17942 }, { "epoch": 8.374795799299884, "grad_norm": 0.017578125, "learning_rate": 1.2735167761829037e-05, "loss": 0.0002, "step": 17943 }, { "epoch": 8.375262543757293, "grad_norm": 0.01318359375, "learning_rate": 1.2728002918290439e-05, "loss": 0.0001, "step": 17944 }, { "epoch": 8.375729288214702, "grad_norm": 0.007781982421875, "learning_rate": 1.2720839953814778e-05, "loss": 0.0001, "step": 17945 }, { "epoch": 8.376196032672112, "grad_norm": 0.0162353515625, "learning_rate": 1.271367886855629e-05, "loss": 0.0001, "step": 17946 }, { "epoch": 8.376662777129521, "grad_norm": 0.040283203125, "learning_rate": 1.2706519662669124e-05, "loss": 0.0022, "step": 17947 }, { "epoch": 8.377129521586932, "grad_norm": 0.006195068359375, "learning_rate": 1.269936233630744e-05, "loss": 0.0001, "step": 17948 }, { "epoch": 8.37759626604434, "grad_norm": 0.04638671875, "learning_rate": 1.2692206889625369e-05, "loss": 0.0013, "step": 17949 }, { "epoch": 8.37806301050175, "grad_norm": 0.007568359375, "learning_rate": 1.2685053322776929e-05, "loss": 0.0001, "step": 17950 }, { "epoch": 8.37852975495916, "grad_norm": 0.01287841796875, "learning_rate": 1.267790163591619e-05, "loss": 0.0002, "step": 17951 }, { "epoch": 8.37899649941657, "grad_norm": 0.00787353515625, "learning_rate": 1.2670751829197103e-05, "loss": 0.0001, "step": 17952 }, { "epoch": 8.379463243873978, "grad_norm": 0.017578125, "learning_rate": 1.2663603902773612e-05, "loss": 0.0002, "step": 17953 }, { "epoch": 8.379929988331389, "grad_norm": 0.041748046875, "learning_rate": 1.2656457856799653e-05, "loss": 0.0015, "step": 17954 }, { "epoch": 8.380396732788798, "grad_norm": 0.01007080078125, "learning_rate": 1.2649313691429042e-05, "loss": 0.0001, "step": 17955 }, { "epoch": 8.380863477246209, "grad_norm": 0.006256103515625, "learning_rate": 1.264217140681565e-05, "loss": 0.0001, "step": 17956 }, { "epoch": 8.381330221703617, "grad_norm": 0.0123291015625, "learning_rate": 1.2635031003113219e-05, "loss": 0.0002, "step": 17957 }, { "epoch": 8.381796966161026, "grad_norm": 0.024658203125, "learning_rate": 1.2627892480475512e-05, "loss": 0.0002, "step": 17958 }, { "epoch": 8.382263710618437, "grad_norm": 0.00830078125, "learning_rate": 1.2620755839056209e-05, "loss": 0.0001, "step": 17959 }, { "epoch": 8.382730455075846, "grad_norm": 0.006927490234375, "learning_rate": 1.2613621079008975e-05, "loss": 0.0001, "step": 17960 }, { "epoch": 8.383197199533255, "grad_norm": 0.0103759765625, "learning_rate": 1.2606488200487465e-05, "loss": 0.0002, "step": 17961 }, { "epoch": 8.383663943990666, "grad_norm": 0.01251220703125, "learning_rate": 1.2599357203645202e-05, "loss": 0.0002, "step": 17962 }, { "epoch": 8.384130688448074, "grad_norm": 0.023681640625, "learning_rate": 1.2592228088635772e-05, "loss": 0.002, "step": 17963 }, { "epoch": 8.384597432905485, "grad_norm": 0.024658203125, "learning_rate": 1.2585100855612631e-05, "loss": 0.0002, "step": 17964 }, { "epoch": 8.385064177362894, "grad_norm": 0.037353515625, "learning_rate": 1.2577975504729256e-05, "loss": 0.0018, "step": 17965 }, { "epoch": 8.385530921820303, "grad_norm": 0.0098876953125, "learning_rate": 1.257085203613908e-05, "loss": 0.0001, "step": 17966 }, { "epoch": 8.385997666277714, "grad_norm": 0.006591796875, "learning_rate": 1.2563730449995459e-05, "loss": 0.0002, "step": 17967 }, { "epoch": 8.386464410735122, "grad_norm": 0.0439453125, "learning_rate": 1.2556610746451725e-05, "loss": 0.0003, "step": 17968 }, { "epoch": 8.386931155192531, "grad_norm": 0.02880859375, "learning_rate": 1.2549492925661199e-05, "loss": 0.0015, "step": 17969 }, { "epoch": 8.387397899649942, "grad_norm": 0.02685546875, "learning_rate": 1.2542376987777105e-05, "loss": 0.0023, "step": 17970 }, { "epoch": 8.387864644107351, "grad_norm": 0.01165771484375, "learning_rate": 1.2535262932952662e-05, "loss": 0.0001, "step": 17971 }, { "epoch": 8.388331388564762, "grad_norm": 0.00543212890625, "learning_rate": 1.2528150761341073e-05, "loss": 0.0001, "step": 17972 }, { "epoch": 8.38879813302217, "grad_norm": 0.04736328125, "learning_rate": 1.2521040473095435e-05, "loss": 0.0002, "step": 17973 }, { "epoch": 8.38926487747958, "grad_norm": 0.00909423828125, "learning_rate": 1.251393206836885e-05, "loss": 0.0001, "step": 17974 }, { "epoch": 8.38973162193699, "grad_norm": 0.02783203125, "learning_rate": 1.2506825547314405e-05, "loss": 0.002, "step": 17975 }, { "epoch": 8.390198366394399, "grad_norm": 0.01239013671875, "learning_rate": 1.2499720910085055e-05, "loss": 0.0001, "step": 17976 }, { "epoch": 8.390665110851808, "grad_norm": 0.0064697265625, "learning_rate": 1.249261815683379e-05, "loss": 0.0001, "step": 17977 }, { "epoch": 8.391131855309219, "grad_norm": 0.004058837890625, "learning_rate": 1.2485517287713578e-05, "loss": 0.0001, "step": 17978 }, { "epoch": 8.391598599766628, "grad_norm": 0.037841796875, "learning_rate": 1.2478418302877259e-05, "loss": 0.0002, "step": 17979 }, { "epoch": 8.392065344224036, "grad_norm": 0.00946044921875, "learning_rate": 1.2471321202477693e-05, "loss": 0.0001, "step": 17980 }, { "epoch": 8.392532088681447, "grad_norm": 0.007293701171875, "learning_rate": 1.2464225986667721e-05, "loss": 0.0001, "step": 17981 }, { "epoch": 8.392998833138856, "grad_norm": 0.41796875, "learning_rate": 1.2457132655600056e-05, "loss": 0.0006, "step": 17982 }, { "epoch": 8.393465577596267, "grad_norm": 0.045654296875, "learning_rate": 1.2450041209427488e-05, "loss": 0.0018, "step": 17983 }, { "epoch": 8.393932322053676, "grad_norm": 0.01019287109375, "learning_rate": 1.2442951648302648e-05, "loss": 0.0002, "step": 17984 }, { "epoch": 8.394399066511085, "grad_norm": 0.00823974609375, "learning_rate": 1.2435863972378204e-05, "loss": 0.0001, "step": 17985 }, { "epoch": 8.394865810968495, "grad_norm": 0.01531982421875, "learning_rate": 1.2428778181806788e-05, "loss": 0.0002, "step": 17986 }, { "epoch": 8.395332555425904, "grad_norm": 0.007781982421875, "learning_rate": 1.2421694276740914e-05, "loss": 0.0002, "step": 17987 }, { "epoch": 8.395799299883313, "grad_norm": 0.005401611328125, "learning_rate": 1.2414612257333125e-05, "loss": 0.0001, "step": 17988 }, { "epoch": 8.396266044340724, "grad_norm": 0.009521484375, "learning_rate": 1.2407532123735944e-05, "loss": 0.0001, "step": 17989 }, { "epoch": 8.396732788798133, "grad_norm": 0.009521484375, "learning_rate": 1.2400453876101758e-05, "loss": 0.0001, "step": 17990 }, { "epoch": 8.397199533255543, "grad_norm": 0.00726318359375, "learning_rate": 1.2393377514582993e-05, "loss": 0.0001, "step": 17991 }, { "epoch": 8.397666277712952, "grad_norm": 0.039306640625, "learning_rate": 1.2386303039332026e-05, "loss": 0.0003, "step": 17992 }, { "epoch": 8.398133022170361, "grad_norm": 0.03662109375, "learning_rate": 1.2379230450501145e-05, "loss": 0.0018, "step": 17993 }, { "epoch": 8.398599766627772, "grad_norm": 0.01312255859375, "learning_rate": 1.237215974824265e-05, "loss": 0.0002, "step": 17994 }, { "epoch": 8.39906651108518, "grad_norm": 0.00872802734375, "learning_rate": 1.2365090932708811e-05, "loss": 0.0001, "step": 17995 }, { "epoch": 8.39953325554259, "grad_norm": 0.00482177734375, "learning_rate": 1.2358024004051772e-05, "loss": 0.0001, "step": 17996 }, { "epoch": 8.4, "grad_norm": 0.01275634765625, "learning_rate": 1.235095896242371e-05, "loss": 0.0001, "step": 17997 }, { "epoch": 8.40046674445741, "grad_norm": 0.01287841796875, "learning_rate": 1.2343895807976768e-05, "loss": 0.0002, "step": 17998 }, { "epoch": 8.40093348891482, "grad_norm": 0.0301513671875, "learning_rate": 1.2336834540862997e-05, "loss": 0.0002, "step": 17999 }, { "epoch": 8.401400233372229, "grad_norm": 0.08203125, "learning_rate": 1.2329775161234458e-05, "loss": 0.006, "step": 18000 }, { "epoch": 8.401866977829638, "grad_norm": 0.006591796875, "learning_rate": 1.2322717669243111e-05, "loss": 0.0002, "step": 18001 }, { "epoch": 8.402333722287048, "grad_norm": 0.02294921875, "learning_rate": 1.2315662065040923e-05, "loss": 0.0002, "step": 18002 }, { "epoch": 8.402800466744457, "grad_norm": 0.0235595703125, "learning_rate": 1.230860834877985e-05, "loss": 0.0002, "step": 18003 }, { "epoch": 8.403267211201866, "grad_norm": 0.006378173828125, "learning_rate": 1.2301556520611734e-05, "loss": 0.0001, "step": 18004 }, { "epoch": 8.403733955659277, "grad_norm": 0.011474609375, "learning_rate": 1.229450658068838e-05, "loss": 0.0001, "step": 18005 }, { "epoch": 8.404200700116686, "grad_norm": 0.00860595703125, "learning_rate": 1.2287458529161622e-05, "loss": 0.0002, "step": 18006 }, { "epoch": 8.404667444574097, "grad_norm": 0.0311279296875, "learning_rate": 1.2280412366183214e-05, "loss": 0.002, "step": 18007 }, { "epoch": 8.405134189031505, "grad_norm": 0.044677734375, "learning_rate": 1.2273368091904836e-05, "loss": 0.002, "step": 18008 }, { "epoch": 8.405600933488914, "grad_norm": 0.004913330078125, "learning_rate": 1.2266325706478177e-05, "loss": 0.0001, "step": 18009 }, { "epoch": 8.406067677946325, "grad_norm": 0.03515625, "learning_rate": 1.2259285210054883e-05, "loss": 0.0002, "step": 18010 }, { "epoch": 8.406534422403734, "grad_norm": 0.00628662109375, "learning_rate": 1.225224660278651e-05, "loss": 0.0001, "step": 18011 }, { "epoch": 8.407001166861143, "grad_norm": 0.01031494140625, "learning_rate": 1.224520988482465e-05, "loss": 0.0001, "step": 18012 }, { "epoch": 8.407467911318554, "grad_norm": 0.0050048828125, "learning_rate": 1.2238175056320766e-05, "loss": 0.0001, "step": 18013 }, { "epoch": 8.407934655775962, "grad_norm": 0.10791015625, "learning_rate": 1.2231142117426341e-05, "loss": 0.0003, "step": 18014 }, { "epoch": 8.408401400233373, "grad_norm": 0.006927490234375, "learning_rate": 1.2224111068292831e-05, "loss": 0.0001, "step": 18015 }, { "epoch": 8.408868144690782, "grad_norm": 0.00762939453125, "learning_rate": 1.2217081909071582e-05, "loss": 0.0001, "step": 18016 }, { "epoch": 8.409334889148191, "grad_norm": 0.0084228515625, "learning_rate": 1.221005463991396e-05, "loss": 0.0002, "step": 18017 }, { "epoch": 8.409801633605602, "grad_norm": 0.005401611328125, "learning_rate": 1.2203029260971277e-05, "loss": 0.0001, "step": 18018 }, { "epoch": 8.41026837806301, "grad_norm": 0.005859375, "learning_rate": 1.2196005772394769e-05, "loss": 0.0001, "step": 18019 }, { "epoch": 8.41073512252042, "grad_norm": 0.024169921875, "learning_rate": 1.2188984174335672e-05, "loss": 0.0002, "step": 18020 }, { "epoch": 8.41120186697783, "grad_norm": 0.005523681640625, "learning_rate": 1.2181964466945206e-05, "loss": 0.0001, "step": 18021 }, { "epoch": 8.411668611435239, "grad_norm": 0.007232666015625, "learning_rate": 1.2174946650374452e-05, "loss": 0.0001, "step": 18022 }, { "epoch": 8.412135355892648, "grad_norm": 0.037353515625, "learning_rate": 1.2167930724774546e-05, "loss": 0.0002, "step": 18023 }, { "epoch": 8.412602100350059, "grad_norm": 0.007049560546875, "learning_rate": 1.2160916690296554e-05, "loss": 0.0001, "step": 18024 }, { "epoch": 8.413068844807468, "grad_norm": 0.0791015625, "learning_rate": 1.2153904547091465e-05, "loss": 0.0051, "step": 18025 }, { "epoch": 8.413535589264878, "grad_norm": 0.0281982421875, "learning_rate": 1.2146894295310284e-05, "loss": 0.001, "step": 18026 }, { "epoch": 8.414002333722287, "grad_norm": 0.01092529296875, "learning_rate": 1.2139885935103968e-05, "loss": 0.0002, "step": 18027 }, { "epoch": 8.414469078179696, "grad_norm": 0.050537109375, "learning_rate": 1.2132879466623359e-05, "loss": 0.0015, "step": 18028 }, { "epoch": 8.414935822637107, "grad_norm": 0.018798828125, "learning_rate": 1.212587489001935e-05, "loss": 0.0002, "step": 18029 }, { "epoch": 8.415402567094516, "grad_norm": 0.00604248046875, "learning_rate": 1.2118872205442778e-05, "loss": 0.0001, "step": 18030 }, { "epoch": 8.415869311551925, "grad_norm": 0.005645751953125, "learning_rate": 1.2111871413044374e-05, "loss": 0.0001, "step": 18031 }, { "epoch": 8.416336056009335, "grad_norm": 0.004852294921875, "learning_rate": 1.2104872512974908e-05, "loss": 0.0001, "step": 18032 }, { "epoch": 8.416802800466744, "grad_norm": 0.01141357421875, "learning_rate": 1.2097875505385025e-05, "loss": 0.0002, "step": 18033 }, { "epoch": 8.417269544924155, "grad_norm": 0.005401611328125, "learning_rate": 1.2090880390425429e-05, "loss": 0.0001, "step": 18034 }, { "epoch": 8.417736289381564, "grad_norm": 0.04833984375, "learning_rate": 1.2083887168246733e-05, "loss": 0.0018, "step": 18035 }, { "epoch": 8.418203033838973, "grad_norm": 0.0106201171875, "learning_rate": 1.2076895838999469e-05, "loss": 0.0002, "step": 18036 }, { "epoch": 8.418669778296383, "grad_norm": 0.06591796875, "learning_rate": 1.2069906402834186e-05, "loss": 0.0048, "step": 18037 }, { "epoch": 8.419136522753792, "grad_norm": 0.01092529296875, "learning_rate": 1.2062918859901396e-05, "loss": 0.0001, "step": 18038 }, { "epoch": 8.419603267211201, "grad_norm": 0.043701171875, "learning_rate": 1.2055933210351523e-05, "loss": 0.002, "step": 18039 }, { "epoch": 8.420070011668612, "grad_norm": 0.048828125, "learning_rate": 1.2048949454334968e-05, "loss": 0.0002, "step": 18040 }, { "epoch": 8.42053675612602, "grad_norm": 0.0419921875, "learning_rate": 1.2041967592002146e-05, "loss": 0.0031, "step": 18041 }, { "epoch": 8.421003500583431, "grad_norm": 0.058837890625, "learning_rate": 1.2034987623503324e-05, "loss": 0.003, "step": 18042 }, { "epoch": 8.42147024504084, "grad_norm": 0.004302978515625, "learning_rate": 1.2028009548988828e-05, "loss": 0.0001, "step": 18043 }, { "epoch": 8.42193698949825, "grad_norm": 0.0089111328125, "learning_rate": 1.2021033368608902e-05, "loss": 0.0002, "step": 18044 }, { "epoch": 8.42240373395566, "grad_norm": 0.0172119140625, "learning_rate": 1.2014059082513728e-05, "loss": 0.0002, "step": 18045 }, { "epoch": 8.422870478413069, "grad_norm": 0.0072021484375, "learning_rate": 1.2007086690853486e-05, "loss": 0.0001, "step": 18046 }, { "epoch": 8.423337222870478, "grad_norm": 0.007049560546875, "learning_rate": 1.2000116193778322e-05, "loss": 0.0002, "step": 18047 }, { "epoch": 8.423803967327888, "grad_norm": 0.00823974609375, "learning_rate": 1.1993147591438259e-05, "loss": 0.0001, "step": 18048 }, { "epoch": 8.424270711785297, "grad_norm": 0.036376953125, "learning_rate": 1.1986180883983412e-05, "loss": 0.001, "step": 18049 }, { "epoch": 8.424737456242708, "grad_norm": 0.011474609375, "learning_rate": 1.1979216071563704e-05, "loss": 0.0002, "step": 18050 }, { "epoch": 8.425204200700117, "grad_norm": 0.0087890625, "learning_rate": 1.1972253154329171e-05, "loss": 0.0001, "step": 18051 }, { "epoch": 8.425670945157526, "grad_norm": 0.034912109375, "learning_rate": 1.1965292132429661e-05, "loss": 0.0026, "step": 18052 }, { "epoch": 8.426137689614936, "grad_norm": 0.00616455078125, "learning_rate": 1.195833300601512e-05, "loss": 0.0001, "step": 18053 }, { "epoch": 8.426604434072345, "grad_norm": 0.0107421875, "learning_rate": 1.195137577523533e-05, "loss": 0.0002, "step": 18054 }, { "epoch": 8.427071178529754, "grad_norm": 0.0712890625, "learning_rate": 1.1944420440240111e-05, "loss": 0.0048, "step": 18055 }, { "epoch": 8.427537922987165, "grad_norm": 0.00860595703125, "learning_rate": 1.1937467001179236e-05, "loss": 0.0002, "step": 18056 }, { "epoch": 8.428004667444574, "grad_norm": 0.00604248046875, "learning_rate": 1.1930515458202384e-05, "loss": 0.0001, "step": 18057 }, { "epoch": 8.428471411901985, "grad_norm": 0.0198974609375, "learning_rate": 1.1923565811459259e-05, "loss": 0.0002, "step": 18058 }, { "epoch": 8.428938156359393, "grad_norm": 0.03759765625, "learning_rate": 1.1916618061099493e-05, "loss": 0.0002, "step": 18059 }, { "epoch": 8.429404900816802, "grad_norm": 0.00811767578125, "learning_rate": 1.1909672207272649e-05, "loss": 0.0001, "step": 18060 }, { "epoch": 8.429871645274213, "grad_norm": 0.01123046875, "learning_rate": 1.1902728250128325e-05, "loss": 0.0002, "step": 18061 }, { "epoch": 8.430338389731622, "grad_norm": 0.0169677734375, "learning_rate": 1.1895786189815995e-05, "loss": 0.0002, "step": 18062 }, { "epoch": 8.43080513418903, "grad_norm": 0.060791015625, "learning_rate": 1.1888846026485134e-05, "loss": 0.0011, "step": 18063 }, { "epoch": 8.431271878646442, "grad_norm": 0.00787353515625, "learning_rate": 1.1881907760285194e-05, "loss": 0.0001, "step": 18064 }, { "epoch": 8.43173862310385, "grad_norm": 0.00921630859375, "learning_rate": 1.187497139136553e-05, "loss": 0.0001, "step": 18065 }, { "epoch": 8.43220536756126, "grad_norm": 0.01611328125, "learning_rate": 1.1868036919875513e-05, "loss": 0.0001, "step": 18066 }, { "epoch": 8.43267211201867, "grad_norm": 0.032958984375, "learning_rate": 1.186110434596447e-05, "loss": 0.0002, "step": 18067 }, { "epoch": 8.433138856476079, "grad_norm": 0.01544189453125, "learning_rate": 1.185417366978162e-05, "loss": 0.0002, "step": 18068 }, { "epoch": 8.43360560093349, "grad_norm": 0.00579833984375, "learning_rate": 1.1847244891476206e-05, "loss": 0.0001, "step": 18069 }, { "epoch": 8.434072345390899, "grad_norm": 0.0101318359375, "learning_rate": 1.1840318011197438e-05, "loss": 0.0002, "step": 18070 }, { "epoch": 8.434539089848307, "grad_norm": 0.0186767578125, "learning_rate": 1.183339302909442e-05, "loss": 0.0002, "step": 18071 }, { "epoch": 8.435005834305718, "grad_norm": 0.0751953125, "learning_rate": 1.1826469945316265e-05, "loss": 0.0018, "step": 18072 }, { "epoch": 8.435472578763127, "grad_norm": 0.01019287109375, "learning_rate": 1.1819548760012068e-05, "loss": 0.0001, "step": 18073 }, { "epoch": 8.435939323220536, "grad_norm": 0.0030364990234375, "learning_rate": 1.1812629473330805e-05, "loss": 0.0001, "step": 18074 }, { "epoch": 8.436406067677947, "grad_norm": 0.0042724609375, "learning_rate": 1.1805712085421473e-05, "loss": 0.0001, "step": 18075 }, { "epoch": 8.436872812135356, "grad_norm": 0.004791259765625, "learning_rate": 1.1798796596433037e-05, "loss": 0.0001, "step": 18076 }, { "epoch": 8.437339556592766, "grad_norm": 0.0274658203125, "learning_rate": 1.1791883006514349e-05, "loss": 0.0001, "step": 18077 }, { "epoch": 8.437806301050175, "grad_norm": 0.00970458984375, "learning_rate": 1.1784971315814287e-05, "loss": 0.0001, "step": 18078 }, { "epoch": 8.438273045507584, "grad_norm": 0.007720947265625, "learning_rate": 1.1778061524481688e-05, "loss": 0.0001, "step": 18079 }, { "epoch": 8.438739789964995, "grad_norm": 0.006195068359375, "learning_rate": 1.1771153632665288e-05, "loss": 0.0001, "step": 18080 }, { "epoch": 8.439206534422404, "grad_norm": 0.00946044921875, "learning_rate": 1.1764247640513858e-05, "loss": 0.0002, "step": 18081 }, { "epoch": 8.439673278879813, "grad_norm": 0.02978515625, "learning_rate": 1.175734354817607e-05, "loss": 0.0019, "step": 18082 }, { "epoch": 8.440140023337223, "grad_norm": 0.0106201171875, "learning_rate": 1.1750441355800568e-05, "loss": 0.0002, "step": 18083 }, { "epoch": 8.440606767794632, "grad_norm": 0.07080078125, "learning_rate": 1.1743541063536001e-05, "loss": 0.0002, "step": 18084 }, { "epoch": 8.441073512252043, "grad_norm": 0.00885009765625, "learning_rate": 1.1736642671530906e-05, "loss": 0.0001, "step": 18085 }, { "epoch": 8.441540256709452, "grad_norm": 0.007415771484375, "learning_rate": 1.172974617993381e-05, "loss": 0.0001, "step": 18086 }, { "epoch": 8.44200700116686, "grad_norm": 0.007537841796875, "learning_rate": 1.1722851588893246e-05, "loss": 0.0002, "step": 18087 }, { "epoch": 8.442473745624271, "grad_norm": 0.0166015625, "learning_rate": 1.171595889855761e-05, "loss": 0.0003, "step": 18088 }, { "epoch": 8.44294049008168, "grad_norm": 0.0252685546875, "learning_rate": 1.1709068109075338e-05, "loss": 0.0021, "step": 18089 }, { "epoch": 8.44340723453909, "grad_norm": 0.054931640625, "learning_rate": 1.17021792205948e-05, "loss": 0.003, "step": 18090 }, { "epoch": 8.4438739789965, "grad_norm": 0.00872802734375, "learning_rate": 1.16952922332643e-05, "loss": 0.0001, "step": 18091 }, { "epoch": 8.444340723453909, "grad_norm": 0.005523681640625, "learning_rate": 1.1688407147232128e-05, "loss": 0.0001, "step": 18092 }, { "epoch": 8.444807467911318, "grad_norm": 0.03857421875, "learning_rate": 1.1681523962646557e-05, "loss": 0.0026, "step": 18093 }, { "epoch": 8.445274212368728, "grad_norm": 0.06396484375, "learning_rate": 1.1674642679655745e-05, "loss": 0.0021, "step": 18094 }, { "epoch": 8.445740956826137, "grad_norm": 0.054931640625, "learning_rate": 1.1667763298407875e-05, "loss": 0.0002, "step": 18095 }, { "epoch": 8.446207701283548, "grad_norm": 0.00909423828125, "learning_rate": 1.1660885819051081e-05, "loss": 0.0001, "step": 18096 }, { "epoch": 8.446674445740957, "grad_norm": 0.0751953125, "learning_rate": 1.1654010241733414e-05, "loss": 0.0019, "step": 18097 }, { "epoch": 8.447141190198366, "grad_norm": 0.00494384765625, "learning_rate": 1.1647136566602935e-05, "loss": 0.0001, "step": 18098 }, { "epoch": 8.447607934655776, "grad_norm": 0.00885009765625, "learning_rate": 1.1640264793807676e-05, "loss": 0.0002, "step": 18099 }, { "epoch": 8.448074679113185, "grad_norm": 0.09814453125, "learning_rate": 1.16333949234955e-05, "loss": 0.003, "step": 18100 }, { "epoch": 8.448541423570596, "grad_norm": 0.01336669921875, "learning_rate": 1.1626526955814376e-05, "loss": 0.0001, "step": 18101 }, { "epoch": 8.449008168028005, "grad_norm": 0.0341796875, "learning_rate": 1.1619660890912209e-05, "loss": 0.0017, "step": 18102 }, { "epoch": 8.449474912485414, "grad_norm": 0.014404296875, "learning_rate": 1.161279672893678e-05, "loss": 0.0002, "step": 18103 }, { "epoch": 8.449941656942825, "grad_norm": 0.0086669921875, "learning_rate": 1.1605934470035906e-05, "loss": 0.0001, "step": 18104 }, { "epoch": 8.450408401400233, "grad_norm": 0.035400390625, "learning_rate": 1.1599074114357355e-05, "loss": 0.0023, "step": 18105 }, { "epoch": 8.450875145857642, "grad_norm": 0.043701171875, "learning_rate": 1.15922156620488e-05, "loss": 0.0042, "step": 18106 }, { "epoch": 8.451341890315053, "grad_norm": 0.006988525390625, "learning_rate": 1.1585359113257931e-05, "loss": 0.0001, "step": 18107 }, { "epoch": 8.451808634772462, "grad_norm": 0.0361328125, "learning_rate": 1.1578504468132401e-05, "loss": 0.0002, "step": 18108 }, { "epoch": 8.45227537922987, "grad_norm": 0.01055908203125, "learning_rate": 1.1571651726819755e-05, "loss": 0.0001, "step": 18109 }, { "epoch": 8.452742123687282, "grad_norm": 0.01007080078125, "learning_rate": 1.1564800889467553e-05, "loss": 0.0002, "step": 18110 }, { "epoch": 8.45320886814469, "grad_norm": 0.004425048828125, "learning_rate": 1.1557951956223334e-05, "loss": 0.0001, "step": 18111 }, { "epoch": 8.453675612602101, "grad_norm": 0.00787353515625, "learning_rate": 1.1551104927234524e-05, "loss": 0.0001, "step": 18112 }, { "epoch": 8.45414235705951, "grad_norm": 0.01312255859375, "learning_rate": 1.1544259802648572e-05, "loss": 0.0002, "step": 18113 }, { "epoch": 8.454609101516919, "grad_norm": 0.00927734375, "learning_rate": 1.1537416582612825e-05, "loss": 0.0002, "step": 18114 }, { "epoch": 8.45507584597433, "grad_norm": 0.007720947265625, "learning_rate": 1.1530575267274658e-05, "loss": 0.0001, "step": 18115 }, { "epoch": 8.455542590431739, "grad_norm": 0.02587890625, "learning_rate": 1.1523735856781392e-05, "loss": 0.0002, "step": 18116 }, { "epoch": 8.456009334889147, "grad_norm": 0.0205078125, "learning_rate": 1.1516898351280226e-05, "loss": 0.0002, "step": 18117 }, { "epoch": 8.456476079346558, "grad_norm": 0.00958251953125, "learning_rate": 1.1510062750918416e-05, "loss": 0.0001, "step": 18118 }, { "epoch": 8.456942823803967, "grad_norm": 0.0247802734375, "learning_rate": 1.1503229055843157e-05, "loss": 0.0019, "step": 18119 }, { "epoch": 8.457409568261378, "grad_norm": 0.008056640625, "learning_rate": 1.1496397266201541e-05, "loss": 0.0002, "step": 18120 }, { "epoch": 8.457876312718787, "grad_norm": 0.0257568359375, "learning_rate": 1.1489567382140699e-05, "loss": 0.0004, "step": 18121 }, { "epoch": 8.458343057176196, "grad_norm": 0.00604248046875, "learning_rate": 1.1482739403807685e-05, "loss": 0.0001, "step": 18122 }, { "epoch": 8.458809801633606, "grad_norm": 0.0096435546875, "learning_rate": 1.1475913331349486e-05, "loss": 0.0001, "step": 18123 }, { "epoch": 8.459276546091015, "grad_norm": 0.030517578125, "learning_rate": 1.1469089164913093e-05, "loss": 0.0025, "step": 18124 }, { "epoch": 8.459743290548424, "grad_norm": 0.013671875, "learning_rate": 1.1462266904645457e-05, "loss": 0.0002, "step": 18125 }, { "epoch": 8.460210035005835, "grad_norm": 0.01544189453125, "learning_rate": 1.1455446550693427e-05, "loss": 0.0002, "step": 18126 }, { "epoch": 8.460676779463244, "grad_norm": 0.052001953125, "learning_rate": 1.1448628103203884e-05, "loss": 0.0018, "step": 18127 }, { "epoch": 8.461143523920654, "grad_norm": 0.014892578125, "learning_rate": 1.1441811562323635e-05, "loss": 0.0001, "step": 18128 }, { "epoch": 8.461610268378063, "grad_norm": 0.04833984375, "learning_rate": 1.1434996928199426e-05, "loss": 0.0002, "step": 18129 }, { "epoch": 8.462077012835472, "grad_norm": 0.00836181640625, "learning_rate": 1.1428184200978021e-05, "loss": 0.0001, "step": 18130 }, { "epoch": 8.462543757292883, "grad_norm": 0.0458984375, "learning_rate": 1.1421373380806055e-05, "loss": 0.0007, "step": 18131 }, { "epoch": 8.463010501750292, "grad_norm": 0.02099609375, "learning_rate": 1.1414564467830202e-05, "loss": 0.0002, "step": 18132 }, { "epoch": 8.4634772462077, "grad_norm": 0.07861328125, "learning_rate": 1.1407757462197077e-05, "loss": 0.0021, "step": 18133 }, { "epoch": 8.463943990665111, "grad_norm": 0.00958251953125, "learning_rate": 1.140095236405322e-05, "loss": 0.0001, "step": 18134 }, { "epoch": 8.46441073512252, "grad_norm": 0.01129150390625, "learning_rate": 1.1394149173545154e-05, "loss": 0.0001, "step": 18135 }, { "epoch": 8.464877479579929, "grad_norm": 0.02978515625, "learning_rate": 1.138734789081939e-05, "loss": 0.002, "step": 18136 }, { "epoch": 8.46534422403734, "grad_norm": 0.009521484375, "learning_rate": 1.1380548516022326e-05, "loss": 0.0001, "step": 18137 }, { "epoch": 8.465810968494749, "grad_norm": 0.01080322265625, "learning_rate": 1.1373751049300374e-05, "loss": 0.0001, "step": 18138 }, { "epoch": 8.46627771295216, "grad_norm": 0.036376953125, "learning_rate": 1.1366955490799912e-05, "loss": 0.0018, "step": 18139 }, { "epoch": 8.466744457409568, "grad_norm": 0.00897216796875, "learning_rate": 1.1360161840667217e-05, "loss": 0.0002, "step": 18140 }, { "epoch": 8.467211201866977, "grad_norm": 0.0093994140625, "learning_rate": 1.1353370099048589e-05, "loss": 0.0001, "step": 18141 }, { "epoch": 8.467677946324388, "grad_norm": 0.056640625, "learning_rate": 1.1346580266090279e-05, "loss": 0.0023, "step": 18142 }, { "epoch": 8.468144690781797, "grad_norm": 0.0115966796875, "learning_rate": 1.1339792341938438e-05, "loss": 0.0001, "step": 18143 }, { "epoch": 8.468611435239206, "grad_norm": 0.0089111328125, "learning_rate": 1.1333006326739237e-05, "loss": 0.0001, "step": 18144 }, { "epoch": 8.469078179696616, "grad_norm": 0.00439453125, "learning_rate": 1.1326222220638805e-05, "loss": 0.0001, "step": 18145 }, { "epoch": 8.469544924154025, "grad_norm": 0.01953125, "learning_rate": 1.1319440023783179e-05, "loss": 0.0002, "step": 18146 }, { "epoch": 8.470011668611436, "grad_norm": 0.00848388671875, "learning_rate": 1.131265973631842e-05, "loss": 0.0001, "step": 18147 }, { "epoch": 8.470478413068845, "grad_norm": 0.016845703125, "learning_rate": 1.1305881358390491e-05, "loss": 0.0002, "step": 18148 }, { "epoch": 8.470945157526254, "grad_norm": 0.01287841796875, "learning_rate": 1.129910489014534e-05, "loss": 0.0002, "step": 18149 }, { "epoch": 8.471411901983664, "grad_norm": 0.004852294921875, "learning_rate": 1.1292330331728862e-05, "loss": 0.0001, "step": 18150 }, { "epoch": 8.471878646441073, "grad_norm": 0.007415771484375, "learning_rate": 1.1285557683286962e-05, "loss": 0.0001, "step": 18151 }, { "epoch": 8.472345390898482, "grad_norm": 0.0174560546875, "learning_rate": 1.1278786944965402e-05, "loss": 0.0002, "step": 18152 }, { "epoch": 8.472812135355893, "grad_norm": 0.01190185546875, "learning_rate": 1.127201811691001e-05, "loss": 0.0001, "step": 18153 }, { "epoch": 8.473278879813302, "grad_norm": 0.0260009765625, "learning_rate": 1.1265251199266535e-05, "loss": 0.0005, "step": 18154 }, { "epoch": 8.473745624270713, "grad_norm": 0.0498046875, "learning_rate": 1.1258486192180628e-05, "loss": 0.0035, "step": 18155 }, { "epoch": 8.474212368728121, "grad_norm": 0.006103515625, "learning_rate": 1.1251723095797972e-05, "loss": 0.0001, "step": 18156 }, { "epoch": 8.47467911318553, "grad_norm": 0.01708984375, "learning_rate": 1.1244961910264218e-05, "loss": 0.0002, "step": 18157 }, { "epoch": 8.475145857642941, "grad_norm": 0.0291748046875, "learning_rate": 1.123820263572487e-05, "loss": 0.0002, "step": 18158 }, { "epoch": 8.47561260210035, "grad_norm": 0.006744384765625, "learning_rate": 1.1231445272325514e-05, "loss": 0.0002, "step": 18159 }, { "epoch": 8.476079346557759, "grad_norm": 0.04541015625, "learning_rate": 1.1224689820211653e-05, "loss": 0.0022, "step": 18160 }, { "epoch": 8.47654609101517, "grad_norm": 0.005218505859375, "learning_rate": 1.1217936279528696e-05, "loss": 0.0001, "step": 18161 }, { "epoch": 8.477012835472578, "grad_norm": 0.019287109375, "learning_rate": 1.1211184650422102e-05, "loss": 0.0002, "step": 18162 }, { "epoch": 8.47747957992999, "grad_norm": 0.034423828125, "learning_rate": 1.1204434933037188e-05, "loss": 0.0004, "step": 18163 }, { "epoch": 8.477946324387398, "grad_norm": 0.00555419921875, "learning_rate": 1.1197687127519307e-05, "loss": 0.0001, "step": 18164 }, { "epoch": 8.478413068844807, "grad_norm": 0.0123291015625, "learning_rate": 1.1190941234013774e-05, "loss": 0.0002, "step": 18165 }, { "epoch": 8.478879813302218, "grad_norm": 0.00897216796875, "learning_rate": 1.1184197252665795e-05, "loss": 0.0002, "step": 18166 }, { "epoch": 8.479346557759627, "grad_norm": 0.006256103515625, "learning_rate": 1.117745518362059e-05, "loss": 0.0001, "step": 18167 }, { "epoch": 8.479813302217035, "grad_norm": 0.00750732421875, "learning_rate": 1.1170715027023348e-05, "loss": 0.0001, "step": 18168 }, { "epoch": 8.480280046674446, "grad_norm": 0.00775146484375, "learning_rate": 1.1163976783019137e-05, "loss": 0.0001, "step": 18169 }, { "epoch": 8.480746791131855, "grad_norm": 0.051025390625, "learning_rate": 1.1157240451753081e-05, "loss": 0.002, "step": 18170 }, { "epoch": 8.481213535589266, "grad_norm": 0.00927734375, "learning_rate": 1.1150506033370234e-05, "loss": 0.0001, "step": 18171 }, { "epoch": 8.481680280046675, "grad_norm": 0.0107421875, "learning_rate": 1.1143773528015544e-05, "loss": 0.0001, "step": 18172 }, { "epoch": 8.482147024504084, "grad_norm": 0.00970458984375, "learning_rate": 1.1137042935834008e-05, "loss": 0.0001, "step": 18173 }, { "epoch": 8.482613768961494, "grad_norm": 0.0093994140625, "learning_rate": 1.1130314256970542e-05, "loss": 0.0001, "step": 18174 }, { "epoch": 8.483080513418903, "grad_norm": 0.00604248046875, "learning_rate": 1.1123587491569998e-05, "loss": 0.0001, "step": 18175 }, { "epoch": 8.483547257876312, "grad_norm": 0.0245361328125, "learning_rate": 1.1116862639777225e-05, "loss": 0.0002, "step": 18176 }, { "epoch": 8.484014002333723, "grad_norm": 0.06884765625, "learning_rate": 1.1110139701737033e-05, "loss": 0.0018, "step": 18177 }, { "epoch": 8.484480746791132, "grad_norm": 0.006683349609375, "learning_rate": 1.1103418677594145e-05, "loss": 0.0001, "step": 18178 }, { "epoch": 8.48494749124854, "grad_norm": 0.057861328125, "learning_rate": 1.1096699567493307e-05, "loss": 0.0018, "step": 18179 }, { "epoch": 8.485414235705951, "grad_norm": 0.0146484375, "learning_rate": 1.1089982371579134e-05, "loss": 0.0002, "step": 18180 }, { "epoch": 8.48588098016336, "grad_norm": 0.0712890625, "learning_rate": 1.1083267089996296e-05, "loss": 0.0043, "step": 18181 }, { "epoch": 8.48634772462077, "grad_norm": 0.007354736328125, "learning_rate": 1.1076553722889393e-05, "loss": 0.0001, "step": 18182 }, { "epoch": 8.48681446907818, "grad_norm": 0.00531005859375, "learning_rate": 1.106984227040293e-05, "loss": 0.0001, "step": 18183 }, { "epoch": 8.487281213535589, "grad_norm": 0.0108642578125, "learning_rate": 1.1063132732681436e-05, "loss": 0.0001, "step": 18184 }, { "epoch": 8.487747957993, "grad_norm": 0.00994873046875, "learning_rate": 1.1056425109869384e-05, "loss": 0.0002, "step": 18185 }, { "epoch": 8.488214702450408, "grad_norm": 0.04833984375, "learning_rate": 1.1049719402111169e-05, "loss": 0.0002, "step": 18186 }, { "epoch": 8.488681446907817, "grad_norm": 0.006256103515625, "learning_rate": 1.1043015609551189e-05, "loss": 0.0001, "step": 18187 }, { "epoch": 8.489148191365228, "grad_norm": 0.11572265625, "learning_rate": 1.1036313732333802e-05, "loss": 0.0003, "step": 18188 }, { "epoch": 8.489614935822637, "grad_norm": 0.05126953125, "learning_rate": 1.1029613770603264e-05, "loss": 0.0031, "step": 18189 }, { "epoch": 8.490081680280047, "grad_norm": 0.01043701171875, "learning_rate": 1.1022915724503857e-05, "loss": 0.0001, "step": 18190 }, { "epoch": 8.490548424737456, "grad_norm": 0.00677490234375, "learning_rate": 1.1016219594179811e-05, "loss": 0.0001, "step": 18191 }, { "epoch": 8.491015169194865, "grad_norm": 0.00811767578125, "learning_rate": 1.1009525379775276e-05, "loss": 0.0001, "step": 18192 }, { "epoch": 8.491481913652276, "grad_norm": 0.0106201171875, "learning_rate": 1.1002833081434394e-05, "loss": 0.0001, "step": 18193 }, { "epoch": 8.491948658109685, "grad_norm": 0.0081787109375, "learning_rate": 1.0996142699301271e-05, "loss": 0.0001, "step": 18194 }, { "epoch": 8.492415402567094, "grad_norm": 0.00714111328125, "learning_rate": 1.0989454233519958e-05, "loss": 0.0001, "step": 18195 }, { "epoch": 8.492882147024504, "grad_norm": 0.010498046875, "learning_rate": 1.098276768423443e-05, "loss": 0.0001, "step": 18196 }, { "epoch": 8.493348891481913, "grad_norm": 0.00994873046875, "learning_rate": 1.0976083051588704e-05, "loss": 0.0001, "step": 18197 }, { "epoch": 8.493815635939324, "grad_norm": 0.006561279296875, "learning_rate": 1.0969400335726654e-05, "loss": 0.0001, "step": 18198 }, { "epoch": 8.494282380396733, "grad_norm": 0.01025390625, "learning_rate": 1.0962719536792198e-05, "loss": 0.0001, "step": 18199 }, { "epoch": 8.494749124854142, "grad_norm": 0.00408935546875, "learning_rate": 1.0956040654929211e-05, "loss": 0.0001, "step": 18200 }, { "epoch": 8.495215869311552, "grad_norm": 0.007476806640625, "learning_rate": 1.0949363690281433e-05, "loss": 0.0001, "step": 18201 }, { "epoch": 8.495682613768961, "grad_norm": 0.0186767578125, "learning_rate": 1.094268864299266e-05, "loss": 0.0002, "step": 18202 }, { "epoch": 8.49614935822637, "grad_norm": 0.0048828125, "learning_rate": 1.0936015513206632e-05, "loss": 0.0001, "step": 18203 }, { "epoch": 8.496616102683781, "grad_norm": 0.0966796875, "learning_rate": 1.0929344301066991e-05, "loss": 0.0058, "step": 18204 }, { "epoch": 8.49708284714119, "grad_norm": 0.01202392578125, "learning_rate": 1.0922675006717386e-05, "loss": 0.0001, "step": 18205 }, { "epoch": 8.4975495915986, "grad_norm": 0.012451171875, "learning_rate": 1.091600763030145e-05, "loss": 0.0001, "step": 18206 }, { "epoch": 8.49801633605601, "grad_norm": 0.0189208984375, "learning_rate": 1.0909342171962688e-05, "loss": 0.0002, "step": 18207 }, { "epoch": 8.498483080513418, "grad_norm": 0.0625, "learning_rate": 1.0902678631844632e-05, "loss": 0.0027, "step": 18208 }, { "epoch": 8.498949824970829, "grad_norm": 0.006988525390625, "learning_rate": 1.0896017010090787e-05, "loss": 0.0001, "step": 18209 }, { "epoch": 8.499416569428238, "grad_norm": 0.00567626953125, "learning_rate": 1.0889357306844528e-05, "loss": 0.0001, "step": 18210 }, { "epoch": 8.499883313885647, "grad_norm": 0.01116943359375, "learning_rate": 1.0882699522249306e-05, "loss": 0.0001, "step": 18211 }, { "epoch": 8.500350058343058, "grad_norm": 0.0048828125, "learning_rate": 1.087604365644842e-05, "loss": 0.0001, "step": 18212 }, { "epoch": 8.500816802800466, "grad_norm": 0.00750732421875, "learning_rate": 1.0869389709585187e-05, "loss": 0.0001, "step": 18213 }, { "epoch": 8.501283547257877, "grad_norm": 0.0106201171875, "learning_rate": 1.0862737681802914e-05, "loss": 0.0001, "step": 18214 }, { "epoch": 8.501750291715286, "grad_norm": 0.00994873046875, "learning_rate": 1.0856087573244789e-05, "loss": 0.0001, "step": 18215 }, { "epoch": 8.502217036172695, "grad_norm": 0.005828857421875, "learning_rate": 1.0849439384053995e-05, "loss": 0.0001, "step": 18216 }, { "epoch": 8.502683780630106, "grad_norm": 0.01025390625, "learning_rate": 1.0842793114373707e-05, "loss": 0.0001, "step": 18217 }, { "epoch": 8.503150525087515, "grad_norm": 0.0106201171875, "learning_rate": 1.0836148764346976e-05, "loss": 0.0002, "step": 18218 }, { "epoch": 8.503617269544923, "grad_norm": 0.003997802734375, "learning_rate": 1.0829506334116901e-05, "loss": 0.0001, "step": 18219 }, { "epoch": 8.504084014002334, "grad_norm": 0.006805419921875, "learning_rate": 1.0822865823826511e-05, "loss": 0.0001, "step": 18220 }, { "epoch": 8.504550758459743, "grad_norm": 0.046142578125, "learning_rate": 1.0816227233618736e-05, "loss": 0.0027, "step": 18221 }, { "epoch": 8.505017502917152, "grad_norm": 0.047119140625, "learning_rate": 1.0809590563636552e-05, "loss": 0.0015, "step": 18222 }, { "epoch": 8.505484247374563, "grad_norm": 0.006591796875, "learning_rate": 1.0802955814022852e-05, "loss": 0.0001, "step": 18223 }, { "epoch": 8.505950991831972, "grad_norm": 0.0196533203125, "learning_rate": 1.0796322984920459e-05, "loss": 0.0002, "step": 18224 }, { "epoch": 8.505950991831972, "eval_loss": 2.4529030323028564, "eval_runtime": 83.7213, "eval_samples_per_second": 21.548, "eval_steps_per_second": 2.699, "step": 18224 }, { "epoch": 8.506417736289382, "grad_norm": 0.01171875, "learning_rate": 1.0789692076472202e-05, "loss": 0.0002, "step": 18225 }, { "epoch": 8.506884480746791, "grad_norm": 0.00921630859375, "learning_rate": 1.0783063088820889e-05, "loss": 0.0001, "step": 18226 }, { "epoch": 8.5073512252042, "grad_norm": 0.0113525390625, "learning_rate": 1.0776436022109182e-05, "loss": 0.0001, "step": 18227 }, { "epoch": 8.50781796966161, "grad_norm": 0.00689697265625, "learning_rate": 1.0769810876479836e-05, "loss": 0.0001, "step": 18228 }, { "epoch": 8.50828471411902, "grad_norm": 0.091796875, "learning_rate": 1.0763187652075434e-05, "loss": 0.0027, "step": 18229 }, { "epoch": 8.50875145857643, "grad_norm": 0.052001953125, "learning_rate": 1.075656634903861e-05, "loss": 0.0002, "step": 18230 }, { "epoch": 8.50921820303384, "grad_norm": 0.0037689208984375, "learning_rate": 1.0749946967511959e-05, "loss": 0.0001, "step": 18231 }, { "epoch": 8.509684947491248, "grad_norm": 0.01190185546875, "learning_rate": 1.0743329507637945e-05, "loss": 0.0002, "step": 18232 }, { "epoch": 8.510151691948659, "grad_norm": 0.053955078125, "learning_rate": 1.0736713969559086e-05, "loss": 0.0029, "step": 18233 }, { "epoch": 8.510618436406068, "grad_norm": 0.00787353515625, "learning_rate": 1.0730100353417839e-05, "loss": 0.0001, "step": 18234 }, { "epoch": 8.511085180863477, "grad_norm": 0.031982421875, "learning_rate": 1.072348865935654e-05, "loss": 0.0001, "step": 18235 }, { "epoch": 8.511551925320887, "grad_norm": 0.0172119140625, "learning_rate": 1.07168788875176e-05, "loss": 0.0002, "step": 18236 }, { "epoch": 8.512018669778296, "grad_norm": 0.0166015625, "learning_rate": 1.071027103804334e-05, "loss": 0.0002, "step": 18237 }, { "epoch": 8.512485414235705, "grad_norm": 0.00909423828125, "learning_rate": 1.0703665111075977e-05, "loss": 0.0001, "step": 18238 }, { "epoch": 8.512952158693116, "grad_norm": 0.005218505859375, "learning_rate": 1.06970611067578e-05, "loss": 0.0001, "step": 18239 }, { "epoch": 8.513418903150525, "grad_norm": 0.0089111328125, "learning_rate": 1.0690459025230992e-05, "loss": 0.0001, "step": 18240 }, { "epoch": 8.513885647607935, "grad_norm": 0.04296875, "learning_rate": 1.0683858866637663e-05, "loss": 0.0016, "step": 18241 }, { "epoch": 8.514352392065344, "grad_norm": 0.010498046875, "learning_rate": 1.0677260631119979e-05, "loss": 0.0002, "step": 18242 }, { "epoch": 8.514819136522753, "grad_norm": 0.060791015625, "learning_rate": 1.0670664318819978e-05, "loss": 0.0002, "step": 18243 }, { "epoch": 8.515285880980164, "grad_norm": 0.0634765625, "learning_rate": 1.0664069929879671e-05, "loss": 0.0015, "step": 18244 }, { "epoch": 8.515752625437573, "grad_norm": 0.006561279296875, "learning_rate": 1.0657477464441056e-05, "loss": 0.0001, "step": 18245 }, { "epoch": 8.516219369894982, "grad_norm": 0.054931640625, "learning_rate": 1.0650886922646098e-05, "loss": 0.0002, "step": 18246 }, { "epoch": 8.516686114352392, "grad_norm": 0.060791015625, "learning_rate": 1.0644298304636657e-05, "loss": 0.0018, "step": 18247 }, { "epoch": 8.517152858809801, "grad_norm": 0.0078125, "learning_rate": 1.0637711610554613e-05, "loss": 0.0002, "step": 18248 }, { "epoch": 8.517619603267212, "grad_norm": 0.0125732421875, "learning_rate": 1.0631126840541817e-05, "loss": 0.0001, "step": 18249 }, { "epoch": 8.518086347724621, "grad_norm": 0.09375, "learning_rate": 1.0624543994739988e-05, "loss": 0.0047, "step": 18250 }, { "epoch": 8.51855309218203, "grad_norm": 0.007659912109375, "learning_rate": 1.0617963073290892e-05, "loss": 0.0001, "step": 18251 }, { "epoch": 8.51901983663944, "grad_norm": 0.007171630859375, "learning_rate": 1.061138407633624e-05, "loss": 0.0001, "step": 18252 }, { "epoch": 8.51948658109685, "grad_norm": 0.00714111328125, "learning_rate": 1.0604807004017658e-05, "loss": 0.0001, "step": 18253 }, { "epoch": 8.519953325554258, "grad_norm": 0.051513671875, "learning_rate": 1.059823185647676e-05, "loss": 0.0028, "step": 18254 }, { "epoch": 8.520420070011669, "grad_norm": 0.01409912109375, "learning_rate": 1.059165863385514e-05, "loss": 0.0002, "step": 18255 }, { "epoch": 8.520886814469078, "grad_norm": 0.01226806640625, "learning_rate": 1.0585087336294287e-05, "loss": 0.0002, "step": 18256 }, { "epoch": 8.521353558926489, "grad_norm": 0.02001953125, "learning_rate": 1.0578517963935719e-05, "loss": 0.0002, "step": 18257 }, { "epoch": 8.521820303383898, "grad_norm": 0.009765625, "learning_rate": 1.0571950516920892e-05, "loss": 0.0001, "step": 18258 }, { "epoch": 8.522287047841306, "grad_norm": 0.007659912109375, "learning_rate": 1.056538499539117e-05, "loss": 0.0001, "step": 18259 }, { "epoch": 8.522753792298717, "grad_norm": 0.009521484375, "learning_rate": 1.0558821399487962e-05, "loss": 0.0001, "step": 18260 }, { "epoch": 8.523220536756126, "grad_norm": 0.01141357421875, "learning_rate": 1.0552259729352542e-05, "loss": 0.0002, "step": 18261 }, { "epoch": 8.523687281213535, "grad_norm": 0.0311279296875, "learning_rate": 1.0545699985126223e-05, "loss": 0.0003, "step": 18262 }, { "epoch": 8.524154025670946, "grad_norm": 0.06591796875, "learning_rate": 1.0539142166950245e-05, "loss": 0.0025, "step": 18263 }, { "epoch": 8.524620770128355, "grad_norm": 0.058837890625, "learning_rate": 1.0532586274965771e-05, "loss": 0.0029, "step": 18264 }, { "epoch": 8.525087514585763, "grad_norm": 0.00762939453125, "learning_rate": 1.0526032309313994e-05, "loss": 0.0001, "step": 18265 }, { "epoch": 8.525554259043174, "grad_norm": 0.06689453125, "learning_rate": 1.0519480270136029e-05, "loss": 0.0002, "step": 18266 }, { "epoch": 8.526021003500583, "grad_norm": 0.0206298828125, "learning_rate": 1.0512930157572908e-05, "loss": 0.0002, "step": 18267 }, { "epoch": 8.526487747957994, "grad_norm": 0.018310546875, "learning_rate": 1.0506381971765699e-05, "loss": 0.0002, "step": 18268 }, { "epoch": 8.526954492415403, "grad_norm": 0.052734375, "learning_rate": 1.0499835712855388e-05, "loss": 0.0023, "step": 18269 }, { "epoch": 8.527421236872812, "grad_norm": 0.009521484375, "learning_rate": 1.0493291380982906e-05, "loss": 0.0002, "step": 18270 }, { "epoch": 8.527887981330222, "grad_norm": 0.00616455078125, "learning_rate": 1.0486748976289162e-05, "loss": 0.0001, "step": 18271 }, { "epoch": 8.528354725787631, "grad_norm": 0.00531005859375, "learning_rate": 1.0480208498915056e-05, "loss": 0.0001, "step": 18272 }, { "epoch": 8.528821470245042, "grad_norm": 0.00592041015625, "learning_rate": 1.0473669949001363e-05, "loss": 0.0001, "step": 18273 }, { "epoch": 8.52928821470245, "grad_norm": 0.0194091796875, "learning_rate": 1.0467133326688894e-05, "loss": 0.0001, "step": 18274 }, { "epoch": 8.52975495915986, "grad_norm": 0.037841796875, "learning_rate": 1.046059863211839e-05, "loss": 0.001, "step": 18275 }, { "epoch": 8.53022170361727, "grad_norm": 0.0242919921875, "learning_rate": 1.045406586543053e-05, "loss": 0.0002, "step": 18276 }, { "epoch": 8.53068844807468, "grad_norm": 0.023681640625, "learning_rate": 1.0447535026765987e-05, "loss": 0.0002, "step": 18277 }, { "epoch": 8.531155192532088, "grad_norm": 0.0062255859375, "learning_rate": 1.0441006116265394e-05, "loss": 0.0001, "step": 18278 }, { "epoch": 8.531621936989499, "grad_norm": 0.037109375, "learning_rate": 1.0434479134069285e-05, "loss": 0.0023, "step": 18279 }, { "epoch": 8.532088681446908, "grad_norm": 0.046630859375, "learning_rate": 1.0427954080318236e-05, "loss": 0.0024, "step": 18280 }, { "epoch": 8.532555425904317, "grad_norm": 0.01220703125, "learning_rate": 1.04214309551527e-05, "loss": 0.0002, "step": 18281 }, { "epoch": 8.533022170361727, "grad_norm": 0.00946044921875, "learning_rate": 1.0414909758713142e-05, "loss": 0.0001, "step": 18282 }, { "epoch": 8.533488914819136, "grad_norm": 0.013916015625, "learning_rate": 1.0408390491139985e-05, "loss": 0.0001, "step": 18283 }, { "epoch": 8.533955659276547, "grad_norm": 0.0155029296875, "learning_rate": 1.0401873152573572e-05, "loss": 0.0002, "step": 18284 }, { "epoch": 8.534422403733956, "grad_norm": 0.01544189453125, "learning_rate": 1.0395357743154244e-05, "loss": 0.0001, "step": 18285 }, { "epoch": 8.534889148191365, "grad_norm": 0.013916015625, "learning_rate": 1.0388844263022302e-05, "loss": 0.0002, "step": 18286 }, { "epoch": 8.535355892648775, "grad_norm": 0.00897216796875, "learning_rate": 1.0382332712317944e-05, "loss": 0.0001, "step": 18287 }, { "epoch": 8.535822637106184, "grad_norm": 0.0089111328125, "learning_rate": 1.0375823091181392e-05, "loss": 0.0001, "step": 18288 }, { "epoch": 8.536289381563593, "grad_norm": 0.004913330078125, "learning_rate": 1.0369315399752832e-05, "loss": 0.0001, "step": 18289 }, { "epoch": 8.536756126021004, "grad_norm": 0.00628662109375, "learning_rate": 1.0362809638172366e-05, "loss": 0.0002, "step": 18290 }, { "epoch": 8.537222870478413, "grad_norm": 0.029052734375, "learning_rate": 1.0356305806580036e-05, "loss": 0.0002, "step": 18291 }, { "epoch": 8.537689614935823, "grad_norm": 0.007720947265625, "learning_rate": 1.0349803905115918e-05, "loss": 0.0001, "step": 18292 }, { "epoch": 8.538156359393232, "grad_norm": 0.02880859375, "learning_rate": 1.0343303933919979e-05, "loss": 0.0003, "step": 18293 }, { "epoch": 8.538623103850641, "grad_norm": 0.015625, "learning_rate": 1.0336805893132173e-05, "loss": 0.0002, "step": 18294 }, { "epoch": 8.539089848308052, "grad_norm": 0.0157470703125, "learning_rate": 1.0330309782892444e-05, "loss": 0.0001, "step": 18295 }, { "epoch": 8.53955659276546, "grad_norm": 0.01043701171875, "learning_rate": 1.0323815603340625e-05, "loss": 0.0001, "step": 18296 }, { "epoch": 8.54002333722287, "grad_norm": 0.0113525390625, "learning_rate": 1.0317323354616537e-05, "loss": 0.0001, "step": 18297 }, { "epoch": 8.54049008168028, "grad_norm": 0.00732421875, "learning_rate": 1.0310833036860012e-05, "loss": 0.0001, "step": 18298 }, { "epoch": 8.54095682613769, "grad_norm": 0.011962890625, "learning_rate": 1.030434465021074e-05, "loss": 0.0001, "step": 18299 }, { "epoch": 8.5414235705951, "grad_norm": 0.0106201171875, "learning_rate": 1.0297858194808452e-05, "loss": 0.0001, "step": 18300 }, { "epoch": 8.541890315052509, "grad_norm": 0.010498046875, "learning_rate": 1.0291373670792815e-05, "loss": 0.0001, "step": 18301 }, { "epoch": 8.542357059509918, "grad_norm": 0.014404296875, "learning_rate": 1.0284891078303426e-05, "loss": 0.0001, "step": 18302 }, { "epoch": 8.542823803967329, "grad_norm": 0.0311279296875, "learning_rate": 1.0278410417479867e-05, "loss": 0.0002, "step": 18303 }, { "epoch": 8.543290548424737, "grad_norm": 0.0186767578125, "learning_rate": 1.02719316884617e-05, "loss": 0.0002, "step": 18304 }, { "epoch": 8.543757292882146, "grad_norm": 0.0191650390625, "learning_rate": 1.0265454891388383e-05, "loss": 0.0002, "step": 18305 }, { "epoch": 8.544224037339557, "grad_norm": 0.0264892578125, "learning_rate": 1.0258980026399379e-05, "loss": 0.0002, "step": 18306 }, { "epoch": 8.544690781796966, "grad_norm": 0.0286865234375, "learning_rate": 1.0252507093634123e-05, "loss": 0.002, "step": 18307 }, { "epoch": 8.545157526254375, "grad_norm": 0.01055908203125, "learning_rate": 1.0246036093231947e-05, "loss": 0.0001, "step": 18308 }, { "epoch": 8.545624270711786, "grad_norm": 0.01434326171875, "learning_rate": 1.023956702533222e-05, "loss": 0.0002, "step": 18309 }, { "epoch": 8.546091015169194, "grad_norm": 0.00811767578125, "learning_rate": 1.0233099890074182e-05, "loss": 0.0001, "step": 18310 }, { "epoch": 8.546557759626605, "grad_norm": 0.0045166015625, "learning_rate": 1.0226634687597114e-05, "loss": 0.0001, "step": 18311 }, { "epoch": 8.547024504084014, "grad_norm": 0.06982421875, "learning_rate": 1.0220171418040214e-05, "loss": 0.0023, "step": 18312 }, { "epoch": 8.547491248541423, "grad_norm": 0.00811767578125, "learning_rate": 1.0213710081542616e-05, "loss": 0.0001, "step": 18313 }, { "epoch": 8.547957992998834, "grad_norm": 0.00579833984375, "learning_rate": 1.0207250678243463e-05, "loss": 0.0001, "step": 18314 }, { "epoch": 8.548424737456243, "grad_norm": 0.0081787109375, "learning_rate": 1.0200793208281844e-05, "loss": 0.0002, "step": 18315 }, { "epoch": 8.548891481913651, "grad_norm": 0.126953125, "learning_rate": 1.0194337671796772e-05, "loss": 0.0017, "step": 18316 }, { "epoch": 8.549358226371062, "grad_norm": 0.0086669921875, "learning_rate": 1.0187884068927244e-05, "loss": 0.0001, "step": 18317 }, { "epoch": 8.549824970828471, "grad_norm": 0.01336669921875, "learning_rate": 1.0181432399812241e-05, "loss": 0.0002, "step": 18318 }, { "epoch": 8.550291715285882, "grad_norm": 0.01263427734375, "learning_rate": 1.0174982664590638e-05, "loss": 0.0002, "step": 18319 }, { "epoch": 8.55075845974329, "grad_norm": 0.030029296875, "learning_rate": 1.0168534863401314e-05, "loss": 0.0025, "step": 18320 }, { "epoch": 8.5512252042007, "grad_norm": 0.0634765625, "learning_rate": 1.0162088996383124e-05, "loss": 0.0032, "step": 18321 }, { "epoch": 8.55169194865811, "grad_norm": 0.005340576171875, "learning_rate": 1.0155645063674824e-05, "loss": 0.0001, "step": 18322 }, { "epoch": 8.55215869311552, "grad_norm": 0.04150390625, "learning_rate": 1.0149203065415158e-05, "loss": 0.0002, "step": 18323 }, { "epoch": 8.552625437572928, "grad_norm": 0.08984375, "learning_rate": 1.0142763001742873e-05, "loss": 0.0051, "step": 18324 }, { "epoch": 8.553092182030339, "grad_norm": 0.0888671875, "learning_rate": 1.0136324872796576e-05, "loss": 0.0044, "step": 18325 }, { "epoch": 8.553558926487748, "grad_norm": 0.00799560546875, "learning_rate": 1.0129888678714905e-05, "loss": 0.0002, "step": 18326 }, { "epoch": 8.554025670945158, "grad_norm": 0.0167236328125, "learning_rate": 1.0123454419636458e-05, "loss": 0.0002, "step": 18327 }, { "epoch": 8.554492415402567, "grad_norm": 0.0157470703125, "learning_rate": 1.0117022095699746e-05, "loss": 0.0001, "step": 18328 }, { "epoch": 8.554959159859976, "grad_norm": 0.006866455078125, "learning_rate": 1.0110591707043293e-05, "loss": 0.0002, "step": 18329 }, { "epoch": 8.555425904317387, "grad_norm": 0.0107421875, "learning_rate": 1.0104163253805521e-05, "loss": 0.0001, "step": 18330 }, { "epoch": 8.555892648774796, "grad_norm": 0.00848388671875, "learning_rate": 1.0097736736124852e-05, "loss": 0.0001, "step": 18331 }, { "epoch": 8.556359393232205, "grad_norm": 0.0125732421875, "learning_rate": 1.0091312154139677e-05, "loss": 0.0001, "step": 18332 }, { "epoch": 8.556826137689615, "grad_norm": 0.006988525390625, "learning_rate": 1.0084889507988282e-05, "loss": 0.0001, "step": 18333 }, { "epoch": 8.557292882147024, "grad_norm": 0.0380859375, "learning_rate": 1.0078468797808982e-05, "loss": 0.0017, "step": 18334 }, { "epoch": 8.557759626604435, "grad_norm": 0.0162353515625, "learning_rate": 1.0072050023740043e-05, "loss": 0.0001, "step": 18335 }, { "epoch": 8.558226371061844, "grad_norm": 0.00634765625, "learning_rate": 1.0065633185919621e-05, "loss": 0.0001, "step": 18336 }, { "epoch": 8.558693115519253, "grad_norm": 0.00616455078125, "learning_rate": 1.0059218284485895e-05, "loss": 0.0001, "step": 18337 }, { "epoch": 8.559159859976663, "grad_norm": 0.013916015625, "learning_rate": 1.0052805319577018e-05, "loss": 0.0001, "step": 18338 }, { "epoch": 8.559626604434072, "grad_norm": 0.00872802734375, "learning_rate": 1.0046394291331029e-05, "loss": 0.0001, "step": 18339 }, { "epoch": 8.560093348891481, "grad_norm": 0.005126953125, "learning_rate": 1.003998519988597e-05, "loss": 0.0001, "step": 18340 }, { "epoch": 8.560560093348892, "grad_norm": 0.00885009765625, "learning_rate": 1.0033578045379854e-05, "loss": 0.0001, "step": 18341 }, { "epoch": 8.5610268378063, "grad_norm": 0.005584716796875, "learning_rate": 1.0027172827950604e-05, "loss": 0.0001, "step": 18342 }, { "epoch": 8.561493582263711, "grad_norm": 0.00653076171875, "learning_rate": 1.0020769547736153e-05, "loss": 0.0001, "step": 18343 }, { "epoch": 8.56196032672112, "grad_norm": 0.060302734375, "learning_rate": 1.001436820487438e-05, "loss": 0.0013, "step": 18344 }, { "epoch": 8.56242707117853, "grad_norm": 0.0107421875, "learning_rate": 1.0007968799503086e-05, "loss": 0.0001, "step": 18345 }, { "epoch": 8.56289381563594, "grad_norm": 0.00787353515625, "learning_rate": 1.0001571331760073e-05, "loss": 0.0001, "step": 18346 }, { "epoch": 8.563360560093349, "grad_norm": 0.0107421875, "learning_rate": 9.995175801783108e-06, "loss": 0.0001, "step": 18347 }, { "epoch": 8.563827304550758, "grad_norm": 0.007110595703125, "learning_rate": 9.988782209709846e-06, "loss": 0.0002, "step": 18348 }, { "epoch": 8.564294049008168, "grad_norm": 0.050537109375, "learning_rate": 9.982390555677968e-06, "loss": 0.0012, "step": 18349 }, { "epoch": 8.564760793465577, "grad_norm": 0.09716796875, "learning_rate": 9.976000839825118e-06, "loss": 0.0038, "step": 18350 }, { "epoch": 8.565227537922986, "grad_norm": 0.07958984375, "learning_rate": 9.96961306228883e-06, "loss": 0.0023, "step": 18351 }, { "epoch": 8.565694282380397, "grad_norm": 0.025146484375, "learning_rate": 9.963227223206672e-06, "loss": 0.0001, "step": 18352 }, { "epoch": 8.566161026837806, "grad_norm": 0.00616455078125, "learning_rate": 9.956843322716148e-06, "loss": 0.0001, "step": 18353 }, { "epoch": 8.566627771295217, "grad_norm": 0.03857421875, "learning_rate": 9.950461360954667e-06, "loss": 0.0022, "step": 18354 }, { "epoch": 8.567094515752625, "grad_norm": 0.01055908203125, "learning_rate": 9.944081338059663e-06, "loss": 0.0002, "step": 18355 }, { "epoch": 8.567561260210034, "grad_norm": 0.01519775390625, "learning_rate": 9.93770325416854e-06, "loss": 0.0002, "step": 18356 }, { "epoch": 8.568028004667445, "grad_norm": 0.007598876953125, "learning_rate": 9.931327109418554e-06, "loss": 0.0001, "step": 18357 }, { "epoch": 8.568494749124854, "grad_norm": 0.007415771484375, "learning_rate": 9.92495290394706e-06, "loss": 0.0001, "step": 18358 }, { "epoch": 8.568961493582263, "grad_norm": 0.0286865234375, "learning_rate": 9.91858063789125e-06, "loss": 0.0002, "step": 18359 }, { "epoch": 8.569428238039674, "grad_norm": 0.005218505859375, "learning_rate": 9.912210311388336e-06, "loss": 0.0001, "step": 18360 }, { "epoch": 8.569894982497082, "grad_norm": 0.0888671875, "learning_rate": 9.90584192457552e-06, "loss": 0.005, "step": 18361 }, { "epoch": 8.570361726954493, "grad_norm": 0.00726318359375, "learning_rate": 9.899475477589858e-06, "loss": 0.0001, "step": 18362 }, { "epoch": 8.570828471411902, "grad_norm": 0.04248046875, "learning_rate": 9.893110970568465e-06, "loss": 0.0014, "step": 18363 }, { "epoch": 8.571295215869311, "grad_norm": 0.0208740234375, "learning_rate": 9.886748403648383e-06, "loss": 0.0004, "step": 18364 }, { "epoch": 8.571761960326722, "grad_norm": 0.00537109375, "learning_rate": 9.880387776966582e-06, "loss": 0.0001, "step": 18365 }, { "epoch": 8.57222870478413, "grad_norm": 0.05810546875, "learning_rate": 9.874029090660008e-06, "loss": 0.0031, "step": 18366 }, { "epoch": 8.57269544924154, "grad_norm": 0.0159912109375, "learning_rate": 9.867672344865608e-06, "loss": 0.0001, "step": 18367 }, { "epoch": 8.57316219369895, "grad_norm": 0.0517578125, "learning_rate": 9.861317539720205e-06, "loss": 0.0034, "step": 18368 }, { "epoch": 8.573628938156359, "grad_norm": 0.00714111328125, "learning_rate": 9.854964675360646e-06, "loss": 0.0001, "step": 18369 }, { "epoch": 8.57409568261377, "grad_norm": 0.007080078125, "learning_rate": 9.84861375192374e-06, "loss": 0.0001, "step": 18370 }, { "epoch": 8.574562427071179, "grad_norm": 0.05859375, "learning_rate": 9.842264769546184e-06, "loss": 0.0027, "step": 18371 }, { "epoch": 8.575029171528588, "grad_norm": 0.007171630859375, "learning_rate": 9.835917728364697e-06, "loss": 0.0001, "step": 18372 }, { "epoch": 8.575495915985998, "grad_norm": 0.005889892578125, "learning_rate": 9.82957262851596e-06, "loss": 0.0002, "step": 18373 }, { "epoch": 8.575962660443407, "grad_norm": 0.01483154296875, "learning_rate": 9.82322947013654e-06, "loss": 0.0001, "step": 18374 }, { "epoch": 8.576429404900816, "grad_norm": 0.042724609375, "learning_rate": 9.816888253363055e-06, "loss": 0.0016, "step": 18375 }, { "epoch": 8.576896149358227, "grad_norm": 0.058837890625, "learning_rate": 9.810548978332046e-06, "loss": 0.0029, "step": 18376 }, { "epoch": 8.577362893815636, "grad_norm": 0.004913330078125, "learning_rate": 9.804211645179961e-06, "loss": 0.0001, "step": 18377 }, { "epoch": 8.577829638273045, "grad_norm": 0.01263427734375, "learning_rate": 9.797876254043293e-06, "loss": 0.0001, "step": 18378 }, { "epoch": 8.578296382730455, "grad_norm": 0.04931640625, "learning_rate": 9.791542805058419e-06, "loss": 0.0028, "step": 18379 }, { "epoch": 8.578763127187864, "grad_norm": 0.044921875, "learning_rate": 9.785211298361707e-06, "loss": 0.0002, "step": 18380 }, { "epoch": 8.579229871645275, "grad_norm": 0.0189208984375, "learning_rate": 9.778881734089507e-06, "loss": 0.0002, "step": 18381 }, { "epoch": 8.579696616102684, "grad_norm": 0.005584716796875, "learning_rate": 9.772554112378075e-06, "loss": 0.0001, "step": 18382 }, { "epoch": 8.580163360560093, "grad_norm": 0.01129150390625, "learning_rate": 9.766228433363655e-06, "loss": 0.0001, "step": 18383 }, { "epoch": 8.580630105017503, "grad_norm": 0.0177001953125, "learning_rate": 9.759904697182475e-06, "loss": 0.0002, "step": 18384 }, { "epoch": 8.581096849474912, "grad_norm": 0.01416015625, "learning_rate": 9.753582903970648e-06, "loss": 0.0001, "step": 18385 }, { "epoch": 8.581563593932323, "grad_norm": 0.1044921875, "learning_rate": 9.74726305386433e-06, "loss": 0.0042, "step": 18386 }, { "epoch": 8.582030338389732, "grad_norm": 0.0927734375, "learning_rate": 9.740945146999558e-06, "loss": 0.0046, "step": 18387 }, { "epoch": 8.58249708284714, "grad_norm": 0.0068359375, "learning_rate": 9.734629183512411e-06, "loss": 0.0001, "step": 18388 }, { "epoch": 8.582963827304551, "grad_norm": 0.01458740234375, "learning_rate": 9.728315163538815e-06, "loss": 0.0001, "step": 18389 }, { "epoch": 8.58343057176196, "grad_norm": 0.01611328125, "learning_rate": 9.722003087214782e-06, "loss": 0.0002, "step": 18390 }, { "epoch": 8.58389731621937, "grad_norm": 0.0198974609375, "learning_rate": 9.715692954676158e-06, "loss": 0.0001, "step": 18391 }, { "epoch": 8.58436406067678, "grad_norm": 0.04736328125, "learning_rate": 9.709384766058837e-06, "loss": 0.0006, "step": 18392 }, { "epoch": 8.584830805134189, "grad_norm": 0.0123291015625, "learning_rate": 9.703078521498665e-06, "loss": 0.0001, "step": 18393 }, { "epoch": 8.585297549591598, "grad_norm": 0.009765625, "learning_rate": 9.696774221131389e-06, "loss": 0.0001, "step": 18394 }, { "epoch": 8.585764294049008, "grad_norm": 0.0057373046875, "learning_rate": 9.690471865092742e-06, "loss": 0.0001, "step": 18395 }, { "epoch": 8.586231038506417, "grad_norm": 0.017822265625, "learning_rate": 9.684171453518464e-06, "loss": 0.0002, "step": 18396 }, { "epoch": 8.586697782963828, "grad_norm": 0.01531982421875, "learning_rate": 9.677872986544157e-06, "loss": 0.0002, "step": 18397 }, { "epoch": 8.587164527421237, "grad_norm": 0.1640625, "learning_rate": 9.671576464305466e-06, "loss": 0.0004, "step": 18398 }, { "epoch": 8.587631271878646, "grad_norm": 0.0059814453125, "learning_rate": 9.66528188693796e-06, "loss": 0.0001, "step": 18399 }, { "epoch": 8.588098016336057, "grad_norm": 0.004852294921875, "learning_rate": 9.658989254577155e-06, "loss": 0.0001, "step": 18400 }, { "epoch": 8.588564760793465, "grad_norm": 0.01513671875, "learning_rate": 9.652698567358543e-06, "loss": 0.0001, "step": 18401 }, { "epoch": 8.589031505250874, "grad_norm": 0.015380859375, "learning_rate": 9.6464098254176e-06, "loss": 0.0002, "step": 18402 }, { "epoch": 8.589498249708285, "grad_norm": 0.0093994140625, "learning_rate": 9.640123028889669e-06, "loss": 0.0001, "step": 18403 }, { "epoch": 8.589964994165694, "grad_norm": 0.00909423828125, "learning_rate": 9.63383817791016e-06, "loss": 0.0001, "step": 18404 }, { "epoch": 8.590431738623105, "grad_norm": 0.00592041015625, "learning_rate": 9.627555272614386e-06, "loss": 0.0001, "step": 18405 }, { "epoch": 8.590898483080514, "grad_norm": 0.060546875, "learning_rate": 9.621274313137607e-06, "loss": 0.0026, "step": 18406 }, { "epoch": 8.591365227537922, "grad_norm": 0.0634765625, "learning_rate": 9.614995299615081e-06, "loss": 0.0037, "step": 18407 }, { "epoch": 8.591831971995333, "grad_norm": 0.01104736328125, "learning_rate": 9.608718232181967e-06, "loss": 0.0002, "step": 18408 }, { "epoch": 8.592298716452742, "grad_norm": 0.054931640625, "learning_rate": 9.602443110973446e-06, "loss": 0.0011, "step": 18409 }, { "epoch": 8.592765460910151, "grad_norm": 0.01409912109375, "learning_rate": 9.596169936124643e-06, "loss": 0.0002, "step": 18410 }, { "epoch": 8.593232205367562, "grad_norm": 0.010498046875, "learning_rate": 9.589898707770594e-06, "loss": 0.0002, "step": 18411 }, { "epoch": 8.59369894982497, "grad_norm": 0.0289306640625, "learning_rate": 9.583629426046326e-06, "loss": 0.0004, "step": 18412 }, { "epoch": 8.594165694282381, "grad_norm": 0.006927490234375, "learning_rate": 9.57736209108686e-06, "loss": 0.0002, "step": 18413 }, { "epoch": 8.59463243873979, "grad_norm": 0.064453125, "learning_rate": 9.571096703027093e-06, "loss": 0.0026, "step": 18414 }, { "epoch": 8.595099183197199, "grad_norm": 0.0054931640625, "learning_rate": 9.564833262001948e-06, "loss": 0.0001, "step": 18415 }, { "epoch": 8.59556592765461, "grad_norm": 0.01080322265625, "learning_rate": 9.558571768146296e-06, "loss": 0.0002, "step": 18416 }, { "epoch": 8.596032672112019, "grad_norm": 0.01129150390625, "learning_rate": 9.552312221594928e-06, "loss": 0.0001, "step": 18417 }, { "epoch": 8.596499416569428, "grad_norm": 0.0286865234375, "learning_rate": 9.546054622482625e-06, "loss": 0.0002, "step": 18418 }, { "epoch": 8.596966161026838, "grad_norm": 0.03173828125, "learning_rate": 9.539798970944148e-06, "loss": 0.0014, "step": 18419 }, { "epoch": 8.597432905484247, "grad_norm": 0.0076904296875, "learning_rate": 9.533545267114152e-06, "loss": 0.0001, "step": 18420 }, { "epoch": 8.597899649941656, "grad_norm": 0.005157470703125, "learning_rate": 9.527293511127289e-06, "loss": 0.0001, "step": 18421 }, { "epoch": 8.598366394399067, "grad_norm": 0.0242919921875, "learning_rate": 9.521043703118204e-06, "loss": 0.0003, "step": 18422 }, { "epoch": 8.598833138856476, "grad_norm": 0.040771484375, "learning_rate": 9.514795843221425e-06, "loss": 0.0003, "step": 18423 }, { "epoch": 8.599299883313886, "grad_norm": 0.03955078125, "learning_rate": 9.508549931571475e-06, "loss": 0.0026, "step": 18424 }, { "epoch": 8.599766627771295, "grad_norm": 0.08837890625, "learning_rate": 9.502305968302872e-06, "loss": 0.0049, "step": 18425 }, { "epoch": 8.600233372228704, "grad_norm": 0.0096435546875, "learning_rate": 9.496063953550004e-06, "loss": 0.0001, "step": 18426 }, { "epoch": 8.600700116686115, "grad_norm": 0.0135498046875, "learning_rate": 9.489823887447325e-06, "loss": 0.0002, "step": 18427 }, { "epoch": 8.601166861143524, "grad_norm": 0.06689453125, "learning_rate": 9.483585770129133e-06, "loss": 0.0018, "step": 18428 }, { "epoch": 8.601633605600934, "grad_norm": 0.0157470703125, "learning_rate": 9.477349601729779e-06, "loss": 0.0001, "step": 18429 }, { "epoch": 8.602100350058343, "grad_norm": 0.01361083984375, "learning_rate": 9.471115382383533e-06, "loss": 0.0001, "step": 18430 }, { "epoch": 8.602567094515752, "grad_norm": 0.004913330078125, "learning_rate": 9.464883112224598e-06, "loss": 0.0001, "step": 18431 }, { "epoch": 8.603033838973163, "grad_norm": 0.013916015625, "learning_rate": 9.45865279138719e-06, "loss": 0.0001, "step": 18432 }, { "epoch": 8.603500583430572, "grad_norm": 0.1025390625, "learning_rate": 9.452424420005456e-06, "loss": 0.003, "step": 18433 }, { "epoch": 8.60396732788798, "grad_norm": 0.00555419921875, "learning_rate": 9.446197998213491e-06, "loss": 0.0001, "step": 18434 }, { "epoch": 8.604434072345391, "grad_norm": 0.01470947265625, "learning_rate": 9.439973526145329e-06, "loss": 0.0001, "step": 18435 }, { "epoch": 8.6049008168028, "grad_norm": 0.006439208984375, "learning_rate": 9.43375100393502e-06, "loss": 0.0002, "step": 18436 }, { "epoch": 8.60536756126021, "grad_norm": 0.013427734375, "learning_rate": 9.427530431716558e-06, "loss": 0.0001, "step": 18437 }, { "epoch": 8.60583430571762, "grad_norm": 0.0078125, "learning_rate": 9.421311809623835e-06, "loss": 0.0001, "step": 18438 }, { "epoch": 8.606301050175029, "grad_norm": 0.017578125, "learning_rate": 9.415095137790785e-06, "loss": 0.0002, "step": 18439 }, { "epoch": 8.60676779463244, "grad_norm": 0.00689697265625, "learning_rate": 9.40888041635123e-06, "loss": 0.0001, "step": 18440 }, { "epoch": 8.607234539089848, "grad_norm": 0.00909423828125, "learning_rate": 9.40266764543899e-06, "loss": 0.0001, "step": 18441 }, { "epoch": 8.607701283547257, "grad_norm": 0.0615234375, "learning_rate": 9.39645682518785e-06, "loss": 0.0031, "step": 18442 }, { "epoch": 8.608168028004668, "grad_norm": 0.0242919921875, "learning_rate": 9.390247955731512e-06, "loss": 0.0002, "step": 18443 }, { "epoch": 8.608634772462077, "grad_norm": 0.01123046875, "learning_rate": 9.384041037203661e-06, "loss": 0.0001, "step": 18444 }, { "epoch": 8.609101516919486, "grad_norm": 0.007354736328125, "learning_rate": 9.377836069737966e-06, "loss": 0.0001, "step": 18445 }, { "epoch": 8.609568261376896, "grad_norm": 0.0162353515625, "learning_rate": 9.371633053467988e-06, "loss": 0.0002, "step": 18446 }, { "epoch": 8.610035005834305, "grad_norm": 0.0272216796875, "learning_rate": 9.365431988527307e-06, "loss": 0.0002, "step": 18447 }, { "epoch": 8.610501750291716, "grad_norm": 0.00811767578125, "learning_rate": 9.35923287504945e-06, "loss": 0.0002, "step": 18448 }, { "epoch": 8.610968494749125, "grad_norm": 0.006591796875, "learning_rate": 9.353035713167858e-06, "loss": 0.0001, "step": 18449 }, { "epoch": 8.611435239206534, "grad_norm": 0.009033203125, "learning_rate": 9.346840503015985e-06, "loss": 0.0002, "step": 18450 }, { "epoch": 8.611901983663945, "grad_norm": 0.0301513671875, "learning_rate": 9.340647244727241e-06, "loss": 0.0002, "step": 18451 }, { "epoch": 8.612368728121353, "grad_norm": 0.032958984375, "learning_rate": 9.334455938434916e-06, "loss": 0.0002, "step": 18452 }, { "epoch": 8.612835472578762, "grad_norm": 0.01116943359375, "learning_rate": 9.32826658427236e-06, "loss": 0.0002, "step": 18453 }, { "epoch": 8.613302217036173, "grad_norm": 0.06591796875, "learning_rate": 9.322079182372845e-06, "loss": 0.006, "step": 18454 }, { "epoch": 8.613768961493582, "grad_norm": 0.007781982421875, "learning_rate": 9.315893732869563e-06, "loss": 0.0001, "step": 18455 }, { "epoch": 8.614235705950993, "grad_norm": 0.0123291015625, "learning_rate": 9.309710235895697e-06, "loss": 0.0001, "step": 18456 }, { "epoch": 8.614702450408402, "grad_norm": 0.0296630859375, "learning_rate": 9.303528691584407e-06, "loss": 0.0002, "step": 18457 }, { "epoch": 8.61516919486581, "grad_norm": 0.005706787109375, "learning_rate": 9.297349100068764e-06, "loss": 0.0001, "step": 18458 }, { "epoch": 8.615635939323221, "grad_norm": 0.0079345703125, "learning_rate": 9.29117146148184e-06, "loss": 0.0001, "step": 18459 }, { "epoch": 8.61610268378063, "grad_norm": 0.0130615234375, "learning_rate": 9.284995775956628e-06, "loss": 0.0002, "step": 18460 }, { "epoch": 8.616569428238039, "grad_norm": 0.00689697265625, "learning_rate": 9.2788220436261e-06, "loss": 0.0001, "step": 18461 }, { "epoch": 8.61703617269545, "grad_norm": 0.0042724609375, "learning_rate": 9.272650264623206e-06, "loss": 0.0001, "step": 18462 }, { "epoch": 8.617502917152859, "grad_norm": 0.007354736328125, "learning_rate": 9.266480439080804e-06, "loss": 0.0002, "step": 18463 }, { "epoch": 8.617969661610267, "grad_norm": 0.00726318359375, "learning_rate": 9.260312567131746e-06, "loss": 0.0001, "step": 18464 }, { "epoch": 8.618436406067678, "grad_norm": 0.068359375, "learning_rate": 9.254146648908857e-06, "loss": 0.0042, "step": 18465 }, { "epoch": 8.618903150525087, "grad_norm": 0.006011962890625, "learning_rate": 9.247982684544854e-06, "loss": 0.0001, "step": 18466 }, { "epoch": 8.619369894982498, "grad_norm": 0.007049560546875, "learning_rate": 9.241820674172486e-06, "loss": 0.0002, "step": 18467 }, { "epoch": 8.619836639439907, "grad_norm": 0.018798828125, "learning_rate": 9.235660617924424e-06, "loss": 0.0002, "step": 18468 }, { "epoch": 8.620303383897316, "grad_norm": 0.00811767578125, "learning_rate": 9.229502515933275e-06, "loss": 0.0001, "step": 18469 }, { "epoch": 8.620770128354726, "grad_norm": 0.0133056640625, "learning_rate": 9.223346368331643e-06, "loss": 0.0001, "step": 18470 }, { "epoch": 8.621236872812135, "grad_norm": 0.012451171875, "learning_rate": 9.21719217525211e-06, "loss": 0.0001, "step": 18471 }, { "epoch": 8.621703617269546, "grad_norm": 0.01165771484375, "learning_rate": 9.211039936827137e-06, "loss": 0.0002, "step": 18472 }, { "epoch": 8.622170361726955, "grad_norm": 0.00897216796875, "learning_rate": 9.204889653189197e-06, "loss": 0.0001, "step": 18473 }, { "epoch": 8.622637106184364, "grad_norm": 0.02294921875, "learning_rate": 9.19874132447075e-06, "loss": 0.0005, "step": 18474 }, { "epoch": 8.623103850641774, "grad_norm": 0.0166015625, "learning_rate": 9.192594950804124e-06, "loss": 0.0002, "step": 18475 }, { "epoch": 8.623570595099183, "grad_norm": 0.05908203125, "learning_rate": 9.1864505323217e-06, "loss": 0.0018, "step": 18476 }, { "epoch": 8.624037339556592, "grad_norm": 0.0126953125, "learning_rate": 9.18030806915574e-06, "loss": 0.0002, "step": 18477 }, { "epoch": 8.624504084014003, "grad_norm": 0.0167236328125, "learning_rate": 9.174167561438519e-06, "loss": 0.0002, "step": 18478 }, { "epoch": 8.624970828471412, "grad_norm": 0.01507568359375, "learning_rate": 9.168029009302248e-06, "loss": 0.0002, "step": 18479 }, { "epoch": 8.62543757292882, "grad_norm": 0.006072998046875, "learning_rate": 9.16189241287908e-06, "loss": 0.0001, "step": 18480 }, { "epoch": 8.625904317386231, "grad_norm": 0.007293701171875, "learning_rate": 9.155757772301177e-06, "loss": 0.0001, "step": 18481 }, { "epoch": 8.62637106184364, "grad_norm": 0.034423828125, "learning_rate": 9.149625087700587e-06, "loss": 0.0002, "step": 18482 }, { "epoch": 8.626837806301051, "grad_norm": 0.005767822265625, "learning_rate": 9.143494359209382e-06, "loss": 0.0001, "step": 18483 }, { "epoch": 8.62730455075846, "grad_norm": 0.014404296875, "learning_rate": 9.137365586959523e-06, "loss": 0.0002, "step": 18484 }, { "epoch": 8.627771295215869, "grad_norm": 0.005950927734375, "learning_rate": 9.131238771083007e-06, "loss": 0.0001, "step": 18485 }, { "epoch": 8.62823803967328, "grad_norm": 0.00628662109375, "learning_rate": 9.125113911711759e-06, "loss": 0.0001, "step": 18486 }, { "epoch": 8.628704784130688, "grad_norm": 0.01123046875, "learning_rate": 9.118991008977618e-06, "loss": 0.0001, "step": 18487 }, { "epoch": 8.629171528588097, "grad_norm": 0.008544921875, "learning_rate": 9.112870063012446e-06, "loss": 0.0001, "step": 18488 }, { "epoch": 8.629638273045508, "grad_norm": 0.017333984375, "learning_rate": 9.106751073948016e-06, "loss": 0.0001, "step": 18489 }, { "epoch": 8.630105017502917, "grad_norm": 0.0576171875, "learning_rate": 9.100634041916078e-06, "loss": 0.0003, "step": 18490 }, { "epoch": 8.630571761960327, "grad_norm": 0.02197265625, "learning_rate": 9.09451896704836e-06, "loss": 0.0001, "step": 18491 }, { "epoch": 8.631038506417736, "grad_norm": 0.012939453125, "learning_rate": 9.08840584947649e-06, "loss": 0.0002, "step": 18492 }, { "epoch": 8.631505250875145, "grad_norm": 0.0263671875, "learning_rate": 9.082294689332116e-06, "loss": 0.0002, "step": 18493 }, { "epoch": 8.631971995332556, "grad_norm": 0.020263671875, "learning_rate": 9.076185486746824e-06, "loss": 0.0002, "step": 18494 }, { "epoch": 8.632438739789965, "grad_norm": 0.0086669921875, "learning_rate": 9.070078241852131e-06, "loss": 0.0002, "step": 18495 }, { "epoch": 8.632905484247374, "grad_norm": 0.035400390625, "learning_rate": 9.063972954779531e-06, "loss": 0.0017, "step": 18496 }, { "epoch": 8.633372228704784, "grad_norm": 0.007080078125, "learning_rate": 9.057869625660509e-06, "loss": 0.0001, "step": 18497 }, { "epoch": 8.633838973162193, "grad_norm": 0.01190185546875, "learning_rate": 9.051768254626447e-06, "loss": 0.0001, "step": 18498 }, { "epoch": 8.634305717619604, "grad_norm": 0.005828857421875, "learning_rate": 9.045668841808719e-06, "loss": 0.0001, "step": 18499 }, { "epoch": 8.634772462077013, "grad_norm": 0.0108642578125, "learning_rate": 9.039571387338663e-06, "loss": 0.0001, "step": 18500 }, { "epoch": 8.635239206534422, "grad_norm": 0.10693359375, "learning_rate": 9.033475891347554e-06, "loss": 0.0006, "step": 18501 }, { "epoch": 8.635705950991833, "grad_norm": 0.07080078125, "learning_rate": 9.027382353966629e-06, "loss": 0.0017, "step": 18502 }, { "epoch": 8.636172695449241, "grad_norm": 0.0098876953125, "learning_rate": 9.02129077532713e-06, "loss": 0.0001, "step": 18503 }, { "epoch": 8.63663943990665, "grad_norm": 0.11083984375, "learning_rate": 9.01520115556015e-06, "loss": 0.0003, "step": 18504 }, { "epoch": 8.637106184364061, "grad_norm": 0.006500244140625, "learning_rate": 9.00911349479684e-06, "loss": 0.0002, "step": 18505 }, { "epoch": 8.63757292882147, "grad_norm": 0.06396484375, "learning_rate": 9.003027793168295e-06, "loss": 0.0049, "step": 18506 }, { "epoch": 8.638039673278879, "grad_norm": 0.01055908203125, "learning_rate": 8.996944050805511e-06, "loss": 0.0002, "step": 18507 }, { "epoch": 8.63850641773629, "grad_norm": 0.005706787109375, "learning_rate": 8.990862267839495e-06, "loss": 0.0001, "step": 18508 }, { "epoch": 8.638973162193698, "grad_norm": 0.00653076171875, "learning_rate": 8.984782444401185e-06, "loss": 0.0001, "step": 18509 }, { "epoch": 8.63943990665111, "grad_norm": 0.0184326171875, "learning_rate": 8.978704580621489e-06, "loss": 0.0002, "step": 18510 }, { "epoch": 8.639906651108518, "grad_norm": 0.043701171875, "learning_rate": 8.97262867663129e-06, "loss": 0.0023, "step": 18511 }, { "epoch": 8.640373395565927, "grad_norm": 0.0230712890625, "learning_rate": 8.966554732561383e-06, "loss": 0.0002, "step": 18512 }, { "epoch": 8.640840140023338, "grad_norm": 0.0208740234375, "learning_rate": 8.960482748542542e-06, "loss": 0.0002, "step": 18513 }, { "epoch": 8.641306884480747, "grad_norm": 0.04541015625, "learning_rate": 8.954412724705541e-06, "loss": 0.0016, "step": 18514 }, { "epoch": 8.641773628938157, "grad_norm": 0.02099609375, "learning_rate": 8.948344661181041e-06, "loss": 0.0002, "step": 18515 }, { "epoch": 8.642240373395566, "grad_norm": 0.00653076171875, "learning_rate": 8.942278558099693e-06, "loss": 0.0001, "step": 18516 }, { "epoch": 8.642707117852975, "grad_norm": 0.06640625, "learning_rate": 8.936214415592137e-06, "loss": 0.0035, "step": 18517 }, { "epoch": 8.643173862310386, "grad_norm": 0.0045166015625, "learning_rate": 8.930152233788901e-06, "loss": 0.0001, "step": 18518 }, { "epoch": 8.643640606767795, "grad_norm": 0.00445556640625, "learning_rate": 8.92409201282054e-06, "loss": 0.0001, "step": 18519 }, { "epoch": 8.644107351225204, "grad_norm": 0.006988525390625, "learning_rate": 8.918033752817546e-06, "loss": 0.0001, "step": 18520 }, { "epoch": 8.644574095682614, "grad_norm": 0.005859375, "learning_rate": 8.911977453910314e-06, "loss": 0.0002, "step": 18521 }, { "epoch": 8.645040840140023, "grad_norm": 0.00799560546875, "learning_rate": 8.905923116229287e-06, "loss": 0.0001, "step": 18522 }, { "epoch": 8.645507584597432, "grad_norm": 0.031494140625, "learning_rate": 8.899870739904815e-06, "loss": 0.0002, "step": 18523 }, { "epoch": 8.645974329054843, "grad_norm": 0.007293701171875, "learning_rate": 8.893820325067182e-06, "loss": 0.0002, "step": 18524 }, { "epoch": 8.646441073512252, "grad_norm": 0.03271484375, "learning_rate": 8.887771871846707e-06, "loss": 0.0002, "step": 18525 }, { "epoch": 8.646907817969662, "grad_norm": 0.08349609375, "learning_rate": 8.881725380373574e-06, "loss": 0.0028, "step": 18526 }, { "epoch": 8.647374562427071, "grad_norm": 0.068359375, "learning_rate": 8.87568085077799e-06, "loss": 0.0002, "step": 18527 }, { "epoch": 8.64784130688448, "grad_norm": 0.045654296875, "learning_rate": 8.869638283190119e-06, "loss": 0.002, "step": 18528 }, { "epoch": 8.64830805134189, "grad_norm": 0.025634765625, "learning_rate": 8.863597677740043e-06, "loss": 0.0003, "step": 18529 }, { "epoch": 8.6487747957993, "grad_norm": 0.01043701171875, "learning_rate": 8.857559034557817e-06, "loss": 0.0001, "step": 18530 }, { "epoch": 8.649241540256709, "grad_norm": 0.0263671875, "learning_rate": 8.851522353773456e-06, "loss": 0.0002, "step": 18531 }, { "epoch": 8.64970828471412, "grad_norm": 0.0308837890625, "learning_rate": 8.84548763551698e-06, "loss": 0.0018, "step": 18532 }, { "epoch": 8.650175029171528, "grad_norm": 0.01556396484375, "learning_rate": 8.839454879918274e-06, "loss": 0.0001, "step": 18533 }, { "epoch": 8.650641773628939, "grad_norm": 0.01483154296875, "learning_rate": 8.833424087107245e-06, "loss": 0.0001, "step": 18534 }, { "epoch": 8.651108518086348, "grad_norm": 0.04931640625, "learning_rate": 8.827395257213766e-06, "loss": 0.0023, "step": 18535 }, { "epoch": 8.651575262543757, "grad_norm": 0.00762939453125, "learning_rate": 8.821368390367601e-06, "loss": 0.0001, "step": 18536 }, { "epoch": 8.652042007001167, "grad_norm": 0.140625, "learning_rate": 8.815343486698568e-06, "loss": 0.0003, "step": 18537 }, { "epoch": 8.652508751458576, "grad_norm": 0.0125732421875, "learning_rate": 8.80932054633633e-06, "loss": 0.0001, "step": 18538 }, { "epoch": 8.652975495915985, "grad_norm": 0.00823974609375, "learning_rate": 8.803299569410606e-06, "loss": 0.0002, "step": 18539 }, { "epoch": 8.653442240373396, "grad_norm": 0.029296875, "learning_rate": 8.797280556051046e-06, "loss": 0.0022, "step": 18540 }, { "epoch": 8.653908984830805, "grad_norm": 0.0111083984375, "learning_rate": 8.791263506387205e-06, "loss": 0.0001, "step": 18541 }, { "epoch": 8.654375729288216, "grad_norm": 0.08740234375, "learning_rate": 8.785248420548665e-06, "loss": 0.0031, "step": 18542 }, { "epoch": 8.654842473745624, "grad_norm": 0.00555419921875, "learning_rate": 8.779235298664945e-06, "loss": 0.0001, "step": 18543 }, { "epoch": 8.655309218203033, "grad_norm": 0.05126953125, "learning_rate": 8.773224140865477e-06, "loss": 0.0003, "step": 18544 }, { "epoch": 8.655775962660444, "grad_norm": 0.037841796875, "learning_rate": 8.767214947279723e-06, "loss": 0.0027, "step": 18545 }, { "epoch": 8.656242707117853, "grad_norm": 0.0595703125, "learning_rate": 8.761207718037067e-06, "loss": 0.0002, "step": 18546 }, { "epoch": 8.656709451575262, "grad_norm": 0.01123046875, "learning_rate": 8.75520245326682e-06, "loss": 0.0001, "step": 18547 }, { "epoch": 8.657176196032673, "grad_norm": 0.00958251953125, "learning_rate": 8.749199153098298e-06, "loss": 0.0001, "step": 18548 }, { "epoch": 8.657642940490081, "grad_norm": 0.0185546875, "learning_rate": 8.743197817660787e-06, "loss": 0.0003, "step": 18549 }, { "epoch": 8.65810968494749, "grad_norm": 0.00946044921875, "learning_rate": 8.73719844708346e-06, "loss": 0.0001, "step": 18550 }, { "epoch": 8.658576429404901, "grad_norm": 0.01116943359375, "learning_rate": 8.731201041495507e-06, "loss": 0.0001, "step": 18551 }, { "epoch": 8.65904317386231, "grad_norm": 0.0093994140625, "learning_rate": 8.725205601026088e-06, "loss": 0.0002, "step": 18552 }, { "epoch": 8.65950991831972, "grad_norm": 0.00457763671875, "learning_rate": 8.719212125804233e-06, "loss": 0.0001, "step": 18553 }, { "epoch": 8.65997666277713, "grad_norm": 0.0181884765625, "learning_rate": 8.713220615959016e-06, "loss": 0.0002, "step": 18554 }, { "epoch": 8.660443407234538, "grad_norm": 0.00836181640625, "learning_rate": 8.70723107161947e-06, "loss": 0.0001, "step": 18555 }, { "epoch": 8.660910151691949, "grad_norm": 0.0216064453125, "learning_rate": 8.701243492914513e-06, "loss": 0.0002, "step": 18556 }, { "epoch": 8.661376896149358, "grad_norm": 0.00689697265625, "learning_rate": 8.695257879973095e-06, "loss": 0.0001, "step": 18557 }, { "epoch": 8.661843640606769, "grad_norm": 0.00592041015625, "learning_rate": 8.689274232924061e-06, "loss": 0.0001, "step": 18558 }, { "epoch": 8.662310385064178, "grad_norm": 0.00537109375, "learning_rate": 8.683292551896272e-06, "loss": 0.0001, "step": 18559 }, { "epoch": 8.662777129521587, "grad_norm": 0.0177001953125, "learning_rate": 8.677312837018514e-06, "loss": 0.0002, "step": 18560 }, { "epoch": 8.663243873978997, "grad_norm": 0.0169677734375, "learning_rate": 8.67133508841953e-06, "loss": 0.0005, "step": 18561 }, { "epoch": 8.663710618436406, "grad_norm": 0.007232666015625, "learning_rate": 8.665359306228028e-06, "loss": 0.0001, "step": 18562 }, { "epoch": 8.664177362893815, "grad_norm": 0.01251220703125, "learning_rate": 8.659385490572702e-06, "loss": 0.0001, "step": 18563 }, { "epoch": 8.664644107351226, "grad_norm": 0.1240234375, "learning_rate": 8.65341364158212e-06, "loss": 0.004, "step": 18564 }, { "epoch": 8.665110851808635, "grad_norm": 0.01531982421875, "learning_rate": 8.647443759384899e-06, "loss": 0.0001, "step": 18565 }, { "epoch": 8.665577596266044, "grad_norm": 0.00897216796875, "learning_rate": 8.641475844109593e-06, "loss": 0.0001, "step": 18566 }, { "epoch": 8.666044340723454, "grad_norm": 0.005767822265625, "learning_rate": 8.635509895884653e-06, "loss": 0.0001, "step": 18567 }, { "epoch": 8.666511085180863, "grad_norm": 0.01153564453125, "learning_rate": 8.629545914838555e-06, "loss": 0.0001, "step": 18568 }, { "epoch": 8.666977829638274, "grad_norm": 0.01312255859375, "learning_rate": 8.623583901099741e-06, "loss": 0.0001, "step": 18569 }, { "epoch": 8.667444574095683, "grad_norm": 0.006439208984375, "learning_rate": 8.617623854796519e-06, "loss": 0.0001, "step": 18570 }, { "epoch": 8.667911318553092, "grad_norm": 0.0076904296875, "learning_rate": 8.611665776057243e-06, "loss": 0.0001, "step": 18571 }, { "epoch": 8.668378063010502, "grad_norm": 0.01263427734375, "learning_rate": 8.605709665010232e-06, "loss": 0.0002, "step": 18572 }, { "epoch": 8.668844807467911, "grad_norm": 0.04931640625, "learning_rate": 8.59975552178367e-06, "loss": 0.0025, "step": 18573 }, { "epoch": 8.66931155192532, "grad_norm": 0.0113525390625, "learning_rate": 8.59380334650579e-06, "loss": 0.0001, "step": 18574 }, { "epoch": 8.66977829638273, "grad_norm": 0.01519775390625, "learning_rate": 8.587853139304735e-06, "loss": 0.0001, "step": 18575 }, { "epoch": 8.67024504084014, "grad_norm": 0.00970458984375, "learning_rate": 8.581904900308624e-06, "loss": 0.0001, "step": 18576 }, { "epoch": 8.67071178529755, "grad_norm": 0.01239013671875, "learning_rate": 8.575958629645554e-06, "loss": 0.0001, "step": 18577 }, { "epoch": 8.67117852975496, "grad_norm": 0.033203125, "learning_rate": 8.570014327443521e-06, "loss": 0.0002, "step": 18578 }, { "epoch": 8.671645274212368, "grad_norm": 0.03857421875, "learning_rate": 8.564071993830503e-06, "loss": 0.0025, "step": 18579 }, { "epoch": 8.672112018669779, "grad_norm": 0.007049560546875, "learning_rate": 8.558131628934474e-06, "loss": 0.0001, "step": 18580 }, { "epoch": 8.672578763127188, "grad_norm": 0.005462646484375, "learning_rate": 8.552193232883344e-06, "loss": 0.0001, "step": 18581 }, { "epoch": 8.673045507584597, "grad_norm": 0.016845703125, "learning_rate": 8.546256805804931e-06, "loss": 0.0002, "step": 18582 }, { "epoch": 8.673512252042007, "grad_norm": 0.048583984375, "learning_rate": 8.54032234782709e-06, "loss": 0.0022, "step": 18583 }, { "epoch": 8.673978996499416, "grad_norm": 0.0174560546875, "learning_rate": 8.534389859077596e-06, "loss": 0.0001, "step": 18584 }, { "epoch": 8.674445740956827, "grad_norm": 0.005401611328125, "learning_rate": 8.528459339684158e-06, "loss": 0.0001, "step": 18585 }, { "epoch": 8.674912485414236, "grad_norm": 0.0155029296875, "learning_rate": 8.522530789774474e-06, "loss": 0.0001, "step": 18586 }, { "epoch": 8.675379229871645, "grad_norm": 0.015625, "learning_rate": 8.51660420947622e-06, "loss": 0.0002, "step": 18587 }, { "epoch": 8.675845974329055, "grad_norm": 0.00982666015625, "learning_rate": 8.51067959891696e-06, "loss": 0.0001, "step": 18588 }, { "epoch": 8.676312718786464, "grad_norm": 0.00421142578125, "learning_rate": 8.504756958224292e-06, "loss": 0.0001, "step": 18589 }, { "epoch": 8.676779463243873, "grad_norm": 0.0050048828125, "learning_rate": 8.498836287525713e-06, "loss": 0.0001, "step": 18590 }, { "epoch": 8.677246207701284, "grad_norm": 0.00665283203125, "learning_rate": 8.492917586948712e-06, "loss": 0.0002, "step": 18591 }, { "epoch": 8.677712952158693, "grad_norm": 0.060546875, "learning_rate": 8.48700085662073e-06, "loss": 0.0031, "step": 18592 }, { "epoch": 8.678179696616102, "grad_norm": 0.0069580078125, "learning_rate": 8.481086096669144e-06, "loss": 0.0001, "step": 18593 }, { "epoch": 8.678646441073512, "grad_norm": 0.010009765625, "learning_rate": 8.475173307221317e-06, "loss": 0.0001, "step": 18594 }, { "epoch": 8.679113185530921, "grad_norm": 0.0087890625, "learning_rate": 8.469262488404572e-06, "loss": 0.0001, "step": 18595 }, { "epoch": 8.679579929988332, "grad_norm": 0.011962890625, "learning_rate": 8.46335364034615e-06, "loss": 0.0001, "step": 18596 }, { "epoch": 8.680046674445741, "grad_norm": 0.01031494140625, "learning_rate": 8.457446763173283e-06, "loss": 0.0001, "step": 18597 }, { "epoch": 8.68051341890315, "grad_norm": 0.006195068359375, "learning_rate": 8.451541857013168e-06, "loss": 0.0001, "step": 18598 }, { "epoch": 8.68098016336056, "grad_norm": 0.019775390625, "learning_rate": 8.445638921992915e-06, "loss": 0.0002, "step": 18599 }, { "epoch": 8.68144690781797, "grad_norm": 0.005950927734375, "learning_rate": 8.439737958239635e-06, "loss": 0.0001, "step": 18600 }, { "epoch": 8.68191365227538, "grad_norm": 0.010986328125, "learning_rate": 8.433838965880414e-06, "loss": 0.0001, "step": 18601 }, { "epoch": 8.682380396732789, "grad_norm": 0.006561279296875, "learning_rate": 8.427941945042206e-06, "loss": 0.0001, "step": 18602 }, { "epoch": 8.682847141190198, "grad_norm": 0.01458740234375, "learning_rate": 8.422046895852009e-06, "loss": 0.0002, "step": 18603 }, { "epoch": 8.683313885647609, "grad_norm": 0.034912109375, "learning_rate": 8.416153818436778e-06, "loss": 0.0002, "step": 18604 }, { "epoch": 8.683780630105018, "grad_norm": 0.049560546875, "learning_rate": 8.410262712923345e-06, "loss": 0.0002, "step": 18605 }, { "epoch": 8.684247374562426, "grad_norm": 0.005859375, "learning_rate": 8.404373579438585e-06, "loss": 0.0001, "step": 18606 }, { "epoch": 8.684714119019837, "grad_norm": 0.035888671875, "learning_rate": 8.398486418109286e-06, "loss": 0.002, "step": 18607 }, { "epoch": 8.685180863477246, "grad_norm": 0.040771484375, "learning_rate": 8.392601229062203e-06, "loss": 0.003, "step": 18608 }, { "epoch": 8.685647607934655, "grad_norm": 0.0087890625, "learning_rate": 8.386718012424066e-06, "loss": 0.0001, "step": 18609 }, { "epoch": 8.686114352392066, "grad_norm": 0.0279541015625, "learning_rate": 8.380836768321531e-06, "loss": 0.0005, "step": 18610 }, { "epoch": 8.686581096849475, "grad_norm": 0.004669189453125, "learning_rate": 8.37495749688123e-06, "loss": 0.0001, "step": 18611 }, { "epoch": 8.687047841306885, "grad_norm": 0.007781982421875, "learning_rate": 8.369080198229762e-06, "loss": 0.0001, "step": 18612 }, { "epoch": 8.687514585764294, "grad_norm": 0.005340576171875, "learning_rate": 8.363204872493657e-06, "loss": 0.0001, "step": 18613 }, { "epoch": 8.687981330221703, "grad_norm": 0.0086669921875, "learning_rate": 8.357331519799415e-06, "loss": 0.0001, "step": 18614 }, { "epoch": 8.688448074679114, "grad_norm": 0.0308837890625, "learning_rate": 8.351460140273537e-06, "loss": 0.0003, "step": 18615 }, { "epoch": 8.688914819136523, "grad_norm": 0.01458740234375, "learning_rate": 8.345590734042375e-06, "loss": 0.0001, "step": 18616 }, { "epoch": 8.689381563593932, "grad_norm": 0.01092529296875, "learning_rate": 8.33972330123235e-06, "loss": 0.0001, "step": 18617 }, { "epoch": 8.689848308051342, "grad_norm": 0.01177978515625, "learning_rate": 8.333857841969783e-06, "loss": 0.0001, "step": 18618 }, { "epoch": 8.690315052508751, "grad_norm": 0.052978515625, "learning_rate": 8.327994356380953e-06, "loss": 0.002, "step": 18619 }, { "epoch": 8.690781796966162, "grad_norm": 0.005950927734375, "learning_rate": 8.322132844592124e-06, "loss": 0.0001, "step": 18620 }, { "epoch": 8.69124854142357, "grad_norm": 0.0135498046875, "learning_rate": 8.316273306729493e-06, "loss": 0.0001, "step": 18621 }, { "epoch": 8.69171528588098, "grad_norm": 0.021484375, "learning_rate": 8.31041574291922e-06, "loss": 0.0002, "step": 18622 }, { "epoch": 8.69218203033839, "grad_norm": 0.00897216796875, "learning_rate": 8.304560153287421e-06, "loss": 0.0002, "step": 18623 }, { "epoch": 8.6926487747958, "grad_norm": 0.007720947265625, "learning_rate": 8.298706537960198e-06, "loss": 0.0001, "step": 18624 }, { "epoch": 8.693115519253208, "grad_norm": 0.212890625, "learning_rate": 8.292854897063574e-06, "loss": 0.0004, "step": 18625 }, { "epoch": 8.693582263710619, "grad_norm": 0.01043701171875, "learning_rate": 8.28700523072351e-06, "loss": 0.0002, "step": 18626 }, { "epoch": 8.694049008168028, "grad_norm": 0.0186767578125, "learning_rate": 8.281157539065998e-06, "loss": 0.0002, "step": 18627 }, { "epoch": 8.694515752625438, "grad_norm": 0.201171875, "learning_rate": 8.275311822216913e-06, "loss": 0.0003, "step": 18628 }, { "epoch": 8.694982497082847, "grad_norm": 0.010009765625, "learning_rate": 8.269468080302134e-06, "loss": 0.0001, "step": 18629 }, { "epoch": 8.695449241540256, "grad_norm": 0.08642578125, "learning_rate": 8.263626313447503e-06, "loss": 0.0049, "step": 18630 }, { "epoch": 8.695915985997667, "grad_norm": 0.01507568359375, "learning_rate": 8.257786521778766e-06, "loss": 0.0002, "step": 18631 }, { "epoch": 8.696382730455076, "grad_norm": 0.0064697265625, "learning_rate": 8.251948705421664e-06, "loss": 0.0001, "step": 18632 }, { "epoch": 8.696849474912485, "grad_norm": 0.0177001953125, "learning_rate": 8.246112864501931e-06, "loss": 0.0002, "step": 18633 }, { "epoch": 8.697316219369895, "grad_norm": 0.01251220703125, "learning_rate": 8.240278999145168e-06, "loss": 0.0001, "step": 18634 }, { "epoch": 8.697782963827304, "grad_norm": 0.0341796875, "learning_rate": 8.234447109477007e-06, "loss": 0.0021, "step": 18635 }, { "epoch": 8.698249708284713, "grad_norm": 0.019287109375, "learning_rate": 8.228617195623034e-06, "loss": 0.0002, "step": 18636 }, { "epoch": 8.698716452742124, "grad_norm": 0.01007080078125, "learning_rate": 8.222789257708741e-06, "loss": 0.0001, "step": 18637 }, { "epoch": 8.699183197199533, "grad_norm": 0.007293701171875, "learning_rate": 8.216963295859637e-06, "loss": 0.0001, "step": 18638 }, { "epoch": 8.699649941656943, "grad_norm": 0.00921630859375, "learning_rate": 8.211139310201133e-06, "loss": 0.0001, "step": 18639 }, { "epoch": 8.700116686114352, "grad_norm": 0.0081787109375, "learning_rate": 8.20531730085864e-06, "loss": 0.0001, "step": 18640 }, { "epoch": 8.700583430571761, "grad_norm": 0.01373291015625, "learning_rate": 8.199497267957523e-06, "loss": 0.0002, "step": 18641 }, { "epoch": 8.701050175029172, "grad_norm": 0.010009765625, "learning_rate": 8.193679211623084e-06, "loss": 0.0002, "step": 18642 }, { "epoch": 8.701516919486581, "grad_norm": 0.009033203125, "learning_rate": 8.187863131980578e-06, "loss": 0.0001, "step": 18643 }, { "epoch": 8.701983663943992, "grad_norm": 0.031494140625, "learning_rate": 8.18204902915527e-06, "loss": 0.0002, "step": 18644 }, { "epoch": 8.7024504084014, "grad_norm": 0.01025390625, "learning_rate": 8.176236903272304e-06, "loss": 0.0001, "step": 18645 }, { "epoch": 8.70291715285881, "grad_norm": 0.01068115234375, "learning_rate": 8.170426754456829e-06, "loss": 0.0001, "step": 18646 }, { "epoch": 8.70338389731622, "grad_norm": 0.0091552734375, "learning_rate": 8.164618582833983e-06, "loss": 0.0001, "step": 18647 }, { "epoch": 8.703850641773629, "grad_norm": 0.041748046875, "learning_rate": 8.15881238852877e-06, "loss": 0.0015, "step": 18648 }, { "epoch": 8.704317386231038, "grad_norm": 0.00653076171875, "learning_rate": 8.153008171666221e-06, "loss": 0.0001, "step": 18649 }, { "epoch": 8.704784130688449, "grad_norm": 0.00537109375, "learning_rate": 8.147205932371326e-06, "loss": 0.0001, "step": 18650 }, { "epoch": 8.705250875145857, "grad_norm": 0.0244140625, "learning_rate": 8.141405670768987e-06, "loss": 0.0002, "step": 18651 }, { "epoch": 8.705717619603266, "grad_norm": 0.00665283203125, "learning_rate": 8.135607386984112e-06, "loss": 0.0001, "step": 18652 }, { "epoch": 8.706184364060677, "grad_norm": 0.0269775390625, "learning_rate": 8.129811081141548e-06, "loss": 0.0002, "step": 18653 }, { "epoch": 8.706651108518086, "grad_norm": 0.07666015625, "learning_rate": 8.12401675336606e-06, "loss": 0.0059, "step": 18654 }, { "epoch": 8.707117852975497, "grad_norm": 0.0166015625, "learning_rate": 8.118224403782448e-06, "loss": 0.0002, "step": 18655 }, { "epoch": 8.707584597432906, "grad_norm": 0.00836181640625, "learning_rate": 8.112434032515393e-06, "loss": 0.0001, "step": 18656 }, { "epoch": 8.708051341890314, "grad_norm": 0.034912109375, "learning_rate": 8.106645639689581e-06, "loss": 0.0031, "step": 18657 }, { "epoch": 8.708518086347725, "grad_norm": 0.005615234375, "learning_rate": 8.100859225429669e-06, "loss": 0.0001, "step": 18658 }, { "epoch": 8.708984830805134, "grad_norm": 0.01336669921875, "learning_rate": 8.095074789860202e-06, "loss": 0.0001, "step": 18659 }, { "epoch": 8.709451575262543, "grad_norm": 0.03515625, "learning_rate": 8.089292333105748e-06, "loss": 0.0002, "step": 18660 }, { "epoch": 8.709918319719954, "grad_norm": 0.04736328125, "learning_rate": 8.083511855290815e-06, "loss": 0.0013, "step": 18661 }, { "epoch": 8.710385064177363, "grad_norm": 0.0118408203125, "learning_rate": 8.077733356539851e-06, "loss": 0.0001, "step": 18662 }, { "epoch": 8.710851808634773, "grad_norm": 0.015869140625, "learning_rate": 8.071956836977268e-06, "loss": 0.0002, "step": 18663 }, { "epoch": 8.711318553092182, "grad_norm": 0.0169677734375, "learning_rate": 8.066182296727464e-06, "loss": 0.0002, "step": 18664 }, { "epoch": 8.711785297549591, "grad_norm": 0.07421875, "learning_rate": 8.060409735914753e-06, "loss": 0.0028, "step": 18665 }, { "epoch": 8.712252042007002, "grad_norm": 0.007720947265625, "learning_rate": 8.054639154663424e-06, "loss": 0.0001, "step": 18666 }, { "epoch": 8.71271878646441, "grad_norm": 0.007080078125, "learning_rate": 8.048870553097742e-06, "loss": 0.0001, "step": 18667 }, { "epoch": 8.71318553092182, "grad_norm": 0.00677490234375, "learning_rate": 8.043103931341878e-06, "loss": 0.0001, "step": 18668 }, { "epoch": 8.71365227537923, "grad_norm": 0.052490234375, "learning_rate": 8.037339289520029e-06, "loss": 0.0028, "step": 18669 }, { "epoch": 8.71411901983664, "grad_norm": 0.0673828125, "learning_rate": 8.031576627756299e-06, "loss": 0.0025, "step": 18670 }, { "epoch": 8.71458576429405, "grad_norm": 0.053955078125, "learning_rate": 8.025815946174764e-06, "loss": 0.0033, "step": 18671 }, { "epoch": 8.715052508751459, "grad_norm": 0.01043701171875, "learning_rate": 8.020057244899448e-06, "loss": 0.0001, "step": 18672 }, { "epoch": 8.715519253208868, "grad_norm": 0.00836181640625, "learning_rate": 8.014300524054386e-06, "loss": 0.0001, "step": 18673 }, { "epoch": 8.715985997666278, "grad_norm": 0.01263427734375, "learning_rate": 8.008545783763467e-06, "loss": 0.0002, "step": 18674 }, { "epoch": 8.716452742123687, "grad_norm": 0.0068359375, "learning_rate": 8.002793024150624e-06, "loss": 0.0001, "step": 18675 }, { "epoch": 8.716919486581096, "grad_norm": 0.00927734375, "learning_rate": 7.997042245339737e-06, "loss": 0.0002, "step": 18676 }, { "epoch": 8.717386231038507, "grad_norm": 0.005157470703125, "learning_rate": 7.991293447454584e-06, "loss": 0.0001, "step": 18677 }, { "epoch": 8.717852975495916, "grad_norm": 0.0130615234375, "learning_rate": 7.985546630618968e-06, "loss": 0.0001, "step": 18678 }, { "epoch": 8.718319719953325, "grad_norm": 0.0272216796875, "learning_rate": 7.979801794956654e-06, "loss": 0.0001, "step": 18679 }, { "epoch": 8.718786464410735, "grad_norm": 0.05859375, "learning_rate": 7.974058940591277e-06, "loss": 0.0002, "step": 18680 }, { "epoch": 8.719253208868144, "grad_norm": 0.0299072265625, "learning_rate": 7.968318067646518e-06, "loss": 0.0002, "step": 18681 }, { "epoch": 8.719719953325555, "grad_norm": 0.00830078125, "learning_rate": 7.96257917624601e-06, "loss": 0.0001, "step": 18682 }, { "epoch": 8.720186697782964, "grad_norm": 0.006011962890625, "learning_rate": 7.956842266513265e-06, "loss": 0.0001, "step": 18683 }, { "epoch": 8.720653442240373, "grad_norm": 0.0047607421875, "learning_rate": 7.951107338571829e-06, "loss": 0.0001, "step": 18684 }, { "epoch": 8.721120186697783, "grad_norm": 0.007598876953125, "learning_rate": 7.94537439254519e-06, "loss": 0.0001, "step": 18685 }, { "epoch": 8.721586931155192, "grad_norm": 0.0072021484375, "learning_rate": 7.939643428556776e-06, "loss": 0.0001, "step": 18686 }, { "epoch": 8.722053675612603, "grad_norm": 0.0048828125, "learning_rate": 7.933914446729984e-06, "loss": 0.0001, "step": 18687 }, { "epoch": 8.722520420070012, "grad_norm": 0.007781982421875, "learning_rate": 7.928187447188152e-06, "loss": 0.0002, "step": 18688 }, { "epoch": 8.72298716452742, "grad_norm": 0.01312255859375, "learning_rate": 7.9224624300546e-06, "loss": 0.0001, "step": 18689 }, { "epoch": 8.723453908984832, "grad_norm": 0.005157470703125, "learning_rate": 7.91673939545261e-06, "loss": 0.0001, "step": 18690 }, { "epoch": 8.72392065344224, "grad_norm": 0.00836181640625, "learning_rate": 7.911018343505372e-06, "loss": 0.0001, "step": 18691 }, { "epoch": 8.72438739789965, "grad_norm": 0.0693359375, "learning_rate": 7.905299274336087e-06, "loss": 0.0002, "step": 18692 }, { "epoch": 8.72485414235706, "grad_norm": 0.0238037109375, "learning_rate": 7.899582188067912e-06, "loss": 0.0003, "step": 18693 }, { "epoch": 8.725320886814469, "grad_norm": 0.012939453125, "learning_rate": 7.893867084823892e-06, "loss": 0.0002, "step": 18694 }, { "epoch": 8.725787631271878, "grad_norm": 0.00677490234375, "learning_rate": 7.888153964727108e-06, "loss": 0.0001, "step": 18695 }, { "epoch": 8.726254375729289, "grad_norm": 0.00848388671875, "learning_rate": 7.882442827900593e-06, "loss": 0.0001, "step": 18696 }, { "epoch": 8.726721120186697, "grad_norm": 0.005645751953125, "learning_rate": 7.876733674467263e-06, "loss": 0.0002, "step": 18697 }, { "epoch": 8.727187864644108, "grad_norm": 0.0059814453125, "learning_rate": 7.871026504550072e-06, "loss": 0.0001, "step": 18698 }, { "epoch": 8.727654609101517, "grad_norm": 0.005645751953125, "learning_rate": 7.865321318271923e-06, "loss": 0.0001, "step": 18699 }, { "epoch": 8.728121353558926, "grad_norm": 0.006927490234375, "learning_rate": 7.859618115755607e-06, "loss": 0.0001, "step": 18700 }, { "epoch": 8.728588098016337, "grad_norm": 0.0087890625, "learning_rate": 7.853916897123948e-06, "loss": 0.0001, "step": 18701 }, { "epoch": 8.729054842473746, "grad_norm": 0.004608154296875, "learning_rate": 7.848217662499713e-06, "loss": 0.0001, "step": 18702 }, { "epoch": 8.729521586931154, "grad_norm": 0.0096435546875, "learning_rate": 7.842520412005583e-06, "loss": 0.0002, "step": 18703 }, { "epoch": 8.729988331388565, "grad_norm": 0.0269775390625, "learning_rate": 7.836825145764247e-06, "loss": 0.0002, "step": 18704 }, { "epoch": 8.730455075845974, "grad_norm": 0.068359375, "learning_rate": 7.831131863898312e-06, "loss": 0.0038, "step": 18705 }, { "epoch": 8.730921820303385, "grad_norm": 0.03173828125, "learning_rate": 7.825440566530373e-06, "loss": 0.0002, "step": 18706 }, { "epoch": 8.731388564760794, "grad_norm": 0.0169677734375, "learning_rate": 7.81975125378298e-06, "loss": 0.0002, "step": 18707 }, { "epoch": 8.731855309218203, "grad_norm": 0.01312255859375, "learning_rate": 7.814063925778603e-06, "loss": 0.0002, "step": 18708 }, { "epoch": 8.732322053675613, "grad_norm": 0.007415771484375, "learning_rate": 7.808378582639708e-06, "loss": 0.0002, "step": 18709 }, { "epoch": 8.732788798133022, "grad_norm": 0.0166015625, "learning_rate": 7.802695224488732e-06, "loss": 0.0001, "step": 18710 }, { "epoch": 8.733255542590431, "grad_norm": 0.00555419921875, "learning_rate": 7.797013851447999e-06, "loss": 0.0001, "step": 18711 }, { "epoch": 8.733722287047842, "grad_norm": 0.0048828125, "learning_rate": 7.791334463639866e-06, "loss": 0.0001, "step": 18712 }, { "epoch": 8.73418903150525, "grad_norm": 0.05224609375, "learning_rate": 7.785657061186624e-06, "loss": 0.0039, "step": 18713 }, { "epoch": 8.734655775962661, "grad_norm": 0.03271484375, "learning_rate": 7.779981644210488e-06, "loss": 0.0005, "step": 18714 }, { "epoch": 8.73512252042007, "grad_norm": 0.0093994140625, "learning_rate": 7.774308212833658e-06, "loss": 0.0002, "step": 18715 }, { "epoch": 8.735589264877479, "grad_norm": 0.025390625, "learning_rate": 7.768636767178317e-06, "loss": 0.0002, "step": 18716 }, { "epoch": 8.73605600933489, "grad_norm": 0.005828857421875, "learning_rate": 7.762967307366542e-06, "loss": 0.0001, "step": 18717 }, { "epoch": 8.736522753792299, "grad_norm": 0.041259765625, "learning_rate": 7.757299833520425e-06, "loss": 0.0019, "step": 18718 }, { "epoch": 8.736989498249708, "grad_norm": 0.006927490234375, "learning_rate": 7.751634345762005e-06, "loss": 0.0001, "step": 18719 }, { "epoch": 8.737456242707118, "grad_norm": 0.034912109375, "learning_rate": 7.745970844213235e-06, "loss": 0.0017, "step": 18720 }, { "epoch": 8.737922987164527, "grad_norm": 0.0078125, "learning_rate": 7.740309328996064e-06, "loss": 0.0002, "step": 18721 }, { "epoch": 8.738389731621936, "grad_norm": 0.013427734375, "learning_rate": 7.734649800232418e-06, "loss": 0.0002, "step": 18722 }, { "epoch": 8.738856476079347, "grad_norm": 0.041259765625, "learning_rate": 7.72899225804411e-06, "loss": 0.002, "step": 18723 }, { "epoch": 8.739323220536756, "grad_norm": 0.05712890625, "learning_rate": 7.723336702552975e-06, "loss": 0.0024, "step": 18724 }, { "epoch": 8.739789964994166, "grad_norm": 0.03466796875, "learning_rate": 7.717683133880804e-06, "loss": 0.0019, "step": 18725 }, { "epoch": 8.740256709451575, "grad_norm": 0.00982666015625, "learning_rate": 7.712031552149279e-06, "loss": 0.0001, "step": 18726 }, { "epoch": 8.740723453908984, "grad_norm": 0.0203857421875, "learning_rate": 7.706381957480114e-06, "loss": 0.0003, "step": 18727 }, { "epoch": 8.741190198366395, "grad_norm": 0.0810546875, "learning_rate": 7.700734349994965e-06, "loss": 0.0021, "step": 18728 }, { "epoch": 8.741656942823804, "grad_norm": 0.0107421875, "learning_rate": 7.695088729815403e-06, "loss": 0.0002, "step": 18729 }, { "epoch": 8.742123687281214, "grad_norm": 0.037353515625, "learning_rate": 7.689445097062985e-06, "loss": 0.0022, "step": 18730 }, { "epoch": 8.742590431738623, "grad_norm": 0.01177978515625, "learning_rate": 7.68380345185925e-06, "loss": 0.0002, "step": 18731 }, { "epoch": 8.743057176196032, "grad_norm": 0.00628662109375, "learning_rate": 7.678163794325644e-06, "loss": 0.0001, "step": 18732 }, { "epoch": 8.743523920653443, "grad_norm": 0.00885009765625, "learning_rate": 7.6725261245836e-06, "loss": 0.0001, "step": 18733 }, { "epoch": 8.743990665110852, "grad_norm": 0.005950927734375, "learning_rate": 7.666890442754527e-06, "loss": 0.0001, "step": 18734 }, { "epoch": 8.74445740956826, "grad_norm": 0.006744384765625, "learning_rate": 7.661256748959722e-06, "loss": 0.0001, "step": 18735 }, { "epoch": 8.744924154025671, "grad_norm": 0.0203857421875, "learning_rate": 7.655625043320536e-06, "loss": 0.0002, "step": 18736 }, { "epoch": 8.74539089848308, "grad_norm": 0.01104736328125, "learning_rate": 7.649995325958181e-06, "loss": 0.0002, "step": 18737 }, { "epoch": 8.74585764294049, "grad_norm": 0.003814697265625, "learning_rate": 7.644367596993896e-06, "loss": 0.0001, "step": 18738 }, { "epoch": 8.7463243873979, "grad_norm": 0.006927490234375, "learning_rate": 7.638741856548858e-06, "loss": 0.0002, "step": 18739 }, { "epoch": 8.746791131855309, "grad_norm": 0.005767822265625, "learning_rate": 7.633118104744175e-06, "loss": 0.0001, "step": 18740 }, { "epoch": 8.74725787631272, "grad_norm": 0.016357421875, "learning_rate": 7.627496341700935e-06, "loss": 0.0001, "step": 18741 }, { "epoch": 8.747724620770128, "grad_norm": 0.010986328125, "learning_rate": 7.62187656754021e-06, "loss": 0.0002, "step": 18742 }, { "epoch": 8.748191365227537, "grad_norm": 0.0078125, "learning_rate": 7.616258782382957e-06, "loss": 0.0001, "step": 18743 }, { "epoch": 8.748658109684948, "grad_norm": 0.051025390625, "learning_rate": 7.610642986350159e-06, "loss": 0.0002, "step": 18744 }, { "epoch": 8.749124854142357, "grad_norm": 0.014404296875, "learning_rate": 7.605029179562728e-06, "loss": 0.0002, "step": 18745 }, { "epoch": 8.749591598599766, "grad_norm": 0.0146484375, "learning_rate": 7.599417362141536e-06, "loss": 0.0001, "step": 18746 }, { "epoch": 8.750058343057177, "grad_norm": 0.005889892578125, "learning_rate": 7.593807534207398e-06, "loss": 0.0001, "step": 18747 }, { "epoch": 8.750525087514585, "grad_norm": 0.01043701171875, "learning_rate": 7.588199695881127e-06, "loss": 0.0001, "step": 18748 }, { "epoch": 8.750991831971996, "grad_norm": 0.01806640625, "learning_rate": 7.582593847283426e-06, "loss": 0.0001, "step": 18749 }, { "epoch": 8.751458576429405, "grad_norm": 0.0341796875, "learning_rate": 7.576989988535033e-06, "loss": 0.0015, "step": 18750 }, { "epoch": 8.751925320886814, "grad_norm": 0.01312255859375, "learning_rate": 7.571388119756595e-06, "loss": 0.0002, "step": 18751 }, { "epoch": 8.752392065344225, "grad_norm": 0.04541015625, "learning_rate": 7.565788241068705e-06, "loss": 0.0002, "step": 18752 }, { "epoch": 8.752858809801634, "grad_norm": 0.01068115234375, "learning_rate": 7.560190352591956e-06, "loss": 0.0001, "step": 18753 }, { "epoch": 8.753325554259042, "grad_norm": 0.005828857421875, "learning_rate": 7.554594454446895e-06, "loss": 0.0001, "step": 18754 }, { "epoch": 8.753792298716453, "grad_norm": 0.04345703125, "learning_rate": 7.549000546753959e-06, "loss": 0.003, "step": 18755 }, { "epoch": 8.754259043173862, "grad_norm": 0.00762939453125, "learning_rate": 7.543408629633641e-06, "loss": 0.0001, "step": 18756 }, { "epoch": 8.754725787631273, "grad_norm": 0.00921630859375, "learning_rate": 7.537818703206301e-06, "loss": 0.0001, "step": 18757 }, { "epoch": 8.755192532088682, "grad_norm": 0.046630859375, "learning_rate": 7.532230767592307e-06, "loss": 0.0018, "step": 18758 }, { "epoch": 8.75565927654609, "grad_norm": 0.01129150390625, "learning_rate": 7.526644822911999e-06, "loss": 0.0001, "step": 18759 }, { "epoch": 8.756126021003501, "grad_norm": 0.01092529296875, "learning_rate": 7.521060869285612e-06, "loss": 0.0002, "step": 18760 }, { "epoch": 8.756126021003501, "eval_loss": 2.457089900970459, "eval_runtime": 86.3037, "eval_samples_per_second": 20.903, "eval_steps_per_second": 2.619, "step": 18760 }, { "epoch": 8.75659276546091, "grad_norm": 0.01708984375, "learning_rate": 7.5154789068333955e-06, "loss": 0.0002, "step": 18761 }, { "epoch": 8.757059509918319, "grad_norm": 0.076171875, "learning_rate": 7.509898935675541e-06, "loss": 0.0015, "step": 18762 }, { "epoch": 8.75752625437573, "grad_norm": 0.00799560546875, "learning_rate": 7.504320955932165e-06, "loss": 0.0001, "step": 18763 }, { "epoch": 8.757992998833139, "grad_norm": 0.006317138671875, "learning_rate": 7.4987449677233924e-06, "loss": 0.0001, "step": 18764 }, { "epoch": 8.758459743290548, "grad_norm": 0.06201171875, "learning_rate": 7.493170971169283e-06, "loss": 0.0002, "step": 18765 }, { "epoch": 8.758926487747958, "grad_norm": 0.05029296875, "learning_rate": 7.487598966389831e-06, "loss": 0.0021, "step": 18766 }, { "epoch": 8.759393232205367, "grad_norm": 0.1005859375, "learning_rate": 7.482028953505016e-06, "loss": 0.0019, "step": 18767 }, { "epoch": 8.759859976662778, "grad_norm": 0.01446533203125, "learning_rate": 7.476460932634799e-06, "loss": 0.0001, "step": 18768 }, { "epoch": 8.760326721120187, "grad_norm": 0.0091552734375, "learning_rate": 7.470894903898995e-06, "loss": 0.0002, "step": 18769 }, { "epoch": 8.760793465577596, "grad_norm": 0.0126953125, "learning_rate": 7.465330867417497e-06, "loss": 0.0002, "step": 18770 }, { "epoch": 8.761260210035006, "grad_norm": 0.03857421875, "learning_rate": 7.459768823310098e-06, "loss": 0.0003, "step": 18771 }, { "epoch": 8.761726954492415, "grad_norm": 0.0079345703125, "learning_rate": 7.454208771696547e-06, "loss": 0.0001, "step": 18772 }, { "epoch": 8.762193698949824, "grad_norm": 0.080078125, "learning_rate": 7.448650712696559e-06, "loss": 0.0058, "step": 18773 }, { "epoch": 8.762660443407235, "grad_norm": 0.043701171875, "learning_rate": 7.443094646429827e-06, "loss": 0.0027, "step": 18774 }, { "epoch": 8.763127187864644, "grad_norm": 0.0069580078125, "learning_rate": 7.437540573015945e-06, "loss": 0.0001, "step": 18775 }, { "epoch": 8.763593932322054, "grad_norm": 0.044189453125, "learning_rate": 7.431988492574504e-06, "loss": 0.0036, "step": 18776 }, { "epoch": 8.764060676779463, "grad_norm": 0.010498046875, "learning_rate": 7.426438405225078e-06, "loss": 0.0002, "step": 18777 }, { "epoch": 8.764527421236872, "grad_norm": 0.0072021484375, "learning_rate": 7.4208903110871365e-06, "loss": 0.0002, "step": 18778 }, { "epoch": 8.764994165694283, "grad_norm": 0.0146484375, "learning_rate": 7.415344210280139e-06, "loss": 0.0002, "step": 18779 }, { "epoch": 8.765460910151692, "grad_norm": 0.01165771484375, "learning_rate": 7.4098001029235255e-06, "loss": 0.0002, "step": 18780 }, { "epoch": 8.7659276546091, "grad_norm": 0.00909423828125, "learning_rate": 7.40425798913662e-06, "loss": 0.0001, "step": 18781 }, { "epoch": 8.766394399066511, "grad_norm": 0.04345703125, "learning_rate": 7.398717869038785e-06, "loss": 0.0027, "step": 18782 }, { "epoch": 8.76686114352392, "grad_norm": 0.006072998046875, "learning_rate": 7.393179742749312e-06, "loss": 0.0001, "step": 18783 }, { "epoch": 8.767327887981331, "grad_norm": 0.08154296875, "learning_rate": 7.387643610387418e-06, "loss": 0.005, "step": 18784 }, { "epoch": 8.76779463243874, "grad_norm": 0.00811767578125, "learning_rate": 7.3821094720723185e-06, "loss": 0.0001, "step": 18785 }, { "epoch": 8.768261376896149, "grad_norm": 0.0498046875, "learning_rate": 7.376577327923151e-06, "loss": 0.002, "step": 18786 }, { "epoch": 8.76872812135356, "grad_norm": 0.01904296875, "learning_rate": 7.3710471780590425e-06, "loss": 0.0002, "step": 18787 }, { "epoch": 8.769194865810968, "grad_norm": 0.0146484375, "learning_rate": 7.3655190225990766e-06, "loss": 0.0001, "step": 18788 }, { "epoch": 8.769661610268377, "grad_norm": 0.007659912109375, "learning_rate": 7.359992861662246e-06, "loss": 0.0001, "step": 18789 }, { "epoch": 8.770128354725788, "grad_norm": 0.05029296875, "learning_rate": 7.354468695367567e-06, "loss": 0.0033, "step": 18790 }, { "epoch": 8.770595099183197, "grad_norm": 0.0113525390625, "learning_rate": 7.348946523833977e-06, "loss": 0.0002, "step": 18791 }, { "epoch": 8.771061843640608, "grad_norm": 0.005157470703125, "learning_rate": 7.343426347180349e-06, "loss": 0.0001, "step": 18792 }, { "epoch": 8.771528588098016, "grad_norm": 0.01190185546875, "learning_rate": 7.337908165525553e-06, "loss": 0.0001, "step": 18793 }, { "epoch": 8.771995332555425, "grad_norm": 0.013916015625, "learning_rate": 7.332391978988429e-06, "loss": 0.0002, "step": 18794 }, { "epoch": 8.772462077012836, "grad_norm": 0.007476806640625, "learning_rate": 7.326877787687703e-06, "loss": 0.0001, "step": 18795 }, { "epoch": 8.772928821470245, "grad_norm": 0.023193359375, "learning_rate": 7.321365591742113e-06, "loss": 0.0002, "step": 18796 }, { "epoch": 8.773395565927654, "grad_norm": 0.039306640625, "learning_rate": 7.3158553912703765e-06, "loss": 0.0025, "step": 18797 }, { "epoch": 8.773862310385065, "grad_norm": 0.009033203125, "learning_rate": 7.310347186391087e-06, "loss": 0.0001, "step": 18798 }, { "epoch": 8.774329054842473, "grad_norm": 0.055419921875, "learning_rate": 7.304840977222871e-06, "loss": 0.0013, "step": 18799 }, { "epoch": 8.774795799299884, "grad_norm": 0.006439208984375, "learning_rate": 7.29933676388429e-06, "loss": 0.0002, "step": 18800 }, { "epoch": 8.775262543757293, "grad_norm": 0.006378173828125, "learning_rate": 7.293834546493816e-06, "loss": 0.0001, "step": 18801 }, { "epoch": 8.775729288214702, "grad_norm": 0.0155029296875, "learning_rate": 7.288334325169955e-06, "loss": 0.0002, "step": 18802 }, { "epoch": 8.776196032672113, "grad_norm": 0.0164794921875, "learning_rate": 7.282836100031132e-06, "loss": 0.0002, "step": 18803 }, { "epoch": 8.776662777129522, "grad_norm": 0.01904296875, "learning_rate": 7.2773398711956986e-06, "loss": 0.0002, "step": 18804 }, { "epoch": 8.77712952158693, "grad_norm": 0.013916015625, "learning_rate": 7.271845638782038e-06, "loss": 0.0001, "step": 18805 }, { "epoch": 8.777596266044341, "grad_norm": 0.01019287109375, "learning_rate": 7.26635340290841e-06, "loss": 0.0002, "step": 18806 }, { "epoch": 8.77806301050175, "grad_norm": 0.033447265625, "learning_rate": 7.260863163693077e-06, "loss": 0.0002, "step": 18807 }, { "epoch": 8.778529754959159, "grad_norm": 0.0283203125, "learning_rate": 7.255374921254277e-06, "loss": 0.0007, "step": 18808 }, { "epoch": 8.77899649941657, "grad_norm": 0.0052490234375, "learning_rate": 7.249888675710137e-06, "loss": 0.0001, "step": 18809 }, { "epoch": 8.779463243873979, "grad_norm": 0.00848388671875, "learning_rate": 7.244404427178797e-06, "loss": 0.0001, "step": 18810 }, { "epoch": 8.77992998833139, "grad_norm": 0.01202392578125, "learning_rate": 7.238922175778373e-06, "loss": 0.0001, "step": 18811 }, { "epoch": 8.780396732788798, "grad_norm": 0.0103759765625, "learning_rate": 7.233441921626849e-06, "loss": 0.0002, "step": 18812 }, { "epoch": 8.780863477246207, "grad_norm": 0.0064697265625, "learning_rate": 7.227963664842252e-06, "loss": 0.0001, "step": 18813 }, { "epoch": 8.781330221703618, "grad_norm": 0.007049560546875, "learning_rate": 7.222487405542544e-06, "loss": 0.0001, "step": 18814 }, { "epoch": 8.781796966161027, "grad_norm": 0.013671875, "learning_rate": 7.217013143845597e-06, "loss": 0.0002, "step": 18815 }, { "epoch": 8.782263710618436, "grad_norm": 0.0194091796875, "learning_rate": 7.211540879869328e-06, "loss": 0.0002, "step": 18816 }, { "epoch": 8.782730455075846, "grad_norm": 0.00567626953125, "learning_rate": 7.20607061373153e-06, "loss": 0.0001, "step": 18817 }, { "epoch": 8.783197199533255, "grad_norm": 0.00579833984375, "learning_rate": 7.200602345549967e-06, "loss": 0.0001, "step": 18818 }, { "epoch": 8.783663943990666, "grad_norm": 0.0205078125, "learning_rate": 7.19513607544241e-06, "loss": 0.0002, "step": 18819 }, { "epoch": 8.784130688448075, "grad_norm": 0.01165771484375, "learning_rate": 7.189671803526554e-06, "loss": 0.0001, "step": 18820 }, { "epoch": 8.784597432905484, "grad_norm": 0.02001953125, "learning_rate": 7.184209529920017e-06, "loss": 0.0001, "step": 18821 }, { "epoch": 8.785064177362894, "grad_norm": 0.03662109375, "learning_rate": 7.178749254740435e-06, "loss": 0.0011, "step": 18822 }, { "epoch": 8.785530921820303, "grad_norm": 0.00848388671875, "learning_rate": 7.173290978105385e-06, "loss": 0.0001, "step": 18823 }, { "epoch": 8.785997666277712, "grad_norm": 0.0079345703125, "learning_rate": 7.1678347001323585e-06, "loss": 0.0002, "step": 18824 }, { "epoch": 8.786464410735123, "grad_norm": 0.042724609375, "learning_rate": 7.162380420938841e-06, "loss": 0.0016, "step": 18825 }, { "epoch": 8.786931155192532, "grad_norm": 0.00689697265625, "learning_rate": 7.156928140642305e-06, "loss": 0.0001, "step": 18826 }, { "epoch": 8.787397899649942, "grad_norm": 0.00927734375, "learning_rate": 7.151477859360089e-06, "loss": 0.0001, "step": 18827 }, { "epoch": 8.787864644107351, "grad_norm": 0.006805419921875, "learning_rate": 7.146029577209579e-06, "loss": 0.0001, "step": 18828 }, { "epoch": 8.78833138856476, "grad_norm": 0.011962890625, "learning_rate": 7.140583294308101e-06, "loss": 0.0001, "step": 18829 }, { "epoch": 8.788798133022171, "grad_norm": 0.0196533203125, "learning_rate": 7.135139010772862e-06, "loss": 0.0003, "step": 18830 }, { "epoch": 8.78926487747958, "grad_norm": 0.052001953125, "learning_rate": 7.129696726721125e-06, "loss": 0.0022, "step": 18831 }, { "epoch": 8.789731621936989, "grad_norm": 0.00823974609375, "learning_rate": 7.124256442270072e-06, "loss": 0.0001, "step": 18832 }, { "epoch": 8.7901983663944, "grad_norm": 0.0069580078125, "learning_rate": 7.1188181575368106e-06, "loss": 0.0001, "step": 18833 }, { "epoch": 8.790665110851808, "grad_norm": 0.052001953125, "learning_rate": 7.1133818726384585e-06, "loss": 0.0028, "step": 18834 }, { "epoch": 8.791131855309219, "grad_norm": 0.004547119140625, "learning_rate": 7.107947587692043e-06, "loss": 0.0001, "step": 18835 }, { "epoch": 8.791598599766628, "grad_norm": 0.005889892578125, "learning_rate": 7.102515302814583e-06, "loss": 0.0001, "step": 18836 }, { "epoch": 8.792065344224037, "grad_norm": 0.005584716796875, "learning_rate": 7.097085018123051e-06, "loss": 0.0001, "step": 18837 }, { "epoch": 8.792532088681448, "grad_norm": 0.01123046875, "learning_rate": 7.091656733734342e-06, "loss": 0.0002, "step": 18838 }, { "epoch": 8.792998833138856, "grad_norm": 0.0084228515625, "learning_rate": 7.086230449765352e-06, "loss": 0.0001, "step": 18839 }, { "epoch": 8.793465577596265, "grad_norm": 0.009521484375, "learning_rate": 7.080806166332921e-06, "loss": 0.0001, "step": 18840 }, { "epoch": 8.793932322053676, "grad_norm": 0.01153564453125, "learning_rate": 7.07538388355381e-06, "loss": 0.0001, "step": 18841 }, { "epoch": 8.794399066511085, "grad_norm": 0.01153564453125, "learning_rate": 7.0699636015447934e-06, "loss": 0.0001, "step": 18842 }, { "epoch": 8.794865810968496, "grad_norm": 0.005950927734375, "learning_rate": 7.064545320422578e-06, "loss": 0.0001, "step": 18843 }, { "epoch": 8.795332555425905, "grad_norm": 0.007568359375, "learning_rate": 7.059129040303802e-06, "loss": 0.0002, "step": 18844 }, { "epoch": 8.795799299883313, "grad_norm": 0.00537109375, "learning_rate": 7.053714761305097e-06, "loss": 0.0001, "step": 18845 }, { "epoch": 8.796266044340724, "grad_norm": 0.0091552734375, "learning_rate": 7.048302483543056e-06, "loss": 0.0001, "step": 18846 }, { "epoch": 8.796732788798133, "grad_norm": 0.0103759765625, "learning_rate": 7.042892207134178e-06, "loss": 0.0001, "step": 18847 }, { "epoch": 8.797199533255542, "grad_norm": 0.005767822265625, "learning_rate": 7.037483932194966e-06, "loss": 0.0001, "step": 18848 }, { "epoch": 8.797666277712953, "grad_norm": 0.00897216796875, "learning_rate": 7.032077658841885e-06, "loss": 0.0001, "step": 18849 }, { "epoch": 8.798133022170362, "grad_norm": 0.0240478515625, "learning_rate": 7.026673387191307e-06, "loss": 0.0002, "step": 18850 }, { "epoch": 8.79859976662777, "grad_norm": 0.00537109375, "learning_rate": 7.021271117359607e-06, "loss": 0.0001, "step": 18851 }, { "epoch": 8.799066511085181, "grad_norm": 0.01239013671875, "learning_rate": 7.015870849463113e-06, "loss": 0.0002, "step": 18852 }, { "epoch": 8.79953325554259, "grad_norm": 0.043701171875, "learning_rate": 7.010472583618078e-06, "loss": 0.0021, "step": 18853 }, { "epoch": 8.8, "grad_norm": 0.00787353515625, "learning_rate": 7.0050763199407625e-06, "loss": 0.0002, "step": 18854 }, { "epoch": 8.80046674445741, "grad_norm": 0.0142822265625, "learning_rate": 6.999682058547308e-06, "loss": 0.0001, "step": 18855 }, { "epoch": 8.800933488914819, "grad_norm": 0.00897216796875, "learning_rate": 6.9942897995538884e-06, "loss": 0.0001, "step": 18856 }, { "epoch": 8.80140023337223, "grad_norm": 0.0240478515625, "learning_rate": 6.988899543076611e-06, "loss": 0.0002, "step": 18857 }, { "epoch": 8.801866977829638, "grad_norm": 0.00836181640625, "learning_rate": 6.983511289231514e-06, "loss": 0.0002, "step": 18858 }, { "epoch": 8.802333722287047, "grad_norm": 0.00665283203125, "learning_rate": 6.9781250381346195e-06, "loss": 0.0001, "step": 18859 }, { "epoch": 8.802800466744458, "grad_norm": 0.04931640625, "learning_rate": 6.972740789901921e-06, "loss": 0.0002, "step": 18860 }, { "epoch": 8.803267211201867, "grad_norm": 0.046875, "learning_rate": 6.967358544649316e-06, "loss": 0.0034, "step": 18861 }, { "epoch": 8.803733955659277, "grad_norm": 0.00726318359375, "learning_rate": 6.9619783024926884e-06, "loss": 0.0001, "step": 18862 }, { "epoch": 8.804200700116686, "grad_norm": 0.018798828125, "learning_rate": 6.956600063547925e-06, "loss": 0.0001, "step": 18863 }, { "epoch": 8.804667444574095, "grad_norm": 0.007537841796875, "learning_rate": 6.9512238279307975e-06, "loss": 0.0001, "step": 18864 }, { "epoch": 8.805134189031506, "grad_norm": 0.0079345703125, "learning_rate": 6.945849595757037e-06, "loss": 0.0001, "step": 18865 }, { "epoch": 8.805600933488915, "grad_norm": 0.0120849609375, "learning_rate": 6.940477367142395e-06, "loss": 0.0001, "step": 18866 }, { "epoch": 8.806067677946324, "grad_norm": 0.0198974609375, "learning_rate": 6.935107142202513e-06, "loss": 0.0001, "step": 18867 }, { "epoch": 8.806534422403734, "grad_norm": 0.00592041015625, "learning_rate": 6.929738921053042e-06, "loss": 0.0002, "step": 18868 }, { "epoch": 8.807001166861143, "grad_norm": 0.014892578125, "learning_rate": 6.924372703809556e-06, "loss": 0.0002, "step": 18869 }, { "epoch": 8.807467911318554, "grad_norm": 0.009765625, "learning_rate": 6.919008490587586e-06, "loss": 0.0002, "step": 18870 }, { "epoch": 8.807934655775963, "grad_norm": 0.01953125, "learning_rate": 6.913646281502639e-06, "loss": 0.0001, "step": 18871 }, { "epoch": 8.808401400233372, "grad_norm": 0.00640869140625, "learning_rate": 6.908286076670179e-06, "loss": 0.0001, "step": 18872 }, { "epoch": 8.808868144690782, "grad_norm": 0.007110595703125, "learning_rate": 6.902927876205601e-06, "loss": 0.0001, "step": 18873 }, { "epoch": 8.809334889148191, "grad_norm": 0.00494384765625, "learning_rate": 6.8975716802242685e-06, "loss": 0.0001, "step": 18874 }, { "epoch": 8.8098016336056, "grad_norm": 0.00506591796875, "learning_rate": 6.892217488841535e-06, "loss": 0.0001, "step": 18875 }, { "epoch": 8.81026837806301, "grad_norm": 0.00897216796875, "learning_rate": 6.886865302172651e-06, "loss": 0.0001, "step": 18876 }, { "epoch": 8.81073512252042, "grad_norm": 0.01092529296875, "learning_rate": 6.88151512033286e-06, "loss": 0.0001, "step": 18877 }, { "epoch": 8.811201866977829, "grad_norm": 0.012451171875, "learning_rate": 6.8761669434373896e-06, "loss": 0.0001, "step": 18878 }, { "epoch": 8.81166861143524, "grad_norm": 0.0308837890625, "learning_rate": 6.87082077160135e-06, "loss": 0.0018, "step": 18879 }, { "epoch": 8.812135355892648, "grad_norm": 0.06689453125, "learning_rate": 6.865476604939869e-06, "loss": 0.0016, "step": 18880 }, { "epoch": 8.812602100350059, "grad_norm": 0.0191650390625, "learning_rate": 6.860134443568022e-06, "loss": 0.0002, "step": 18881 }, { "epoch": 8.813068844807468, "grad_norm": 0.00762939453125, "learning_rate": 6.854794287600807e-06, "loss": 0.0001, "step": 18882 }, { "epoch": 8.813535589264877, "grad_norm": 0.041259765625, "learning_rate": 6.8494561371532425e-06, "loss": 0.003, "step": 18883 }, { "epoch": 8.814002333722287, "grad_norm": 0.058837890625, "learning_rate": 6.844119992340214e-06, "loss": 0.0002, "step": 18884 }, { "epoch": 8.814469078179696, "grad_norm": 0.0113525390625, "learning_rate": 6.838785853276642e-06, "loss": 0.0001, "step": 18885 }, { "epoch": 8.814935822637107, "grad_norm": 0.019775390625, "learning_rate": 6.833453720077387e-06, "loss": 0.0002, "step": 18886 }, { "epoch": 8.815402567094516, "grad_norm": 0.00457763671875, "learning_rate": 6.828123592857239e-06, "loss": 0.0001, "step": 18887 }, { "epoch": 8.815869311551925, "grad_norm": 0.006134033203125, "learning_rate": 6.822795471730958e-06, "loss": 0.0001, "step": 18888 }, { "epoch": 8.816336056009336, "grad_norm": 0.01153564453125, "learning_rate": 6.817469356813289e-06, "loss": 0.0001, "step": 18889 }, { "epoch": 8.816802800466744, "grad_norm": 0.01324462890625, "learning_rate": 6.812145248218871e-06, "loss": 0.0002, "step": 18890 }, { "epoch": 8.817269544924153, "grad_norm": 0.0140380859375, "learning_rate": 6.806823146062368e-06, "loss": 0.0001, "step": 18891 }, { "epoch": 8.817736289381564, "grad_norm": 0.008544921875, "learning_rate": 6.801503050458369e-06, "loss": 0.0001, "step": 18892 }, { "epoch": 8.818203033838973, "grad_norm": 0.039794921875, "learning_rate": 6.7961849615214125e-06, "loss": 0.0004, "step": 18893 }, { "epoch": 8.818669778296382, "grad_norm": 0.01031494140625, "learning_rate": 6.790868879365997e-06, "loss": 0.0001, "step": 18894 }, { "epoch": 8.819136522753793, "grad_norm": 0.0067138671875, "learning_rate": 6.785554804106608e-06, "loss": 0.0001, "step": 18895 }, { "epoch": 8.819603267211201, "grad_norm": 0.004913330078125, "learning_rate": 6.780242735857634e-06, "loss": 0.0001, "step": 18896 }, { "epoch": 8.820070011668612, "grad_norm": 0.0556640625, "learning_rate": 6.774932674733458e-06, "loss": 0.0002, "step": 18897 }, { "epoch": 8.820536756126021, "grad_norm": 0.037109375, "learning_rate": 6.769624620848436e-06, "loss": 0.0006, "step": 18898 }, { "epoch": 8.82100350058343, "grad_norm": 0.0123291015625, "learning_rate": 6.7643185743168196e-06, "loss": 0.0001, "step": 18899 }, { "epoch": 8.82147024504084, "grad_norm": 0.0133056640625, "learning_rate": 6.759014535252872e-06, "loss": 0.0002, "step": 18900 }, { "epoch": 8.82193698949825, "grad_norm": 0.00885009765625, "learning_rate": 6.753712503770804e-06, "loss": 0.0001, "step": 18901 }, { "epoch": 8.822403733955658, "grad_norm": 0.00909423828125, "learning_rate": 6.748412479984756e-06, "loss": 0.0001, "step": 18902 }, { "epoch": 8.82287047841307, "grad_norm": 0.051025390625, "learning_rate": 6.743114464008871e-06, "loss": 0.0015, "step": 18903 }, { "epoch": 8.823337222870478, "grad_norm": 0.01531982421875, "learning_rate": 6.737818455957179e-06, "loss": 0.0002, "step": 18904 }, { "epoch": 8.823803967327889, "grad_norm": 0.0167236328125, "learning_rate": 6.732524455943734e-06, "loss": 0.0001, "step": 18905 }, { "epoch": 8.824270711785298, "grad_norm": 0.005767822265625, "learning_rate": 6.727232464082534e-06, "loss": 0.0001, "step": 18906 }, { "epoch": 8.824737456242707, "grad_norm": 0.00469970703125, "learning_rate": 6.721942480487487e-06, "loss": 0.0001, "step": 18907 }, { "epoch": 8.825204200700117, "grad_norm": 0.006988525390625, "learning_rate": 6.7166545052725236e-06, "loss": 0.0001, "step": 18908 }, { "epoch": 8.825670945157526, "grad_norm": 0.004241943359375, "learning_rate": 6.711368538551488e-06, "loss": 0.0001, "step": 18909 }, { "epoch": 8.826137689614935, "grad_norm": 0.0162353515625, "learning_rate": 6.7060845804381875e-06, "loss": 0.0001, "step": 18910 }, { "epoch": 8.826604434072346, "grad_norm": 0.010498046875, "learning_rate": 6.700802631046399e-06, "loss": 0.0001, "step": 18911 }, { "epoch": 8.827071178529755, "grad_norm": 0.0118408203125, "learning_rate": 6.695522690489864e-06, "loss": 0.0002, "step": 18912 }, { "epoch": 8.827537922987165, "grad_norm": 0.041259765625, "learning_rate": 6.690244758882247e-06, "loss": 0.0028, "step": 18913 }, { "epoch": 8.828004667444574, "grad_norm": 0.007720947265625, "learning_rate": 6.684968836337169e-06, "loss": 0.0001, "step": 18914 }, { "epoch": 8.828471411901983, "grad_norm": 0.0439453125, "learning_rate": 6.679694922968271e-06, "loss": 0.0018, "step": 18915 }, { "epoch": 8.828938156359394, "grad_norm": 0.01239013671875, "learning_rate": 6.674423018889054e-06, "loss": 0.0002, "step": 18916 }, { "epoch": 8.829404900816803, "grad_norm": 0.0712890625, "learning_rate": 6.669153124213068e-06, "loss": 0.0034, "step": 18917 }, { "epoch": 8.829871645274212, "grad_norm": 0.0732421875, "learning_rate": 6.66388523905378e-06, "loss": 0.0018, "step": 18918 }, { "epoch": 8.830338389731622, "grad_norm": 0.007171630859375, "learning_rate": 6.658619363524576e-06, "loss": 0.0002, "step": 18919 }, { "epoch": 8.830805134189031, "grad_norm": 0.00732421875, "learning_rate": 6.653355497738867e-06, "loss": 0.0001, "step": 18920 }, { "epoch": 8.83127187864644, "grad_norm": 0.017822265625, "learning_rate": 6.648093641810005e-06, "loss": 0.0002, "step": 18921 }, { "epoch": 8.83173862310385, "grad_norm": 0.0128173828125, "learning_rate": 6.6428337958512444e-06, "loss": 0.0002, "step": 18922 }, { "epoch": 8.83220536756126, "grad_norm": 0.01019287109375, "learning_rate": 6.63757595997585e-06, "loss": 0.0001, "step": 18923 }, { "epoch": 8.83267211201867, "grad_norm": 0.00531005859375, "learning_rate": 6.632320134297054e-06, "loss": 0.0001, "step": 18924 }, { "epoch": 8.83313885647608, "grad_norm": 0.00927734375, "learning_rate": 6.627066318927977e-06, "loss": 0.0001, "step": 18925 }, { "epoch": 8.833605600933488, "grad_norm": 0.00787353515625, "learning_rate": 6.6218145139817615e-06, "loss": 0.0001, "step": 18926 }, { "epoch": 8.834072345390899, "grad_norm": 0.0252685546875, "learning_rate": 6.616564719571505e-06, "loss": 0.0003, "step": 18927 }, { "epoch": 8.834539089848308, "grad_norm": 0.044921875, "learning_rate": 6.611316935810208e-06, "loss": 0.0002, "step": 18928 }, { "epoch": 8.835005834305719, "grad_norm": 0.010986328125, "learning_rate": 6.6060711628108676e-06, "loss": 0.0002, "step": 18929 }, { "epoch": 8.835472578763127, "grad_norm": 0.053955078125, "learning_rate": 6.600827400686449e-06, "loss": 0.0017, "step": 18930 }, { "epoch": 8.835939323220536, "grad_norm": 0.006805419921875, "learning_rate": 6.59558564954984e-06, "loss": 0.0001, "step": 18931 }, { "epoch": 8.836406067677947, "grad_norm": 0.00628662109375, "learning_rate": 6.5903459095138955e-06, "loss": 0.0001, "step": 18932 }, { "epoch": 8.836872812135356, "grad_norm": 0.01019287109375, "learning_rate": 6.585108180691468e-06, "loss": 0.0001, "step": 18933 }, { "epoch": 8.837339556592765, "grad_norm": 0.005401611328125, "learning_rate": 6.57987246319528e-06, "loss": 0.0001, "step": 18934 }, { "epoch": 8.837806301050176, "grad_norm": 0.0107421875, "learning_rate": 6.5746387571381185e-06, "loss": 0.0002, "step": 18935 }, { "epoch": 8.838273045507584, "grad_norm": 0.01104736328125, "learning_rate": 6.569407062632616e-06, "loss": 0.0001, "step": 18936 }, { "epoch": 8.838739789964993, "grad_norm": 0.0108642578125, "learning_rate": 6.564177379791447e-06, "loss": 0.0002, "step": 18937 }, { "epoch": 8.839206534422404, "grad_norm": 0.007293701171875, "learning_rate": 6.558949708727225e-06, "loss": 0.0001, "step": 18938 }, { "epoch": 8.839673278879813, "grad_norm": 0.0098876953125, "learning_rate": 6.553724049552468e-06, "loss": 0.0002, "step": 18939 }, { "epoch": 8.840140023337224, "grad_norm": 0.006866455078125, "learning_rate": 6.54850040237972e-06, "loss": 0.0001, "step": 18940 }, { "epoch": 8.840606767794633, "grad_norm": 0.0576171875, "learning_rate": 6.5432787673214594e-06, "loss": 0.0025, "step": 18941 }, { "epoch": 8.841073512252041, "grad_norm": 0.00341796875, "learning_rate": 6.538059144490083e-06, "loss": 0.0001, "step": 18942 }, { "epoch": 8.841540256709452, "grad_norm": 0.006103515625, "learning_rate": 6.5328415339979795e-06, "loss": 0.0001, "step": 18943 }, { "epoch": 8.842007001166861, "grad_norm": 0.0220947265625, "learning_rate": 6.527625935957526e-06, "loss": 0.0001, "step": 18944 }, { "epoch": 8.84247374562427, "grad_norm": 0.0908203125, "learning_rate": 6.522412350480989e-06, "loss": 0.0002, "step": 18945 }, { "epoch": 8.84294049008168, "grad_norm": 0.0086669921875, "learning_rate": 6.517200777680621e-06, "loss": 0.0001, "step": 18946 }, { "epoch": 8.84340723453909, "grad_norm": 0.00604248046875, "learning_rate": 6.511991217668656e-06, "loss": 0.0001, "step": 18947 }, { "epoch": 8.8438739789965, "grad_norm": 0.00897216796875, "learning_rate": 6.506783670557226e-06, "loss": 0.0001, "step": 18948 }, { "epoch": 8.844340723453909, "grad_norm": 0.049560546875, "learning_rate": 6.501578136458486e-06, "loss": 0.0018, "step": 18949 }, { "epoch": 8.844807467911318, "grad_norm": 0.0198974609375, "learning_rate": 6.496374615484513e-06, "loss": 0.0002, "step": 18950 }, { "epoch": 8.845274212368729, "grad_norm": 0.01129150390625, "learning_rate": 6.4911731077473284e-06, "loss": 0.0001, "step": 18951 }, { "epoch": 8.845740956826138, "grad_norm": 0.0128173828125, "learning_rate": 6.485973613358953e-06, "loss": 0.0001, "step": 18952 }, { "epoch": 8.846207701283546, "grad_norm": 0.01275634765625, "learning_rate": 6.48077613243131e-06, "loss": 0.0001, "step": 18953 }, { "epoch": 8.846674445740957, "grad_norm": 0.0050048828125, "learning_rate": 6.475580665076308e-06, "loss": 0.0001, "step": 18954 }, { "epoch": 8.847141190198366, "grad_norm": 0.0302734375, "learning_rate": 6.4703872114058484e-06, "loss": 0.0002, "step": 18955 }, { "epoch": 8.847607934655777, "grad_norm": 0.006378173828125, "learning_rate": 6.465195771531696e-06, "loss": 0.0001, "step": 18956 }, { "epoch": 8.848074679113186, "grad_norm": 0.0238037109375, "learning_rate": 6.460006345565672e-06, "loss": 0.0002, "step": 18957 }, { "epoch": 8.848541423570595, "grad_norm": 0.004180908203125, "learning_rate": 6.4548189336195e-06, "loss": 0.0001, "step": 18958 }, { "epoch": 8.849008168028005, "grad_norm": 0.068359375, "learning_rate": 6.449633535804866e-06, "loss": 0.0043, "step": 18959 }, { "epoch": 8.849474912485414, "grad_norm": 0.006622314453125, "learning_rate": 6.444450152233416e-06, "loss": 0.0001, "step": 18960 }, { "epoch": 8.849941656942823, "grad_norm": 0.115234375, "learning_rate": 6.4392687830167385e-06, "loss": 0.0004, "step": 18961 }, { "epoch": 8.850408401400234, "grad_norm": 0.0093994140625, "learning_rate": 6.434089428266443e-06, "loss": 0.0002, "step": 18962 }, { "epoch": 8.850875145857643, "grad_norm": 0.0361328125, "learning_rate": 6.428912088093985e-06, "loss": 0.0014, "step": 18963 }, { "epoch": 8.851341890315052, "grad_norm": 0.0072021484375, "learning_rate": 6.423736762610899e-06, "loss": 0.0001, "step": 18964 }, { "epoch": 8.851808634772462, "grad_norm": 0.004913330078125, "learning_rate": 6.418563451928561e-06, "loss": 0.0001, "step": 18965 }, { "epoch": 8.852275379229871, "grad_norm": 0.044677734375, "learning_rate": 6.413392156158382e-06, "loss": 0.0005, "step": 18966 }, { "epoch": 8.852742123687282, "grad_norm": 0.06396484375, "learning_rate": 6.408222875411718e-06, "loss": 0.0025, "step": 18967 }, { "epoch": 8.85320886814469, "grad_norm": 0.006805419921875, "learning_rate": 6.403055609799846e-06, "loss": 0.0001, "step": 18968 }, { "epoch": 8.8536756126021, "grad_norm": 0.006195068359375, "learning_rate": 6.397890359434034e-06, "loss": 0.0001, "step": 18969 }, { "epoch": 8.85414235705951, "grad_norm": 0.0076904296875, "learning_rate": 6.3927271244255124e-06, "loss": 0.0001, "step": 18970 }, { "epoch": 8.85460910151692, "grad_norm": 0.060302734375, "learning_rate": 6.3875659048854286e-06, "loss": 0.0019, "step": 18971 }, { "epoch": 8.85507584597433, "grad_norm": 0.004669189453125, "learning_rate": 6.3824067009249035e-06, "loss": 0.0001, "step": 18972 }, { "epoch": 8.855542590431739, "grad_norm": 0.01495361328125, "learning_rate": 6.377249512655048e-06, "loss": 0.0001, "step": 18973 }, { "epoch": 8.856009334889148, "grad_norm": 0.01287841796875, "learning_rate": 6.372094340186874e-06, "loss": 0.0001, "step": 18974 }, { "epoch": 8.856476079346558, "grad_norm": 0.004547119140625, "learning_rate": 6.366941183631392e-06, "loss": 0.0001, "step": 18975 }, { "epoch": 8.856942823803967, "grad_norm": 0.0228271484375, "learning_rate": 6.361790043099569e-06, "loss": 0.0001, "step": 18976 }, { "epoch": 8.857409568261376, "grad_norm": 0.01513671875, "learning_rate": 6.356640918702283e-06, "loss": 0.0002, "step": 18977 }, { "epoch": 8.857876312718787, "grad_norm": 0.0186767578125, "learning_rate": 6.351493810550413e-06, "loss": 0.0002, "step": 18978 }, { "epoch": 8.858343057176196, "grad_norm": 0.011962890625, "learning_rate": 6.3463487187548135e-06, "loss": 0.0001, "step": 18979 }, { "epoch": 8.858809801633605, "grad_norm": 0.007049560546875, "learning_rate": 6.3412056434262064e-06, "loss": 0.0001, "step": 18980 }, { "epoch": 8.859276546091015, "grad_norm": 0.005401611328125, "learning_rate": 6.336064584675372e-06, "loss": 0.0001, "step": 18981 }, { "epoch": 8.859743290548424, "grad_norm": 0.00714111328125, "learning_rate": 6.330925542612997e-06, "loss": 0.0001, "step": 18982 }, { "epoch": 8.860210035005835, "grad_norm": 0.048583984375, "learning_rate": 6.325788517349706e-06, "loss": 0.0008, "step": 18983 }, { "epoch": 8.860676779463244, "grad_norm": 0.0296630859375, "learning_rate": 6.320653508996144e-06, "loss": 0.0002, "step": 18984 }, { "epoch": 8.861143523920653, "grad_norm": 0.007415771484375, "learning_rate": 6.315520517662832e-06, "loss": 0.0001, "step": 18985 }, { "epoch": 8.861610268378064, "grad_norm": 0.0177001953125, "learning_rate": 6.3103895434603044e-06, "loss": 0.0002, "step": 18986 }, { "epoch": 8.862077012835472, "grad_norm": 0.043212890625, "learning_rate": 6.305260586499062e-06, "loss": 0.0022, "step": 18987 }, { "epoch": 8.862543757292881, "grad_norm": 0.004913330078125, "learning_rate": 6.300133646889494e-06, "loss": 0.0001, "step": 18988 }, { "epoch": 8.863010501750292, "grad_norm": 0.01141357421875, "learning_rate": 6.295008724742013e-06, "loss": 0.0001, "step": 18989 }, { "epoch": 8.863477246207701, "grad_norm": 0.00799560546875, "learning_rate": 6.289885820166974e-06, "loss": 0.0001, "step": 18990 }, { "epoch": 8.863943990665112, "grad_norm": 0.0172119140625, "learning_rate": 6.2847649332746565e-06, "loss": 0.0002, "step": 18991 }, { "epoch": 8.86441073512252, "grad_norm": 0.053955078125, "learning_rate": 6.279646064175327e-06, "loss": 0.0022, "step": 18992 }, { "epoch": 8.86487747957993, "grad_norm": 0.033935546875, "learning_rate": 6.274529212979208e-06, "loss": 0.0009, "step": 18993 }, { "epoch": 8.86534422403734, "grad_norm": 0.1162109375, "learning_rate": 6.269414379796468e-06, "loss": 0.0034, "step": 18994 }, { "epoch": 8.865810968494749, "grad_norm": 0.005401611328125, "learning_rate": 6.264301564737218e-06, "loss": 0.0001, "step": 18995 }, { "epoch": 8.866277712952158, "grad_norm": 0.01483154296875, "learning_rate": 6.259190767911571e-06, "loss": 0.0002, "step": 18996 }, { "epoch": 8.866744457409569, "grad_norm": 0.033935546875, "learning_rate": 6.25408198942955e-06, "loss": 0.0006, "step": 18997 }, { "epoch": 8.867211201866978, "grad_norm": 0.006011962890625, "learning_rate": 6.248975229401155e-06, "loss": 0.0001, "step": 18998 }, { "epoch": 8.867677946324388, "grad_norm": 0.00494384765625, "learning_rate": 6.243870487936354e-06, "loss": 0.0001, "step": 18999 }, { "epoch": 8.868144690781797, "grad_norm": 0.01116943359375, "learning_rate": 6.238767765145026e-06, "loss": 0.0001, "step": 19000 }, { "epoch": 8.868611435239206, "grad_norm": 0.0257568359375, "learning_rate": 6.233667061137072e-06, "loss": 0.0002, "step": 19001 }, { "epoch": 8.869078179696617, "grad_norm": 0.00518798828125, "learning_rate": 6.228568376022293e-06, "loss": 0.0001, "step": 19002 }, { "epoch": 8.869544924154026, "grad_norm": 0.00701904296875, "learning_rate": 6.223471709910478e-06, "loss": 0.0001, "step": 19003 }, { "epoch": 8.870011668611435, "grad_norm": 0.040283203125, "learning_rate": 6.218377062911373e-06, "loss": 0.0002, "step": 19004 }, { "epoch": 8.870478413068845, "grad_norm": 0.0133056640625, "learning_rate": 6.213284435134659e-06, "loss": 0.0001, "step": 19005 }, { "epoch": 8.870945157526254, "grad_norm": 0.01318359375, "learning_rate": 6.208193826689979e-06, "loss": 0.0002, "step": 19006 }, { "epoch": 8.871411901983663, "grad_norm": 0.0245361328125, "learning_rate": 6.203105237686968e-06, "loss": 0.0001, "step": 19007 }, { "epoch": 8.871878646441074, "grad_norm": 0.006927490234375, "learning_rate": 6.198018668235173e-06, "loss": 0.0001, "step": 19008 }, { "epoch": 8.872345390898483, "grad_norm": 0.03271484375, "learning_rate": 6.192934118444093e-06, "loss": 0.0021, "step": 19009 }, { "epoch": 8.872812135355893, "grad_norm": 0.0205078125, "learning_rate": 6.1878515884232215e-06, "loss": 0.0002, "step": 19010 }, { "epoch": 8.873278879813302, "grad_norm": 0.0120849609375, "learning_rate": 6.182771078282013e-06, "loss": 0.0001, "step": 19011 }, { "epoch": 8.873745624270711, "grad_norm": 0.0201416015625, "learning_rate": 6.177692588129813e-06, "loss": 0.0002, "step": 19012 }, { "epoch": 8.874212368728122, "grad_norm": 0.06640625, "learning_rate": 6.172616118076002e-06, "loss": 0.0003, "step": 19013 }, { "epoch": 8.87467911318553, "grad_norm": 0.006378173828125, "learning_rate": 6.167541668229859e-06, "loss": 0.0001, "step": 19014 }, { "epoch": 8.875145857642941, "grad_norm": 0.006927490234375, "learning_rate": 6.162469238700652e-06, "loss": 0.0001, "step": 19015 }, { "epoch": 8.87561260210035, "grad_norm": 0.00531005859375, "learning_rate": 6.157398829597616e-06, "loss": 0.0001, "step": 19016 }, { "epoch": 8.87607934655776, "grad_norm": 0.006927490234375, "learning_rate": 6.152330441029885e-06, "loss": 0.0001, "step": 19017 }, { "epoch": 8.87654609101517, "grad_norm": 0.005126953125, "learning_rate": 6.147264073106607e-06, "loss": 0.0001, "step": 19018 }, { "epoch": 8.877012835472579, "grad_norm": 0.0400390625, "learning_rate": 6.14219972593687e-06, "loss": 0.0016, "step": 19019 }, { "epoch": 8.877479579929988, "grad_norm": 0.0093994140625, "learning_rate": 6.137137399629711e-06, "loss": 0.0001, "step": 19020 }, { "epoch": 8.877946324387398, "grad_norm": 0.01055908203125, "learning_rate": 6.13207709429412e-06, "loss": 0.0002, "step": 19021 }, { "epoch": 8.878413068844807, "grad_norm": 0.005279541015625, "learning_rate": 6.1270188100390645e-06, "loss": 0.0001, "step": 19022 }, { "epoch": 8.878879813302216, "grad_norm": 0.01080322265625, "learning_rate": 6.121962546973437e-06, "loss": 0.0002, "step": 19023 }, { "epoch": 8.879346557759627, "grad_norm": 0.006378173828125, "learning_rate": 6.116908305206126e-06, "loss": 0.0001, "step": 19024 }, { "epoch": 8.879813302217036, "grad_norm": 0.008056640625, "learning_rate": 6.1118560848459464e-06, "loss": 0.0001, "step": 19025 }, { "epoch": 8.880280046674446, "grad_norm": 0.044677734375, "learning_rate": 6.106805886001665e-06, "loss": 0.0026, "step": 19026 }, { "epoch": 8.880746791131855, "grad_norm": 0.049560546875, "learning_rate": 6.10175770878203e-06, "loss": 0.0002, "step": 19027 }, { "epoch": 8.881213535589264, "grad_norm": 0.00433349609375, "learning_rate": 6.096711553295753e-06, "loss": 0.0001, "step": 19028 }, { "epoch": 8.881680280046675, "grad_norm": 0.01409912109375, "learning_rate": 6.091667419651437e-06, "loss": 0.0002, "step": 19029 }, { "epoch": 8.882147024504084, "grad_norm": 0.00799560546875, "learning_rate": 6.086625307957727e-06, "loss": 0.0001, "step": 19030 }, { "epoch": 8.882613768961493, "grad_norm": 0.006378173828125, "learning_rate": 6.081585218323183e-06, "loss": 0.0001, "step": 19031 }, { "epoch": 8.883080513418903, "grad_norm": 0.0162353515625, "learning_rate": 6.076547150856293e-06, "loss": 0.0002, "step": 19032 }, { "epoch": 8.883547257876312, "grad_norm": 0.0301513671875, "learning_rate": 6.071511105665572e-06, "loss": 0.0023, "step": 19033 }, { "epoch": 8.884014002333723, "grad_norm": 0.00726318359375, "learning_rate": 6.066477082859412e-06, "loss": 0.0002, "step": 19034 }, { "epoch": 8.884480746791132, "grad_norm": 0.00482177734375, "learning_rate": 6.061445082546235e-06, "loss": 0.0001, "step": 19035 }, { "epoch": 8.88494749124854, "grad_norm": 0.0059814453125, "learning_rate": 6.056415104834379e-06, "loss": 0.0001, "step": 19036 }, { "epoch": 8.885414235705952, "grad_norm": 0.00921630859375, "learning_rate": 6.051387149832122e-06, "loss": 0.0001, "step": 19037 }, { "epoch": 8.88588098016336, "grad_norm": 0.00970458984375, "learning_rate": 6.046361217647733e-06, "loss": 0.0001, "step": 19038 }, { "epoch": 8.88634772462077, "grad_norm": 0.008544921875, "learning_rate": 6.041337308389461e-06, "loss": 0.0001, "step": 19039 }, { "epoch": 8.88681446907818, "grad_norm": 0.033203125, "learning_rate": 6.0363154221654174e-06, "loss": 0.0002, "step": 19040 }, { "epoch": 8.887281213535589, "grad_norm": 0.0069580078125, "learning_rate": 6.03129555908376e-06, "loss": 0.0001, "step": 19041 }, { "epoch": 8.887747957993, "grad_norm": 0.0185546875, "learning_rate": 6.026277719252582e-06, "loss": 0.0001, "step": 19042 }, { "epoch": 8.888214702450409, "grad_norm": 0.0067138671875, "learning_rate": 6.021261902779896e-06, "loss": 0.0002, "step": 19043 }, { "epoch": 8.888681446907817, "grad_norm": 0.00921630859375, "learning_rate": 6.016248109773715e-06, "loss": 0.0002, "step": 19044 }, { "epoch": 8.889148191365228, "grad_norm": 0.007232666015625, "learning_rate": 6.011236340342008e-06, "loss": 0.0001, "step": 19045 }, { "epoch": 8.889614935822637, "grad_norm": 0.0654296875, "learning_rate": 6.006226594592645e-06, "loss": 0.0012, "step": 19046 }, { "epoch": 8.890081680280046, "grad_norm": 0.0078125, "learning_rate": 6.001218872633507e-06, "loss": 0.0001, "step": 19047 }, { "epoch": 8.890548424737457, "grad_norm": 0.00836181640625, "learning_rate": 5.99621317457244e-06, "loss": 0.0001, "step": 19048 }, { "epoch": 8.891015169194866, "grad_norm": 0.034912109375, "learning_rate": 5.991209500517192e-06, "loss": 0.0018, "step": 19049 }, { "epoch": 8.891481913652274, "grad_norm": 0.045654296875, "learning_rate": 5.986207850575521e-06, "loss": 0.0048, "step": 19050 }, { "epoch": 8.891948658109685, "grad_norm": 0.006744384765625, "learning_rate": 5.981208224855084e-06, "loss": 0.0001, "step": 19051 }, { "epoch": 8.892415402567094, "grad_norm": 0.00543212890625, "learning_rate": 5.976210623463552e-06, "loss": 0.0001, "step": 19052 }, { "epoch": 8.892882147024505, "grad_norm": 0.00469970703125, "learning_rate": 5.9712150465085385e-06, "loss": 0.0001, "step": 19053 }, { "epoch": 8.893348891481914, "grad_norm": 0.0286865234375, "learning_rate": 5.96622149409759e-06, "loss": 0.0002, "step": 19054 }, { "epoch": 8.893815635939323, "grad_norm": 0.0120849609375, "learning_rate": 5.961229966338222e-06, "loss": 0.0002, "step": 19055 }, { "epoch": 8.894282380396733, "grad_norm": 0.0079345703125, "learning_rate": 5.956240463337903e-06, "loss": 0.0002, "step": 19056 }, { "epoch": 8.894749124854142, "grad_norm": 0.004730224609375, "learning_rate": 5.951252985204092e-06, "loss": 0.0001, "step": 19057 }, { "epoch": 8.895215869311553, "grad_norm": 0.07373046875, "learning_rate": 5.946267532044136e-06, "loss": 0.002, "step": 19058 }, { "epoch": 8.895682613768962, "grad_norm": 0.0390625, "learning_rate": 5.941284103965384e-06, "loss": 0.0029, "step": 19059 }, { "epoch": 8.89614935822637, "grad_norm": 0.01324462890625, "learning_rate": 5.93630270107517e-06, "loss": 0.0002, "step": 19060 }, { "epoch": 8.896616102683781, "grad_norm": 0.0576171875, "learning_rate": 5.9313233234807084e-06, "loss": 0.0016, "step": 19061 }, { "epoch": 8.89708284714119, "grad_norm": 0.01043701171875, "learning_rate": 5.9263459712892375e-06, "loss": 0.0001, "step": 19062 }, { "epoch": 8.8975495915986, "grad_norm": 0.0086669921875, "learning_rate": 5.921370644607904e-06, "loss": 0.0001, "step": 19063 }, { "epoch": 8.89801633605601, "grad_norm": 0.005828857421875, "learning_rate": 5.916397343543833e-06, "loss": 0.0001, "step": 19064 }, { "epoch": 8.898483080513419, "grad_norm": 0.01385498046875, "learning_rate": 5.91142606820414e-06, "loss": 0.0002, "step": 19065 }, { "epoch": 8.898949824970828, "grad_norm": 0.01226806640625, "learning_rate": 5.9064568186958265e-06, "loss": 0.0001, "step": 19066 }, { "epoch": 8.899416569428238, "grad_norm": 0.009521484375, "learning_rate": 5.901489595125887e-06, "loss": 0.0001, "step": 19067 }, { "epoch": 8.899883313885647, "grad_norm": 0.01080322265625, "learning_rate": 5.8965243976013e-06, "loss": 0.0002, "step": 19068 }, { "epoch": 8.900350058343058, "grad_norm": 0.060302734375, "learning_rate": 5.891561226228925e-06, "loss": 0.0003, "step": 19069 }, { "epoch": 8.900816802800467, "grad_norm": 0.01416015625, "learning_rate": 5.886600081115668e-06, "loss": 0.0001, "step": 19070 }, { "epoch": 8.901283547257876, "grad_norm": 0.01116943359375, "learning_rate": 5.88164096236834e-06, "loss": 0.0002, "step": 19071 }, { "epoch": 8.901750291715286, "grad_norm": 0.01263427734375, "learning_rate": 5.876683870093691e-06, "loss": 0.0002, "step": 19072 }, { "epoch": 8.902217036172695, "grad_norm": 0.0791015625, "learning_rate": 5.871728804398469e-06, "loss": 0.004, "step": 19073 }, { "epoch": 8.902683780630104, "grad_norm": 0.032958984375, "learning_rate": 5.866775765389387e-06, "loss": 0.0002, "step": 19074 }, { "epoch": 8.903150525087515, "grad_norm": 0.006256103515625, "learning_rate": 5.861824753173039e-06, "loss": 0.0001, "step": 19075 }, { "epoch": 8.903617269544924, "grad_norm": 0.00872802734375, "learning_rate": 5.856875767856063e-06, "loss": 0.0002, "step": 19076 }, { "epoch": 8.904084014002335, "grad_norm": 0.01385498046875, "learning_rate": 5.851928809545015e-06, "loss": 0.0002, "step": 19077 }, { "epoch": 8.904550758459743, "grad_norm": 0.005767822265625, "learning_rate": 5.846983878346379e-06, "loss": 0.0001, "step": 19078 }, { "epoch": 8.905017502917152, "grad_norm": 0.00970458984375, "learning_rate": 5.8420409743666584e-06, "loss": 0.0001, "step": 19079 }, { "epoch": 8.905484247374563, "grad_norm": 0.059326171875, "learning_rate": 5.837100097712267e-06, "loss": 0.0005, "step": 19080 }, { "epoch": 8.905950991831972, "grad_norm": 0.040771484375, "learning_rate": 5.8321612484895764e-06, "loss": 0.0019, "step": 19081 }, { "epoch": 8.90641773628938, "grad_norm": 0.0206298828125, "learning_rate": 5.827224426804956e-06, "loss": 0.0002, "step": 19082 }, { "epoch": 8.906884480746792, "grad_norm": 0.01300048828125, "learning_rate": 5.8222896327646546e-06, "loss": 0.0002, "step": 19083 }, { "epoch": 8.9073512252042, "grad_norm": 0.0791015625, "learning_rate": 5.817356866474955e-06, "loss": 0.0041, "step": 19084 }, { "epoch": 8.907817969661611, "grad_norm": 0.0087890625, "learning_rate": 5.81242612804207e-06, "loss": 0.0002, "step": 19085 }, { "epoch": 8.90828471411902, "grad_norm": 0.0361328125, "learning_rate": 5.807497417572139e-06, "loss": 0.0002, "step": 19086 }, { "epoch": 8.908751458576429, "grad_norm": 0.048828125, "learning_rate": 5.8025707351713086e-06, "loss": 0.0031, "step": 19087 }, { "epoch": 8.90921820303384, "grad_norm": 0.0263671875, "learning_rate": 5.79764608094564e-06, "loss": 0.0002, "step": 19088 }, { "epoch": 8.909684947491249, "grad_norm": 0.0126953125, "learning_rate": 5.79272345500117e-06, "loss": 0.0001, "step": 19089 }, { "epoch": 8.910151691948657, "grad_norm": 0.068359375, "learning_rate": 5.78780285744388e-06, "loss": 0.0051, "step": 19090 }, { "epoch": 8.910618436406068, "grad_norm": 0.004180908203125, "learning_rate": 5.782884288379753e-06, "loss": 0.0001, "step": 19091 }, { "epoch": 8.911085180863477, "grad_norm": 0.01446533203125, "learning_rate": 5.777967747914637e-06, "loss": 0.0002, "step": 19092 }, { "epoch": 8.911551925320886, "grad_norm": 0.01312255859375, "learning_rate": 5.773053236154413e-06, "loss": 0.0001, "step": 19093 }, { "epoch": 8.912018669778297, "grad_norm": 0.010498046875, "learning_rate": 5.76814075320492e-06, "loss": 0.0001, "step": 19094 }, { "epoch": 8.912485414235706, "grad_norm": 0.00604248046875, "learning_rate": 5.763230299171895e-06, "loss": 0.0001, "step": 19095 }, { "epoch": 8.912952158693116, "grad_norm": 0.0255126953125, "learning_rate": 5.758321874161088e-06, "loss": 0.0002, "step": 19096 }, { "epoch": 8.913418903150525, "grad_norm": 0.01611328125, "learning_rate": 5.753415478278168e-06, "loss": 0.0001, "step": 19097 }, { "epoch": 8.913885647607934, "grad_norm": 0.0054931640625, "learning_rate": 5.748511111628785e-06, "loss": 0.0002, "step": 19098 }, { "epoch": 8.914352392065345, "grad_norm": 0.00762939453125, "learning_rate": 5.743608774318521e-06, "loss": 0.0001, "step": 19099 }, { "epoch": 8.914819136522754, "grad_norm": 0.0103759765625, "learning_rate": 5.7387084664529574e-06, "loss": 0.0001, "step": 19100 }, { "epoch": 8.915285880980164, "grad_norm": 0.0123291015625, "learning_rate": 5.733810188137567e-06, "loss": 0.0001, "step": 19101 }, { "epoch": 8.915752625437573, "grad_norm": 0.006805419921875, "learning_rate": 5.728913939477854e-06, "loss": 0.0001, "step": 19102 }, { "epoch": 8.916219369894982, "grad_norm": 0.007232666015625, "learning_rate": 5.724019720579222e-06, "loss": 0.0002, "step": 19103 }, { "epoch": 8.916686114352393, "grad_norm": 0.01068115234375, "learning_rate": 5.719127531547019e-06, "loss": 0.0002, "step": 19104 }, { "epoch": 8.917152858809802, "grad_norm": 0.00958251953125, "learning_rate": 5.7142373724866195e-06, "loss": 0.0002, "step": 19105 }, { "epoch": 8.91761960326721, "grad_norm": 0.01068115234375, "learning_rate": 5.709349243503303e-06, "loss": 0.0002, "step": 19106 }, { "epoch": 8.918086347724621, "grad_norm": 0.01513671875, "learning_rate": 5.70446314470231e-06, "loss": 0.0001, "step": 19107 }, { "epoch": 8.91855309218203, "grad_norm": 0.00592041015625, "learning_rate": 5.699579076188844e-06, "loss": 0.0001, "step": 19108 }, { "epoch": 8.919019836639439, "grad_norm": 0.008056640625, "learning_rate": 5.6946970380680866e-06, "loss": 0.0001, "step": 19109 }, { "epoch": 8.91948658109685, "grad_norm": 0.02294921875, "learning_rate": 5.689817030445121e-06, "loss": 0.0002, "step": 19110 }, { "epoch": 8.919953325554259, "grad_norm": 0.0225830078125, "learning_rate": 5.6849390534250315e-06, "loss": 0.0002, "step": 19111 }, { "epoch": 8.92042007001167, "grad_norm": 0.01141357421875, "learning_rate": 5.680063107112865e-06, "loss": 0.0002, "step": 19112 }, { "epoch": 8.920886814469078, "grad_norm": 0.0096435546875, "learning_rate": 5.675189191613573e-06, "loss": 0.0001, "step": 19113 }, { "epoch": 8.921353558926487, "grad_norm": 0.007781982421875, "learning_rate": 5.6703173070321245e-06, "loss": 0.0001, "step": 19114 }, { "epoch": 8.921820303383898, "grad_norm": 0.0191650390625, "learning_rate": 5.665447453473394e-06, "loss": 0.0002, "step": 19115 }, { "epoch": 8.922287047841307, "grad_norm": 0.06591796875, "learning_rate": 5.660579631042251e-06, "loss": 0.0044, "step": 19116 }, { "epoch": 8.922753792298716, "grad_norm": 0.0054931640625, "learning_rate": 5.6557138398435015e-06, "loss": 0.0001, "step": 19117 }, { "epoch": 8.923220536756126, "grad_norm": 0.00543212890625, "learning_rate": 5.650850079981906e-06, "loss": 0.0001, "step": 19118 }, { "epoch": 8.923687281213535, "grad_norm": 0.014892578125, "learning_rate": 5.645988351562192e-06, "loss": 0.0002, "step": 19119 }, { "epoch": 8.924154025670946, "grad_norm": 0.0093994140625, "learning_rate": 5.641128654689043e-06, "loss": 0.0001, "step": 19120 }, { "epoch": 8.924620770128355, "grad_norm": 0.00872802734375, "learning_rate": 5.636270989467074e-06, "loss": 0.0001, "step": 19121 }, { "epoch": 8.925087514585764, "grad_norm": 0.0078125, "learning_rate": 5.631415356000891e-06, "loss": 0.0002, "step": 19122 }, { "epoch": 8.925554259043174, "grad_norm": 0.0216064453125, "learning_rate": 5.626561754395054e-06, "loss": 0.0001, "step": 19123 }, { "epoch": 8.926021003500583, "grad_norm": 0.00531005859375, "learning_rate": 5.621710184754037e-06, "loss": 0.0001, "step": 19124 }, { "epoch": 8.926487747957992, "grad_norm": 0.006256103515625, "learning_rate": 5.616860647182309e-06, "loss": 0.0001, "step": 19125 }, { "epoch": 8.926954492415403, "grad_norm": 0.0111083984375, "learning_rate": 5.612013141784312e-06, "loss": 0.0002, "step": 19126 }, { "epoch": 8.927421236872812, "grad_norm": 0.007049560546875, "learning_rate": 5.607167668664382e-06, "loss": 0.0001, "step": 19127 }, { "epoch": 8.927887981330223, "grad_norm": 0.015869140625, "learning_rate": 5.60232422792687e-06, "loss": 0.0002, "step": 19128 }, { "epoch": 8.928354725787631, "grad_norm": 0.0361328125, "learning_rate": 5.597482819676058e-06, "loss": 0.0017, "step": 19129 }, { "epoch": 8.92882147024504, "grad_norm": 0.00421142578125, "learning_rate": 5.592643444016166e-06, "loss": 0.0001, "step": 19130 }, { "epoch": 8.929288214702451, "grad_norm": 0.01708984375, "learning_rate": 5.587806101051429e-06, "loss": 0.0001, "step": 19131 }, { "epoch": 8.92975495915986, "grad_norm": 0.006134033203125, "learning_rate": 5.582970790885955e-06, "loss": 0.0001, "step": 19132 }, { "epoch": 8.930221703617269, "grad_norm": 0.005615234375, "learning_rate": 5.578137513623882e-06, "loss": 0.0001, "step": 19133 }, { "epoch": 8.93068844807468, "grad_norm": 0.00701904296875, "learning_rate": 5.573306269369294e-06, "loss": 0.0002, "step": 19134 }, { "epoch": 8.931155192532088, "grad_norm": 0.006378173828125, "learning_rate": 5.568477058226163e-06, "loss": 0.0001, "step": 19135 }, { "epoch": 8.931621936989497, "grad_norm": 0.01239013671875, "learning_rate": 5.563649880298505e-06, "loss": 0.0002, "step": 19136 }, { "epoch": 8.932088681446908, "grad_norm": 0.038330078125, "learning_rate": 5.558824735690249e-06, "loss": 0.0019, "step": 19137 }, { "epoch": 8.932555425904317, "grad_norm": 0.0050048828125, "learning_rate": 5.5540016245052785e-06, "loss": 0.0001, "step": 19138 }, { "epoch": 8.933022170361728, "grad_norm": 0.01531982421875, "learning_rate": 5.5491805468474324e-06, "loss": 0.0001, "step": 19139 }, { "epoch": 8.933488914819137, "grad_norm": 0.04345703125, "learning_rate": 5.544361502820539e-06, "loss": 0.0019, "step": 19140 }, { "epoch": 8.933955659276545, "grad_norm": 0.013427734375, "learning_rate": 5.539544492528337e-06, "loss": 0.0002, "step": 19141 }, { "epoch": 8.934422403733956, "grad_norm": 0.01300048828125, "learning_rate": 5.534729516074533e-06, "loss": 0.0001, "step": 19142 }, { "epoch": 8.934889148191365, "grad_norm": 0.00830078125, "learning_rate": 5.529916573562843e-06, "loss": 0.0001, "step": 19143 }, { "epoch": 8.935355892648776, "grad_norm": 0.007720947265625, "learning_rate": 5.525105665096842e-06, "loss": 0.0001, "step": 19144 }, { "epoch": 8.935822637106185, "grad_norm": 0.016357421875, "learning_rate": 5.520296790780144e-06, "loss": 0.0001, "step": 19145 }, { "epoch": 8.936289381563594, "grad_norm": 0.04296875, "learning_rate": 5.51548995071629e-06, "loss": 0.002, "step": 19146 }, { "epoch": 8.936756126021004, "grad_norm": 0.00762939453125, "learning_rate": 5.5106851450087535e-06, "loss": 0.0001, "step": 19147 }, { "epoch": 8.937222870478413, "grad_norm": 0.0164794921875, "learning_rate": 5.505882373761018e-06, "loss": 0.0001, "step": 19148 }, { "epoch": 8.937689614935822, "grad_norm": 0.0137939453125, "learning_rate": 5.501081637076478e-06, "loss": 0.0001, "step": 19149 }, { "epoch": 8.938156359393233, "grad_norm": 0.06884765625, "learning_rate": 5.496282935058505e-06, "loss": 0.0022, "step": 19150 }, { "epoch": 8.938623103850642, "grad_norm": 0.0169677734375, "learning_rate": 5.491486267810397e-06, "loss": 0.0001, "step": 19151 }, { "epoch": 8.93908984830805, "grad_norm": 0.00494384765625, "learning_rate": 5.48669163543547e-06, "loss": 0.0001, "step": 19152 }, { "epoch": 8.939556592765461, "grad_norm": 0.041259765625, "learning_rate": 5.48189903803692e-06, "loss": 0.0031, "step": 19153 }, { "epoch": 8.94002333722287, "grad_norm": 0.005706787109375, "learning_rate": 5.47710847571794e-06, "loss": 0.0001, "step": 19154 }, { "epoch": 8.94049008168028, "grad_norm": 0.00469970703125, "learning_rate": 5.472319948581717e-06, "loss": 0.0001, "step": 19155 }, { "epoch": 8.94095682613769, "grad_norm": 0.07958984375, "learning_rate": 5.4675334567313104e-06, "loss": 0.0021, "step": 19156 }, { "epoch": 8.941423570595099, "grad_norm": 0.0084228515625, "learning_rate": 5.462749000269795e-06, "loss": 0.0002, "step": 19157 }, { "epoch": 8.94189031505251, "grad_norm": 0.00775146484375, "learning_rate": 5.4579665793001886e-06, "loss": 0.0001, "step": 19158 }, { "epoch": 8.942357059509918, "grad_norm": 0.007354736328125, "learning_rate": 5.453186193925453e-06, "loss": 0.0001, "step": 19159 }, { "epoch": 8.942823803967327, "grad_norm": 0.060302734375, "learning_rate": 5.4484078442485265e-06, "loss": 0.0016, "step": 19160 }, { "epoch": 8.943290548424738, "grad_norm": 0.0556640625, "learning_rate": 5.443631530372284e-06, "loss": 0.0027, "step": 19161 }, { "epoch": 8.943757292882147, "grad_norm": 0.01025390625, "learning_rate": 5.438857252399565e-06, "loss": 0.0001, "step": 19162 }, { "epoch": 8.944224037339557, "grad_norm": 0.004486083984375, "learning_rate": 5.434085010433177e-06, "loss": 0.0001, "step": 19163 }, { "epoch": 8.944690781796966, "grad_norm": 0.01177978515625, "learning_rate": 5.429314804575847e-06, "loss": 0.0001, "step": 19164 }, { "epoch": 8.945157526254375, "grad_norm": 0.0186767578125, "learning_rate": 5.4245466349302945e-06, "loss": 0.0002, "step": 19165 }, { "epoch": 8.945624270711786, "grad_norm": 0.0106201171875, "learning_rate": 5.4197805015992035e-06, "loss": 0.0001, "step": 19166 }, { "epoch": 8.946091015169195, "grad_norm": 0.007354736328125, "learning_rate": 5.415016404685158e-06, "loss": 0.0001, "step": 19167 }, { "epoch": 8.946557759626604, "grad_norm": 0.055419921875, "learning_rate": 5.410254344290766e-06, "loss": 0.0002, "step": 19168 }, { "epoch": 8.947024504084014, "grad_norm": 0.00750732421875, "learning_rate": 5.405494320518545e-06, "loss": 0.0001, "step": 19169 }, { "epoch": 8.947491248541423, "grad_norm": 0.010009765625, "learning_rate": 5.400736333470979e-06, "loss": 0.0002, "step": 19170 }, { "epoch": 8.947957992998834, "grad_norm": 0.022705078125, "learning_rate": 5.39598038325052e-06, "loss": 0.0002, "step": 19171 }, { "epoch": 8.948424737456243, "grad_norm": 0.00531005859375, "learning_rate": 5.391226469959587e-06, "loss": 0.0001, "step": 19172 }, { "epoch": 8.948891481913652, "grad_norm": 0.0096435546875, "learning_rate": 5.386474593700497e-06, "loss": 0.0001, "step": 19173 }, { "epoch": 8.949358226371062, "grad_norm": 0.017578125, "learning_rate": 5.38172475457559e-06, "loss": 0.0002, "step": 19174 }, { "epoch": 8.949824970828471, "grad_norm": 0.01025390625, "learning_rate": 5.376976952687141e-06, "loss": 0.0002, "step": 19175 }, { "epoch": 8.95029171528588, "grad_norm": 0.01409912109375, "learning_rate": 5.372231188137345e-06, "loss": 0.0001, "step": 19176 }, { "epoch": 8.950758459743291, "grad_norm": 0.0093994140625, "learning_rate": 5.367487461028409e-06, "loss": 0.0002, "step": 19177 }, { "epoch": 8.9512252042007, "grad_norm": 0.00634765625, "learning_rate": 5.362745771462474e-06, "loss": 0.0001, "step": 19178 }, { "epoch": 8.951691948658109, "grad_norm": 0.006256103515625, "learning_rate": 5.358006119541614e-06, "loss": 0.0002, "step": 19179 }, { "epoch": 8.95215869311552, "grad_norm": 0.005615234375, "learning_rate": 5.353268505367903e-06, "loss": 0.0001, "step": 19180 }, { "epoch": 8.952625437572928, "grad_norm": 0.005950927734375, "learning_rate": 5.348532929043326e-06, "loss": 0.0001, "step": 19181 }, { "epoch": 8.953092182030339, "grad_norm": 0.035888671875, "learning_rate": 5.343799390669846e-06, "loss": 0.0032, "step": 19182 }, { "epoch": 8.953558926487748, "grad_norm": 0.009765625, "learning_rate": 5.339067890349403e-06, "loss": 0.0002, "step": 19183 }, { "epoch": 8.954025670945157, "grad_norm": 0.0107421875, "learning_rate": 5.334338428183838e-06, "loss": 0.0002, "step": 19184 }, { "epoch": 8.954492415402568, "grad_norm": 0.00933837890625, "learning_rate": 5.329611004275004e-06, "loss": 0.0002, "step": 19185 }, { "epoch": 8.954959159859976, "grad_norm": 0.0654296875, "learning_rate": 5.324885618724696e-06, "loss": 0.0018, "step": 19186 }, { "epoch": 8.955425904317387, "grad_norm": 0.0111083984375, "learning_rate": 5.320162271634632e-06, "loss": 0.0002, "step": 19187 }, { "epoch": 8.955892648774796, "grad_norm": 0.00823974609375, "learning_rate": 5.315440963106522e-06, "loss": 0.0001, "step": 19188 }, { "epoch": 8.956359393232205, "grad_norm": 0.006561279296875, "learning_rate": 5.310721693242038e-06, "loss": 0.0001, "step": 19189 }, { "epoch": 8.956826137689616, "grad_norm": 0.01239013671875, "learning_rate": 5.3060044621427665e-06, "loss": 0.0002, "step": 19190 }, { "epoch": 8.957292882147025, "grad_norm": 0.09228515625, "learning_rate": 5.30128926991027e-06, "loss": 0.0005, "step": 19191 }, { "epoch": 8.957759626604433, "grad_norm": 0.005767822265625, "learning_rate": 5.296576116646113e-06, "loss": 0.0001, "step": 19192 }, { "epoch": 8.958226371061844, "grad_norm": 0.005767822265625, "learning_rate": 5.2918650024517235e-06, "loss": 0.0001, "step": 19193 }, { "epoch": 8.958693115519253, "grad_norm": 0.059326171875, "learning_rate": 5.287155927428566e-06, "loss": 0.005, "step": 19194 }, { "epoch": 8.959159859976662, "grad_norm": 0.00537109375, "learning_rate": 5.282448891678038e-06, "loss": 0.0001, "step": 19195 }, { "epoch": 8.959626604434073, "grad_norm": 0.0157470703125, "learning_rate": 5.277743895301468e-06, "loss": 0.0001, "step": 19196 }, { "epoch": 8.960093348891482, "grad_norm": 0.017578125, "learning_rate": 5.273040938400165e-06, "loss": 0.0002, "step": 19197 }, { "epoch": 8.960560093348892, "grad_norm": 0.004119873046875, "learning_rate": 5.268340021075424e-06, "loss": 0.0001, "step": 19198 }, { "epoch": 8.961026837806301, "grad_norm": 0.00909423828125, "learning_rate": 5.263641143428399e-06, "loss": 0.0001, "step": 19199 }, { "epoch": 8.96149358226371, "grad_norm": 0.006744384765625, "learning_rate": 5.2589443055602876e-06, "loss": 0.0001, "step": 19200 }, { "epoch": 8.96196032672112, "grad_norm": 0.027099609375, "learning_rate": 5.25424950757224e-06, "loss": 0.0002, "step": 19201 }, { "epoch": 8.96242707117853, "grad_norm": 0.0069580078125, "learning_rate": 5.24955674956531e-06, "loss": 0.0001, "step": 19202 }, { "epoch": 8.962893815635939, "grad_norm": 0.0194091796875, "learning_rate": 5.24486603164055e-06, "loss": 0.0001, "step": 19203 }, { "epoch": 8.96336056009335, "grad_norm": 0.013916015625, "learning_rate": 5.240177353898968e-06, "loss": 0.0002, "step": 19204 }, { "epoch": 8.963827304550758, "grad_norm": 0.00628662109375, "learning_rate": 5.235490716441494e-06, "loss": 0.0001, "step": 19205 }, { "epoch": 8.964294049008169, "grad_norm": 0.01123046875, "learning_rate": 5.230806119369036e-06, "loss": 0.0001, "step": 19206 }, { "epoch": 8.964760793465578, "grad_norm": 0.00482177734375, "learning_rate": 5.226123562782503e-06, "loss": 0.0001, "step": 19207 }, { "epoch": 8.965227537922987, "grad_norm": 0.00567626953125, "learning_rate": 5.2214430467826595e-06, "loss": 0.0001, "step": 19208 }, { "epoch": 8.965694282380397, "grad_norm": 0.0135498046875, "learning_rate": 5.216764571470301e-06, "loss": 0.0001, "step": 19209 }, { "epoch": 8.966161026837806, "grad_norm": 0.0269775390625, "learning_rate": 5.2120881369461915e-06, "loss": 0.0002, "step": 19210 }, { "epoch": 8.966627771295215, "grad_norm": 0.00860595703125, "learning_rate": 5.207413743310974e-06, "loss": 0.0002, "step": 19211 }, { "epoch": 8.967094515752626, "grad_norm": 0.007171630859375, "learning_rate": 5.202741390665322e-06, "loss": 0.0001, "step": 19212 }, { "epoch": 8.967561260210035, "grad_norm": 0.003692626953125, "learning_rate": 5.198071079109823e-06, "loss": 0.0001, "step": 19213 }, { "epoch": 8.968028004667445, "grad_norm": 0.00775146484375, "learning_rate": 5.193402808745029e-06, "loss": 0.0001, "step": 19214 }, { "epoch": 8.968494749124854, "grad_norm": 0.006134033203125, "learning_rate": 5.188736579671483e-06, "loss": 0.0001, "step": 19215 }, { "epoch": 8.968961493582263, "grad_norm": 0.004913330078125, "learning_rate": 5.1840723919896135e-06, "loss": 0.0001, "step": 19216 }, { "epoch": 8.969428238039674, "grad_norm": 0.0142822265625, "learning_rate": 5.179410245799865e-06, "loss": 0.0002, "step": 19217 }, { "epoch": 8.969894982497083, "grad_norm": 0.0086669921875, "learning_rate": 5.1747501412026444e-06, "loss": 0.0001, "step": 19218 }, { "epoch": 8.970361726954492, "grad_norm": 0.006927490234375, "learning_rate": 5.170092078298239e-06, "loss": 0.0002, "step": 19219 }, { "epoch": 8.970828471411902, "grad_norm": 0.080078125, "learning_rate": 5.165436057186968e-06, "loss": 0.0041, "step": 19220 }, { "epoch": 8.971295215869311, "grad_norm": 0.030029296875, "learning_rate": 5.160782077969095e-06, "loss": 0.0023, "step": 19221 }, { "epoch": 8.97176196032672, "grad_norm": 0.0081787109375, "learning_rate": 5.156130140744797e-06, "loss": 0.0001, "step": 19222 }, { "epoch": 8.972228704784131, "grad_norm": 0.0037078857421875, "learning_rate": 5.151480245614249e-06, "loss": 0.0001, "step": 19223 }, { "epoch": 8.97269544924154, "grad_norm": 0.01019287109375, "learning_rate": 5.146832392677581e-06, "loss": 0.0001, "step": 19224 }, { "epoch": 8.97316219369895, "grad_norm": 0.0164794921875, "learning_rate": 5.142186582034836e-06, "loss": 0.0002, "step": 19225 }, { "epoch": 8.97362893815636, "grad_norm": 0.034912109375, "learning_rate": 5.137542813786056e-06, "loss": 0.0002, "step": 19226 }, { "epoch": 8.974095682613768, "grad_norm": 0.01129150390625, "learning_rate": 5.13290108803125e-06, "loss": 0.0001, "step": 19227 }, { "epoch": 8.974562427071179, "grad_norm": 0.005767822265625, "learning_rate": 5.128261404870327e-06, "loss": 0.0001, "step": 19228 }, { "epoch": 8.975029171528588, "grad_norm": 0.0419921875, "learning_rate": 5.123623764403207e-06, "loss": 0.0023, "step": 19229 }, { "epoch": 8.975495915985999, "grad_norm": 0.047119140625, "learning_rate": 5.1189881667297325e-06, "loss": 0.0026, "step": 19230 }, { "epoch": 8.975962660443408, "grad_norm": 0.007080078125, "learning_rate": 5.114354611949712e-06, "loss": 0.0001, "step": 19231 }, { "epoch": 8.976429404900816, "grad_norm": 0.0205078125, "learning_rate": 5.109723100162922e-06, "loss": 0.0003, "step": 19232 }, { "epoch": 8.976896149358227, "grad_norm": 0.01123046875, "learning_rate": 5.10509363146906e-06, "loss": 0.0002, "step": 19233 }, { "epoch": 8.977362893815636, "grad_norm": 0.005157470703125, "learning_rate": 5.100466205967836e-06, "loss": 0.0001, "step": 19234 }, { "epoch": 8.977829638273045, "grad_norm": 0.00921630859375, "learning_rate": 5.095840823758869e-06, "loss": 0.0001, "step": 19235 }, { "epoch": 8.978296382730456, "grad_norm": 0.006866455078125, "learning_rate": 5.0912174849417236e-06, "loss": 0.0001, "step": 19236 }, { "epoch": 8.978763127187865, "grad_norm": 0.00482177734375, "learning_rate": 5.086596189615989e-06, "loss": 0.0001, "step": 19237 }, { "epoch": 8.979229871645273, "grad_norm": 0.0076904296875, "learning_rate": 5.08197693788115e-06, "loss": 0.0001, "step": 19238 }, { "epoch": 8.979696616102684, "grad_norm": 0.10302734375, "learning_rate": 5.07735972983664e-06, "loss": 0.0003, "step": 19239 }, { "epoch": 8.980163360560093, "grad_norm": 0.06494140625, "learning_rate": 5.07274456558191e-06, "loss": 0.0019, "step": 19240 }, { "epoch": 8.980630105017504, "grad_norm": 0.01214599609375, "learning_rate": 5.068131445216317e-06, "loss": 0.0002, "step": 19241 }, { "epoch": 8.981096849474913, "grad_norm": 0.044677734375, "learning_rate": 5.06352036883917e-06, "loss": 0.0002, "step": 19242 }, { "epoch": 8.981563593932322, "grad_norm": 0.0167236328125, "learning_rate": 5.0589113365497655e-06, "loss": 0.0002, "step": 19243 }, { "epoch": 8.982030338389732, "grad_norm": 0.038330078125, "learning_rate": 5.054304348447347e-06, "loss": 0.0019, "step": 19244 }, { "epoch": 8.982497082847141, "grad_norm": 0.006011962890625, "learning_rate": 5.049699404631092e-06, "loss": 0.0001, "step": 19245 }, { "epoch": 8.98296382730455, "grad_norm": 0.039306640625, "learning_rate": 5.045096505200165e-06, "loss": 0.0001, "step": 19246 }, { "epoch": 8.98343057176196, "grad_norm": 0.00994873046875, "learning_rate": 5.040495650253674e-06, "loss": 0.0001, "step": 19247 }, { "epoch": 8.98389731621937, "grad_norm": 0.01141357421875, "learning_rate": 5.035896839890652e-06, "loss": 0.0001, "step": 19248 }, { "epoch": 8.98436406067678, "grad_norm": 0.007110595703125, "learning_rate": 5.031300074210132e-06, "loss": 0.0001, "step": 19249 }, { "epoch": 8.98483080513419, "grad_norm": 0.008544921875, "learning_rate": 5.026705353311112e-06, "loss": 0.0001, "step": 19250 }, { "epoch": 8.985297549591598, "grad_norm": 0.07763671875, "learning_rate": 5.022112677292479e-06, "loss": 0.004, "step": 19251 }, { "epoch": 8.985764294049009, "grad_norm": 0.01080322265625, "learning_rate": 5.017522046253142e-06, "loss": 0.0001, "step": 19252 }, { "epoch": 8.986231038506418, "grad_norm": 0.06982421875, "learning_rate": 5.012933460291958e-06, "loss": 0.0019, "step": 19253 }, { "epoch": 8.986697782963827, "grad_norm": 0.0078125, "learning_rate": 5.008346919507689e-06, "loss": 0.0001, "step": 19254 }, { "epoch": 8.987164527421237, "grad_norm": 0.0089111328125, "learning_rate": 5.003762423999114e-06, "loss": 0.0001, "step": 19255 }, { "epoch": 8.987631271878646, "grad_norm": 0.0113525390625, "learning_rate": 4.999179973864942e-06, "loss": 0.0002, "step": 19256 }, { "epoch": 8.988098016336057, "grad_norm": 0.006561279296875, "learning_rate": 4.994599569203817e-06, "loss": 0.0001, "step": 19257 }, { "epoch": 8.988564760793466, "grad_norm": 0.003448486328125, "learning_rate": 4.990021210114371e-06, "loss": 0.0001, "step": 19258 }, { "epoch": 8.989031505250875, "grad_norm": 0.004730224609375, "learning_rate": 4.985444896695201e-06, "loss": 0.0001, "step": 19259 }, { "epoch": 8.989498249708285, "grad_norm": 0.00537109375, "learning_rate": 4.980870629044809e-06, "loss": 0.0001, "step": 19260 }, { "epoch": 8.989964994165694, "grad_norm": 0.00836181640625, "learning_rate": 4.9762984072617035e-06, "loss": 0.0002, "step": 19261 }, { "epoch": 8.990431738623103, "grad_norm": 0.015380859375, "learning_rate": 4.971728231444317e-06, "loss": 0.0002, "step": 19262 }, { "epoch": 8.990898483080514, "grad_norm": 0.052734375, "learning_rate": 4.96716010169106e-06, "loss": 0.0022, "step": 19263 }, { "epoch": 8.991365227537923, "grad_norm": 0.07568359375, "learning_rate": 4.962594018100297e-06, "loss": 0.0002, "step": 19264 }, { "epoch": 8.991831971995332, "grad_norm": 0.035400390625, "learning_rate": 4.958029980770318e-06, "loss": 0.001, "step": 19265 }, { "epoch": 8.992298716452742, "grad_norm": 0.0050048828125, "learning_rate": 4.953467989799409e-06, "loss": 0.0001, "step": 19266 }, { "epoch": 8.992765460910151, "grad_norm": 0.046142578125, "learning_rate": 4.948908045285794e-06, "loss": 0.0002, "step": 19267 }, { "epoch": 8.993232205367562, "grad_norm": 0.0230712890625, "learning_rate": 4.944350147327648e-06, "loss": 0.0001, "step": 19268 }, { "epoch": 8.99369894982497, "grad_norm": 0.0751953125, "learning_rate": 4.939794296023104e-06, "loss": 0.0021, "step": 19269 }, { "epoch": 8.99416569428238, "grad_norm": 0.00897216796875, "learning_rate": 4.935240491470272e-06, "loss": 0.0002, "step": 19270 }, { "epoch": 8.99463243873979, "grad_norm": 0.01025390625, "learning_rate": 4.9306887337671744e-06, "loss": 0.0001, "step": 19271 }, { "epoch": 8.9950991831972, "grad_norm": 0.005462646484375, "learning_rate": 4.926139023011822e-06, "loss": 0.0001, "step": 19272 }, { "epoch": 8.995565927654608, "grad_norm": 0.0419921875, "learning_rate": 4.921591359302202e-06, "loss": 0.0024, "step": 19273 }, { "epoch": 8.996032672112019, "grad_norm": 0.0081787109375, "learning_rate": 4.9170457427362036e-06, "loss": 0.0001, "step": 19274 }, { "epoch": 8.996499416569428, "grad_norm": 0.00872802734375, "learning_rate": 4.912502173411704e-06, "loss": 0.0001, "step": 19275 }, { "epoch": 8.996966161026839, "grad_norm": 0.00762939453125, "learning_rate": 4.907960651426546e-06, "loss": 0.0001, "step": 19276 }, { "epoch": 8.997432905484247, "grad_norm": 0.0079345703125, "learning_rate": 4.903421176878487e-06, "loss": 0.0001, "step": 19277 }, { "epoch": 8.997899649941656, "grad_norm": 0.006256103515625, "learning_rate": 4.898883749865291e-06, "loss": 0.0001, "step": 19278 } ], "logging_steps": 1, "max_steps": 21420, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2142, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.045162537989492e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }